require 'html/tokenizer'
require 'html/node'
require 'html/selector'
require 'html/sanitizer'
module HTML #:nodoc:
# A top-level HTML document. You give it a body of text, and it will parse that
# text into a tree of nodes.
class Document #:nodoc:
# The root of the parsed document.
attr_reader :root
# Create a new Document from the given text.
def initialize(text, strict=false, xml=false)
tokenizer = Tokenizer.new(text)
@root = Node.new(nil)
node_stack = [ @root ]
while token = tokenizer.next
node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict)
node_stack.last.children << node unless node.tag? && node.closing == :close
if node.tag?
if node_stack.length > 1 && node.closing == :close
if node_stack.last.name == node.name
if node_stack.last.children.empty?
node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "")
end
node_stack.pop
else
open_start = node_stack.last.position - 20
open_start = 0 if open_start < 0
close_start = node.position - 20
close_start = 0 if close_start < 0
msg = <