aboutsummaryrefslogtreecommitdiffstats
path: root/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
diff options
context:
space:
mode:
Diffstat (limited to 'actionpack/lib/action_controller/vendor/html-scanner/html/document.rb')
-rw-r--r--actionpack/lib/action_controller/vendor/html-scanner/html/document.rb63
1 files changed, 63 insertions, 0 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
new file mode 100644
index 0000000000..a196bdea44
--- /dev/null
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
@@ -0,0 +1,63 @@
+require 'html/tokenizer'
+require 'html/node'
+
+module HTML
+
+ # A top-level HTMl document. You give it a body of text, and it will parse that
+ # text into a tree of nodes.
+ class Document
+
+ # The root of the parsed document.
+ attr_reader :root
+
+ # Create a new Document from the given text.
+ def initialize(text)
+ tokenizer = Tokenizer.new(text)
+ @root = Node.new(nil)
+ node_stack = [ @root ]
+ while token = tokenizer.next
+ node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token)
+
+ node_stack.last.children << node unless node.tag? && node.closing == :close
+ if node.tag? && !node.childless?
+ if node_stack.length > 1 && node.closing == :close
+ if node_stack.last.name == node.name
+ node_stack.pop
+ else
+ open_start = node_stack.last.position - 20
+ open_start = 0 if open_start < 0
+ close_start = node.position - 20
+ close_start = 0 if close_start < 0
+ warn <<EOF.strip
+ignoring attempt to close #{node_stack.last.name} with #{node.name}
+ opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
+ closed at byte #{node.position}, line #{node.line}
+ attributes at open: #{node_stack.last.attributes.inspect}
+ text around open: #{text[open_start,40].inspect}
+ text around close: #{text[close_start,40].inspect}
+EOF
+ end
+ elsif node.closing != :close
+ node_stack.push node
+ end
+ end
+ end
+ end
+
+ # Search the tree for (and return) the first node that matches the given
+ # conditions. The conditions are interpreted differently for different node
+ # types, see HTML::Text#find and HTML::Tag#find.
+ def find(conditions)
+ @root.find(conditions)
+ end
+
+ # Search the tree for (and return) all nodes that match the given
+ # conditions. The conditions are interpreted differently for different node
+ # types, see HTML::Text#find and HTML::Tag#find.
+ def find_all(conditions)
+ @root.find_all(conditions)
+ end
+
+ end
+
+end