From c23b2a4ad3f77222b6bfb219610fca79024ca4e5 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 14 Jun 2005 10:30:36 +0000 Subject: Updated vendor copy of html-scanner lib, for bug fixes and optimizations git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@1416 5ecf4fe2-1ee6-0310-87b1-e25e094e27de --- actionpack/CHANGELOG | 2 ++ .../vendor/html-scanner/html/document.rb | 7 ++++--- .../vendor/html-scanner/html/node.rb | 19 +++++++++++++------ .../vendor/html-scanner/html/tokenizer.rb | 13 ++++++------- .../vendor/html-scanner/html/version.rb | 6 +++--- 5 files changed, 28 insertions(+), 19 deletions(-) (limited to 'actionpack') diff --git a/actionpack/CHANGELOG b/actionpack/CHANGELOG index bea218be68..fa4944dd08 100644 --- a/actionpack/CHANGELOG +++ b/actionpack/CHANGELOG @@ -1,5 +1,7 @@ *SVN* +* Updated vendor copy of html-scanner lib to 0.5.1, for bug fixes and optimizations + * Changed test requests to come from 0.0.0.0 instead of 127.0.0.1 such that they don't trigger debugging screens on exceptions, but instead call rescue_action_in_public * Modernize scaffolding to match the generator: use the new render method and change style from the warty @params["id"] to the sleek params[:id]. #1367 diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb index e14d9de166..f8a37e9454 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb @@ -1,7 +1,7 @@ require 'html/tokenizer' require 'html/node' -module HTML#:nodoc: +module HTML #:nodoc: # A top-level HTMl document. You give it a body of text, and it will parse that # text into a tree of nodes. @@ -11,7 +11,7 @@ module HTML#:nodoc: attr_reader :root # Create a new Document from the given text. 
- def initialize(text) + def initialize(text, strict=false) tokenizer = Tokenizer.new(text) @root = Node.new(nil) node_stack = [ @root ] @@ -28,7 +28,7 @@ module HTML#:nodoc: open_start = 0 if open_start < 0 close_start = node.position - 20 close_start = 0 if close_start < 0 - warn < hash } unless Hash === hash @@ -54,7 +54,7 @@ module HTML#:nodoc: end # The base class of all nodes, textual and otherwise, in an HTML document. - class Node#:nodoc: + class Node #:nodoc: # The array of children of this node. Not all nodes have children. attr_reader :children @@ -91,6 +91,8 @@ module HTML#:nodoc: # Search the children of this node for the first node for which #find # returns non +nil+. Returns the result of the #find call that succeeded. def find(conditions) + conditions = validate_conditions(conditions) + @children.each do |child| node = child.find(conditions) return node if node @@ -101,6 +103,8 @@ module HTML#:nodoc: # Search for all nodes that match the given conditions, and return them # as an array. def find_all(conditions) + conditions = validate_conditions(conditions) + matches = [] matches << self if match(conditions) @children.each do |child| @@ -183,7 +187,7 @@ module HTML#:nodoc: end # A node that represents text, rather than markup. - class Text < Node#:nodoc: + class Text < Node #:nodoc: attr_reader :content @@ -239,7 +243,7 @@ module HTML#:nodoc: # A Tag is any node that represents markup. It may be an opening tag, a # closing tag, or a self-closing tag. It has a name, and may have a hash of # attributes. - class Tag < Node#:nodoc: + class Tag < Node #:nodoc: # Either +nil+, :close, or :self attr_reader :closing @@ -268,7 +272,9 @@ module HTML#:nodoc: # Returns non-+nil+ if this tag can contain child nodes. def childless? - @name =~ /^(img|br|hr|link|meta|area|base|basefont|col|frame|input|isindex|param)$/o + !@closing.nil? 
|| + @name =~ /^(img|br|hr|link|meta|area|base|basefont| + col|frame|input|isindex|param)$/ox end # Returns a textual representation of the node @@ -284,6 +290,7 @@ module HTML#:nodoc: s << " /" if @closing == :self s << ">" @children.each { |child| s << child.to_s } + s << "</#{@name}>" if @closing != :self && !@children.empty? s end end diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb index ce49b9c7e0..ce9d3b2800 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb @@ -1,6 +1,6 @@ require 'strscan' -module HTML#:nodoc: +module HTML #:nodoc: # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each # token is a string. Each string represents either "text", or an HTML element. @@ -13,7 +13,7 @@ module HTML#:nodoc: # while token = tokenizer.next # p token # end - class Tokenizer#:nodoc: + class Tokenizer #:nodoc: # The current (byte) position in the text attr_reader :position @@ -51,7 +51,7 @@ module HTML#:nodoc: tag = @scanner.getch if @scanner.scan(/!--/) # comment tag << @scanner.matched - tag << @scanner.scan_until(/--\s*>/) + tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/)) elsif @scanner.scan(/!/) # doctype tag << @scanner.matched tag << consume_quoted_regions @@ -63,14 +63,13 @@ module HTML#:nodoc: # Scan all text up to the next < character and return it. def scan_text - @scanner.getch + (@scanner.scan(/[^<]*/) || "") + "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}" end # Counts the number of newlines in the text and updates the current line # accordingly.
def update_current_line(text) - @current_line += text.scan(/\r\n|\r|\n/).length - text + text.scan(/\r?\n/) { @current_line += 1 } end # Skips over quoted strings, so that less-than and greater-than characters @@ -89,7 +88,7 @@ module HTML#:nodoc: text << match break if delim == "<" || delim == ">" - # consume the conqued region + # consume the quoted region while match = @scanner.scan_until(/[\\#{delim}]/) text << match break if @scanner.matched == delim diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb index 761ea40294..0b4d184a8b 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb @@ -1,9 +1,9 @@ -module HTML#:nodoc: - module Version#:nodoc: +module HTML #:nodoc: + module Version #:nodoc: MAJOR = 0 MINOR = 5 - TINY = 0 + TINY = 1 STRING = [ MAJOR, MINOR, TINY ].join(".") -- cgit v1.2.3