5 files changed, 28 insertions, 19 deletions
diff --git a/actionpack/CHANGELOG b/actionpack/CHANGELOG
index bea218be68..fa4944dd08 100644
--- a/actionpack/CHANGELOG
+++ b/actionpack/CHANGELOG
@@ -1,5 +1,7 @@
 *SVN*
 
+* Updated vendor copy of html-scanner lib to 0.5.1, for bug fixes and optimizations
+
 * Changed test requests to come from 0.0.0.0 instead of 127.0.0.1 such that they don't trigger debugging screens on exceptions, but instead call rescue_action_in_public
 
 * Modernize scaffolding to match the generator: use the new render method and change style from the warty @params["id"] to the sleek params[:id].  #1367
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
index e14d9de166..f8a37e9454 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
@@ -1,7 +1,7 @@
 require 'html/tokenizer'
 require 'html/node'
 
-module HTML#:nodoc:
+module HTML #:nodoc:
   
   # A top-level HTMl document. You give it a body of text, and it will parse that
   # text into a tree of nodes.
@@ -11,7 +11,7 @@ module HTML#:nodoc:
     attr_reader :root
 
     # Create a new Document from the given text.
-    def initialize(text)
+    def initialize(text, strict=false)
       tokenizer = Tokenizer.new(text)
       @root = Node.new(nil)
       node_stack = [ @root ]
@@ -28,7 +28,7 @@ module HTML#:nodoc:
               open_start = 0 if open_start < 0
               close_start = node.position - 20
               close_start = 0 if close_start < 0
-              warn <<EOF.strip
+              msg = <<EOF.strip
 ignoring attempt to close #{node_stack.last.name} with #{node.name}
   opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
   closed at byte #{node.position}, line #{node.line}
@@ -36,6 +36,7 @@ ignoring attempt to close #{node_stack.last.name} with #{node.name}
   text around open: #{text[open_start,40].inspect}
   text around close: #{text[close_start,40].inspect}
 EOF
+              strict ? raise(msg) : warn(msg)
             end
           elsif node.closing != :close
             node_stack.push node
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
index edfc57d8b6..8d1c711226 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
@@ -1,8 +1,8 @@
 require 'strscan'
 
-module HTML#:nodoc:
+module HTML #:nodoc:
   
-  class Conditions < Hash#:nodoc:
+  class Conditions < Hash #:nodoc:
     def initialize(hash)
       super()
       hash = { :content => hash } unless Hash === hash
@@ -54,7 +54,7 @@ module HTML#:nodoc:
   end
 
   # The base class of all nodes, textual and otherwise, in an HTML document.
-  class Node#:nodoc:
+  class Node #:nodoc:
     # The array of children of this node. Not all nodes have children.
     attr_reader :children
     
@@ -91,6 +91,8 @@ module HTML#:nodoc:
     # Search the children of this node for the first node for which #find
     # returns non +nil+. Returns the result of the #find call that succeeded.
     def find(conditions)
+      conditions = validate_conditions(conditions)
+
       @children.each do |child|        
         node = child.find(conditions)
         return node if node
@@ -101,6 +103,8 @@ module HTML#:nodoc:
     # Search for all nodes that match the given conditions, and return them
     # as an array.
     def find_all(conditions)
+      conditions = validate_conditions(conditions)
+
       matches = []
       matches << self if match(conditions)
       @children.each do |child|
@@ -183,7 +187,7 @@ module HTML#:nodoc:
   end
 
   # A node that represents text, rather than markup.
-  class Text < Node#:nodoc:
+  class Text < Node #:nodoc:
     
     attr_reader :content
     
@@ -239,7 +243,7 @@ module HTML#:nodoc:
   # A Tag is any node that represents markup. It may be an opening tag, a
   # closing tag, or a self-closing tag. It has a name, and may have a hash of
   # attributes.
-  class Tag < Node#:nodoc:
+  class Tag < Node #:nodoc:
     
     # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
     attr_reader :closing
@@ -268,7 +272,9 @@ module HTML#:nodoc:
 
     # Returns non-+nil+ if this tag can contain child nodes.
     def childless?
-      @name =~ /^(img|br|hr|link|meta|area|base|basefont|col|frame|input|isindex|param)$/o
+      !@closing.nil? ||
+        @name =~ /^(img|br|hr|link|meta|area|base|basefont|
+                    col|frame|input|isindex|param)$/ox
     end
 
     # Returns a textual representation of the node
@@ -284,6 +290,7 @@ module HTML#:nodoc:
         s << " /" if @closing == :self
         s << ">"
         @children.each { |child| s << child.to_s }
+        s << "</#{@name}>" if @closing != :self && !@children.empty?
         s
       end
     end
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
index ce49b9c7e0..ce9d3b2800 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
@@ -1,6 +1,6 @@
 require 'strscan'
 
-module HTML#:nodoc:
+module HTML #:nodoc:
   
   # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
   # token is a string. Each string represents either "text", or an HTML element.
@@ -13,7 +13,7 @@ module HTML#:nodoc:
   #   while token = tokenizer.next
   #     p token
   #   end
-  class Tokenizer#:nodoc:
+  class Tokenizer #:nodoc:
     
     # The current (byte) position in the text
     attr_reader :position
@@ -51,7 +51,7 @@ module HTML#:nodoc:
         tag = @scanner.getch
         if @scanner.scan(/!--/) # comment
           tag << @scanner.matched
-          tag << @scanner.scan_until(/--\s*>/)
+          tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
         elsif @scanner.scan(/!/) # doctype
           tag << @scanner.matched
           tag << consume_quoted_regions
@@ -63,14 +63,13 @@ module HTML#:nodoc:
 
       # Scan all text up to the next < character and return it.
       def scan_text
-        @scanner.getch + (@scanner.scan(/[^<]*/) || "")
+        "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
       end
       
       # Counts the number of newlines in the text and updates the current line
       # accordingly.
       def update_current_line(text)
-        @current_line += text.scan(/\r\n|\r|\n/).length
-        text
+        text.scan(/\r?\n/) { @current_line += 1 }
       end
       
       # Skips over quoted strings, so that less-than and greater-than characters
@@ -89,7 +88,7 @@ module HTML#:nodoc:
           text << match
           break if delim == "<" || delim == ">"
 
-          # consume the conqued region
+          # consume the quoted region
           while match = @scanner.scan_until(/[\\#{delim}]/)
             text << match
             break if @scanner.matched == delim
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb
index 761ea40294..0b4d184a8b 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/version.rb
@@ -1,9 +1,9 @@
-module HTML#:nodoc:
-  module Version#:nodoc:
+module HTML #:nodoc:
+  module Version #:nodoc:
 
     MAJOR = 0
     MINOR = 5
-    TINY  = 0
+    TINY  = 1
 
     STRING = [ MAJOR, MINOR, TINY ].join(".")