Updated vendor copy of html-scanner lib, for bug fixes and optimizations

git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@1416 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
author: Jamis Buck <jamis@37signals.com> 2005-06-14 10:30:36 +0000
committer: Jamis Buck <jamis@37signals.com> 2005-06-14 10:30:36 +0000
commit: c23b2a4ad3f77222b6bfb219610fca79024ca4e5 (patch)
tree: 38c7eafd4577d8a0ff2effe0cc41d2f619292cf0 /actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
parent: bca13f727eb7410857059bab38dc58f81dbde321 (diff)
download: rails-c23b2a4ad3f77222b6bfb219610fca79024ca4e5.tar.gz
rails-c23b2a4ad3f77222b6bfb219610fca79024ca4e5.tar.bz2
rails-c23b2a4ad3f77222b6bfb219610fca79024ca4e5.zip
1 files changed, 6 insertions, 7 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
index ce49b9c7e0..ce9d3b2800 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
@@ -1,6 +1,6 @@
 require 'strscan'
 
-module HTML#:nodoc:
+module HTML #:nodoc:
   
   # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
   # token is a string. Each string represents either "text", or an HTML element.
@@ -13,7 +13,7 @@ module HTML#:nodoc:
   #   while token = tokenizer.next
   #     p token
   #   end
-  class Tokenizer#:nodoc:
+  class Tokenizer #:nodoc:
     
     # The current (byte) position in the text
     attr_reader :position
@@ -51,7 +51,7 @@ module HTML#:nodoc:
         tag = @scanner.getch
         if @scanner.scan(/!--/) # comment
           tag << @scanner.matched
-          tag << @scanner.scan_until(/--\s*>/)
+          tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
         elsif @scanner.scan(/!/) # doctype
           tag << @scanner.matched
           tag << consume_quoted_regions
@@ -63,14 +63,13 @@ module HTML#:nodoc:
 
       # Scan all text up to the next < character and return it.
       def scan_text
-        @scanner.getch + (@scanner.scan(/[^<]*/) || "")
+        "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
       end
       
       # Counts the number of newlines in the text and updates the current line
       # accordingly.
       def update_current_line(text)
-        @current_line += text.scan(/\r\n|\r|\n/).length
-        text
+        text.scan(/\r?\n/) { @current_line += 1 }
       end
       
       # Skips over quoted strings, so that less-than and greater-than characters
@@ -89,7 +88,7 @@ module HTML#:nodoc:
           text << match
           break if delim == "<" || delim == ">"
 
-          # consume the conqued region
+          # consume the quoted region
           while match = @scanner.scan_until(/[\\#{delim}]/)
             text << match
             break if @scanner.matched == delim
author	Jamis Buck <jamis@37signals.com>	2005-06-14 10:30:36 +0000
committer	Jamis Buck <jamis@37signals.com>	2005-06-14 10:30:36 +0000
commit	c23b2a4ad3f77222b6bfb219610fca79024ca4e5 (patch)
tree	38c7eafd4577d8a0ff2effe0cc41d2f619292cf0 /actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
parent	bca13f727eb7410857059bab38dc58f81dbde321 (diff)
download	rails-c23b2a4ad3f77222b6bfb219610fca79024ca4e5.tar.gz rails-c23b2a4ad3f77222b6bfb219610fca79024ca4e5.tar.bz2 rails-c23b2a4ad3f77222b6bfb219610fca79024ca4e5.zip