Merge pull request #11032 from strzalek/extract-actionview

Extract ActionView to separate directory
author: Piotr Sarnacki <drogus@gmail.com> 2013-06-20 15:42:49 -0700
committer: Piotr Sarnacki <drogus@gmail.com> 2013-06-20 15:42:49 -0700
commit: a29f746398e7b0647885343e7f26d977dd251999 (patch)
tree: 1e2cd2ee1f8f31812c0acf71350ffe423ca8c5a9 /actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
parent: 7c69a829a311a31109939cff19b700b36b97d5c4 (diff)
parent: d6b1caa8f2011487c08b414605883f1f220d0aaa (diff)
download: rails-a29f746398e7b0647885343e7f26d977dd251999.tar.gz
rails-a29f746398e7b0647885343e7f26d977dd251999.tar.bz2
rails-a29f746398e7b0647885343e7f26d977dd251999.zip
1 files changed, 107 insertions, 0 deletions
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
new file mode 100644
index 0000000000..8ac8d34430
--- /dev/null
+++ b/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
@@ -0,0 +1,107 @@
+require 'strscan'
+
+module HTML #:nodoc:
+
+  # A simple HTML tokenizer. It breaks a stream of text into tokens, where each
+  # token is a string. Each string represents either "text", or an HTML element.
+  #
+  # This currently assumes valid XHTML, which means no free < or > characters.
+  #
+  # Usage:
+  #
+  #   tokenizer = HTML::Tokenizer.new(text)
+  #   while token = tokenizer.next
+  #     p token
+  #   end
+  class Tokenizer #:nodoc:
+
+    # The (byte) position in the text at which the token most recently
+    # returned by #next begins.
+    attr_reader :position
+
+    # The line number on which the token most recently returned by #next
+    # begins. Lines are counted from 1; the value stays 0 until #next has
+    # returned a token.
+    attr_reader :line
+
+    # Create a new Tokenizer for the given text.
+    #
+    # The text is transcoded with String#encode into a private copy, so the
+    # caller's string is never modified and frozen strings are accepted.
+    def initialize(text)
+      @scanner = StringScanner.new(text.encode)
+      @position = 0
+      @line = 0
+      @current_line = 1
+    end
+
+    # Return the next token in the sequence, or +nil+ if there are no more tokens in
+    # the stream. Also records where the token starts (see #position and #line).
+    def next
+      return nil if @scanner.eos?
+
+      @position = @scanner.pos
+      @line = @current_line
+      # A "<" immediately followed by a non-space character opens a tag;
+      # anything else is scanned as text.
+      token = @scanner.check(/<\S/) ? scan_tag : scan_text
+      update_current_line(token)
+    end
+
+    private
+
+      # Treat the text at the current position as a tag, and scan it. Supports
+      # comments, CDATA sections, doctype tags, and regular tags, and ignores
+      # less-than and greater-than characters within quoted strings.
+      def scan_tag
+        tag = @scanner.getch # the leading "<"
+        if @scanner.scan(/!--/) # comment
+          tag << @scanner.matched << scan_through(/--\s*>/)
+        elsif @scanner.scan(/!\[CDATA\[/) # CDATA section
+          tag << @scanner.matched << scan_through(/\]\]>/)
+        elsif @scanner.scan(/!/) # doctype
+          tag << @scanner.matched << consume_quoted_regions
+        else
+          tag << consume_quoted_regions
+        end
+        tag
+      end
+
+      # Scan up to and including +terminator+ and return the scanned text. When
+      # the terminator never occurs, scan to the end of the text instead (\Z
+      # also matches just before a final newline, which is then left unscanned).
+      def scan_through(terminator)
+        @scanner.scan_until(terminator) || @scanner.scan_until(/\Z/)
+      end
+
+      # Scan all text up to the next < character and return it. The first
+      # character is always consumed, even if it is itself a <.
+      def scan_text
+        "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
+      end
+
+      # Counts the number of newlines in the text and updates the current line
+      # accordingly. Returns +text+ so that #next can hand it back as the token.
+      def update_current_line(text)
+        @current_line += text.scan(/\r?\n/).size
+        text
+      end
+
+      # Skips over quoted strings, so that less-than and greater-than characters
+      # within the strings are ignored. Returns everything scanned, which ends
+      # with the closing > when one is found.
+      def consume_quoted_regions
+        text = ""
+        loop do
+          match = @scanner.scan_until(/['"<>]/)
+          break unless match
+
+          delim = @scanner.matched
+          if delim == "<"
+            # A new tag is starting: leave its "<" for the next token.
+            match = match.chop
+            @scanner.pos -= 1
+          end
+
+          text << match
+          break if delim == "<" || delim == ">"
+
+          # consume the quoted region, up to the matching quote character
+          closer = /[\\#{delim}]/
+          while (chunk = @scanner.scan_until(closer))
+            text << chunk
+            break if @scanner.matched == delim || @scanner.eos?
+            text << @scanner.getch # skip the escaped character
+          end
+        end
+        text
+      end
+  end
+
+end
author	Piotr Sarnacki <drogus@gmail.com>	2013-06-20 15:42:49 -0700
committer	Piotr Sarnacki <drogus@gmail.com>	2013-06-20 15:42:49 -0700
commit	a29f746398e7b0647885343e7f26d977dd251999 (patch)
tree	1e2cd2ee1f8f31812c0acf71350ffe423ca8c5a9 /actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
parent	7c69a829a311a31109939cff19b700b36b97d5c4 (diff)
parent	d6b1caa8f2011487c08b414605883f1f220d0aaa (diff)
download	rails-a29f746398e7b0647885343e7f26d977dd251999.tar.gz rails-a29f746398e7b0647885343e7f26d977dd251999.tar.bz2 rails-a29f746398e7b0647885343e7f26d977dd251999.zip