aboutsummaryrefslogtreecommitdiffstats
path: root/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
diff options
context:
space:
mode:
authorPiotr Sarnacki <drogus@gmail.com>2012-08-28 11:24:29 +0200
committerPiotr Sarnacki <drogus@gmail.com>2012-08-28 11:24:29 +0200
commit4f093d81aca814b7433c4a1366985327b4ad0708 (patch)
treed8361811cbf1f5b3947c57cbe1be1016c2835b2d /actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
parent1ec1eb2ff2ad5de70db7a632b93641a06a623a42 (diff)
parentdaa0ed3af2e6443e26d658282b8ed654b5a32926 (diff)
downloadrails-4f093d81aca814b7433c4a1366985327b4ad0708.tar.gz
rails-4f093d81aca814b7433c4a1366985327b4ad0708.tar.bz2
rails-4f093d81aca814b7433c4a1366985327b4ad0708.zip
Merge branch 'actionview-decoupling'
This branch contains a set of changes that will allow Action View to be extracted from Action Pack in the future. This work will probably be done after the Rails 4.0 release, because of a few deprecations that were introduced to make the decoupling possible.
Diffstat (limited to 'actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb')
-rw-r--r--actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb107
1 files changed, 0 insertions, 107 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
deleted file mode 100644
index 8ac8d34430..0000000000
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
+++ /dev/null
@@ -1,107 +0,0 @@
-require 'strscan'
-
-module HTML #:nodoc:
-
- # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
- # token is a string. Each string represents either "text", or an HTML element.
- #
- # This currently assumes valid XHTML, which means no free < or > characters.
- #
- # Usage:
- #
- # tokenizer = HTML::Tokenizer.new(text)
- # while token = tokenizer.next
- # p token
- # end
- class Tokenizer #:nodoc:
-
- # The current (byte) position in the text
- attr_reader :position
-
- # The current line number
- attr_reader :line
-
- # Create a new Tokenizer for the given text.
- def initialize(text)
- text.encode!
- @scanner = StringScanner.new(text)
- @position = 0
- @line = 0
- @current_line = 1
- end
-
- # Return the next token in the sequence, or +nil+ if there are no more tokens in
- # the stream.
- def next
- return nil if @scanner.eos?
- @position = @scanner.pos
- @line = @current_line
- if @scanner.check(/<\S/)
- update_current_line(scan_tag)
- else
- update_current_line(scan_text)
- end
- end
-
- private
-
- # Treat the text at the current position as a tag, and scan it. Supports
- # comments, doctype tags, and regular tags, and ignores less-than and
- # greater-than characters within quoted strings.
- def scan_tag
- tag = @scanner.getch
- if @scanner.scan(/!--/) # comment
- tag << @scanner.matched
- tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
- elsif @scanner.scan(/!\[CDATA\[/)
- tag << @scanner.matched
- tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/))
- elsif @scanner.scan(/!/) # doctype
- tag << @scanner.matched
- tag << consume_quoted_regions
- else
- tag << consume_quoted_regions
- end
- tag
- end
-
- # Scan all text up to the next < character and return it.
- def scan_text
- "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
- end
-
- # Counts the number of newlines in the text and updates the current line
- # accordingly.
- def update_current_line(text)
- text.scan(/\r?\n/) { @current_line += 1 }
- end
-
- # Skips over quoted strings, so that less-than and greater-than characters
- # within the strings are ignored.
- def consume_quoted_regions
- text = ""
- loop do
- match = @scanner.scan_until(/['"<>]/) or break
-
- delim = @scanner.matched
- if delim == "<"
- match = match.chop
- @scanner.pos -= 1
- end
-
- text << match
- break if delim == "<" || delim == ">"
-
- # consume the quoted region
- while match = @scanner.scan_until(/[\\#{delim}]/)
- text << match
- break if @scanner.matched == delim
- break if @scanner.eos?
- text << @scanner.getch # skip the escaped character
- end
- end
- text
- end
- end
-
-end