aboutsummaryrefslogtreecommitdiffstats
path: root/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
diff options
context:
space:
mode:
Diffstat (limited to 'actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb')
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb107
1 files changed, 0 insertions, 107 deletions
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
deleted file mode 100644
index adf4e45930..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
+++ /dev/null
@@ -1,107 +0,0 @@
-require 'strscan'
-
-module HTML #:nodoc:
-
- # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
- # token is a string. Each string represents either "text", or an HTML element.
- #
- # This currently assumes valid XHTML, which means no free < or > characters.
- #
- # Usage:
- #
- # tokenizer = HTML::Tokenizer.new(text)
- # while token = tokenizer.next
- # p token
- # end
- class Tokenizer #:nodoc:
-
- # The current (byte) position in the text
- attr_reader :position
-
- # The current line number
- attr_reader :line
-
- # Create a new Tokenizer for the given text.
- def initialize(text)
- text.encode!
- @scanner = StringScanner.new(text)
- @position = 0
- @line = 0
- @current_line = 1
- end
-
- # Returns the next token in the sequence, or +nil+ if there are no more tokens in
- # the stream.
- def next
- return nil if @scanner.eos?
- @position = @scanner.pos
- @line = @current_line
- if @scanner.check(/<\S/)
- update_current_line(scan_tag)
- else
- update_current_line(scan_text)
- end
- end
-
- private
-
- # Treat the text at the current position as a tag, and scan it. Supports
- # comments, doctype tags, and regular tags, and ignores less-than and
- # greater-than characters within quoted strings.
- def scan_tag
- tag = @scanner.getch
- if @scanner.scan(/!--/) # comment
- tag << @scanner.matched
- tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
- elsif @scanner.scan(/!\[CDATA\[/)
- tag << @scanner.matched
- tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/))
- elsif @scanner.scan(/!/) # doctype
- tag << @scanner.matched
- tag << consume_quoted_regions
- else
- tag << consume_quoted_regions
- end
- tag
- end
-
- # Scan all text up to the next < character and return it.
- def scan_text
- "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
- end
-
- # Counts the number of newlines in the text and updates the current line
- # accordingly.
- def update_current_line(text)
- text.scan(/\r?\n/) { @current_line += 1 }
- end
-
- # Skips over quoted strings, so that less-than and greater-than characters
- # within the strings are ignored.
- def consume_quoted_regions
- text = ""
- loop do
- match = @scanner.scan_until(/['"<>]/) or break
-
- delim = @scanner.matched
- if delim == "<"
- match = match.chop
- @scanner.pos -= 1
- end
-
- text << match
- break if delim == "<" || delim == ">"
-
- # consume the quoted region
- while match = @scanner.scan_until(/[\\#{delim}]/)
- text << match
- break if @scanner.matched == delim
- break if @scanner.eos?
- text << @scanner.getch # skip the escaped character
- end
- end
- text
- end
- end
-
-end