diff options
author | Timm <kaspth@gmail.com> | 2014-05-23 23:34:46 +0200 |
---|---|---|
committer | Timm <kaspth@gmail.com> | 2014-06-16 21:04:23 +0200 |
commit | 33019a321c7b8083068850750a3f4c466ae7c059 (patch) | |
tree | 52ebd6c136384ce04d2bb00c301177fbc434d1aa /actionview | |
parent | 017ddc6e248cea9bdfda4496c1505585b7452655 (diff) | |
download | rails-33019a321c7b8083068850750a3f4c466ae7c059.tar.gz rails-33019a321c7b8083068850750a3f4c466ae7c059.tar.bz2 rails-33019a321c7b8083068850750a3f4c466ae7c059.zip |
Remove html-scanner and its tests.
Diffstat (limited to 'actionview')
13 files changed, 0 insertions, 2454 deletions
diff --git a/actionview/lib/action_view/vendor/html-scanner.rb b/actionview/lib/action_view/vendor/html-scanner.rb deleted file mode 100644 index ef09b446a5..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner.rb +++ /dev/null @@ -1,22 +0,0 @@ -require 'active_support/deprecation' -$LOAD_PATH.unshift "#{File.dirname(__FILE__)}/html-scanner" - -module HTML - extend ActiveSupport::Autoload - - eager_autoload do - autoload :Scanner, 'html/sanitizer' - autoload :CDATA, 'html/node' - autoload :Document, 'html/document' - autoload :FullSanitizer, 'html/sanitizer' - autoload :LinkSanitizer, 'html/sanitizer' - autoload :Node, 'html/node' - autoload :Sanitizer, 'html/sanitizer' - autoload :Selector, 'html/selector' - autoload :Tag, 'html/node' - autoload :Text, 'html/node' - autoload :Tokenizer, 'html/tokenizer' - autoload :Version, 'html/version' - autoload :WhiteListSanitizer, 'html/sanitizer' - end -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/document.rb b/actionview/lib/action_view/vendor/html-scanner/html/document.rb deleted file mode 100644 index 386820300a..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/document.rb +++ /dev/null @@ -1,68 +0,0 @@ -require 'html/tokenizer' -require 'html/node' -require 'html/selector' -require 'html/sanitizer' - -module HTML #:nodoc: - # A top-level HTML document. You give it a body of text, and it will parse that - # text into a tree of nodes. - class Document #:nodoc: - - # The root of the parsed document. - attr_reader :root - - # Create a new Document from the given text. - def initialize(text, strict=false, xml=false) - tokenizer = Tokenizer.new(text) - @root = Node.new(nil) - node_stack = [ @root ] - while token = tokenizer.next - node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict) - - node_stack.last.children << node unless node.tag? && node.closing == :close - if node.tag? 
- if node_stack.length > 1 && node.closing == :close - if node_stack.last.name == node.name - if node_stack.last.children.empty? - node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "") - end - node_stack.pop - else - open_start = node_stack.last.position - 20 - open_start = 0 if open_start < 0 - close_start = node.position - 20 - close_start = 0 if close_start < 0 - msg = <<EOF.strip -ignoring attempt to close #{node_stack.last.name} with #{node.name} - opened at byte #{node_stack.last.position}, line #{node_stack.last.line} - closed at byte #{node.position}, line #{node.line} - attributes at open: #{node_stack.last.attributes.inspect} - text around open: #{text[open_start,40].inspect} - text around close: #{text[close_start,40].inspect} -EOF - strict ? raise(msg) : warn(msg) - end - elsif !node.childless?(xml) && node.closing != :close - node_stack.push node - end - end - end - end - - # Search the tree for (and return) the first node that matches the given - # conditions. The conditions are interpreted differently for different node - # types, see HTML::Text#find and HTML::Tag#find. - def find(conditions) - @root.find(conditions) - end - - # Search the tree for (and return) all nodes that match the given - # conditions. The conditions are interpreted differently for different node - # types, see HTML::Text#find and HTML::Tag#find. 
- def find_all(conditions) - @root.find_all(conditions) - end - - end - -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/node.rb b/actionview/lib/action_view/vendor/html-scanner/html/node.rb deleted file mode 100644 index 27f0f2f6f8..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/node.rb +++ /dev/null @@ -1,532 +0,0 @@ -require 'strscan' - -module HTML #:nodoc: - - class Conditions < Hash #:nodoc: - def initialize(hash) - super() - hash = { :content => hash } unless Hash === hash - hash = keys_to_symbols(hash) - hash.each do |k,v| - case k - when :tag, :content then - # keys are valid, and require no further processing - when :attributes then - hash[k] = keys_to_strings(v) - when :parent, :child, :ancestor, :descendant, :sibling, :before, - :after - hash[k] = Conditions.new(v) - when :children - hash[k] = v = keys_to_symbols(v) - v.each do |key,value| - case key - when :count, :greater_than, :less_than - # keys are valid, and require no further processing - when :only - v[key] = Conditions.new(value) - else - raise "illegal key #{key.inspect} => #{value.inspect}" - end - end - else - raise "illegal key #{k.inspect} => #{v.inspect}" - end - end - update hash - end - - private - - def keys_to_strings(hash) - Hash[hash.keys.map {|k| [k.to_s, hash[k]]}] - end - - def keys_to_symbols(hash) - Hash[hash.keys.map do |k| - raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym) - [k.to_sym, hash[k]] - end] - end - end - - # The base class of all nodes, textual and otherwise, in an HTML document. - class Node #:nodoc: - # The array of children of this node. Not all nodes have children. - attr_reader :children - - # The parent node of this node. All nodes have a parent, except for the - # root node. 
- attr_reader :parent - - # The line number of the input where this node was begun - attr_reader :line - - # The byte position in the input where this node was begun - attr_reader :position - - # Create a new node as a child of the given parent. - def initialize(parent, line=0, pos=0) - @parent = parent - @children = [] - @line, @position = line, pos - end - - # Returns a textual representation of the node. - def to_s - @children.join() - end - - # Returns false (subclasses must override this to provide specific matching - # behavior.) +conditions+ may be of any type. - def match(conditions) - false - end - - # Search the children of this node for the first node for which #find - # returns non +nil+. Returns the result of the #find call that succeeded. - def find(conditions) - conditions = validate_conditions(conditions) - @children.each do |child| - node = child.find(conditions) - return node if node - end - nil - end - - # Search for all nodes that match the given conditions, and return them - # as an array. - def find_all(conditions) - conditions = validate_conditions(conditions) - - matches = [] - matches << self if match(conditions) - @children.each do |child| - matches.concat child.find_all(conditions) - end - matches - end - - # Returns +false+. Subclasses may override this if they define a kind of - # tag. - def tag? - false - end - - def validate_conditions(conditions) - Conditions === conditions ? 
conditions : Conditions.new(conditions) - end - - def ==(node) - return false unless self.class == node.class && children.size == node.children.size - - equivalent = true - - children.size.times do |i| - equivalent &&= children[i] == node.children[i] - end - - equivalent - end - - class <<self - def parse(parent, line, pos, content, strict=true) - if content !~ /^<\S/ - Text.new(parent, line, pos, content) - else - scanner = StringScanner.new(content) - - unless scanner.skip(/</) - if strict - raise "expected <" - else - return Text.new(parent, line, pos, content) - end - end - - if scanner.skip(/!\[CDATA\[/) - unless scanner.skip_until(/\]\]>/) - if strict - raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" - else - scanner.skip_until(/\Z/) - end - end - - return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, '')) - end - - closing = ( scanner.scan(/\//) ? :close : nil ) - return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/) - name.downcase! - - unless closing - scanner.skip(/\s*/) - attributes = {} - while attr = scanner.scan(/[-\w:]+/) - value = true - if scanner.scan(/\s*=\s*/) - if delim = scanner.scan(/['"]/) - value = "" - while text = scanner.scan(/[^#{delim}\\]+|./) - case text - when "\\" then - value << text - break if scanner.eos? - value << scanner.getch - when delim - break - else value << text - end - end - else - value = scanner.scan(/[^\s>\/]+/) - end - end - attributes[attr.downcase] = value - scanner.skip(/\s*/) - end - - closing = ( scanner.scan(/\//) ? :self : nil ) - end - - unless scanner.scan(/\s*>/) - if strict - raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" - else - # throw away all text until we find what we're looking for - scanner.skip_until(/>/) or scanner.terminate - end - end - - Tag.new(parent, line, pos, name, attributes, closing) - end - end - end - end - - # A node that represents text, rather than markup. 
- class Text < Node #:nodoc: - - attr_reader :content - - # Creates a new text node as a child of the given parent, with the given - # content. - def initialize(parent, line, pos, content) - super(parent, line, pos) - @content = content - end - - # Returns the content of this node. - def to_s - @content - end - - # Returns +self+ if this node meets the given conditions. Text nodes support - # conditions of the following kinds: - # - # * if +conditions+ is a string, it must be a substring of the node's - # content - # * if +conditions+ is a regular expression, it must match the node's - # content - # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that - # is either a string or a regexp, and which is interpreted as described - # above. - def find(conditions) - match(conditions) && self - end - - # Returns non-+nil+ if this node meets the given conditions, or +nil+ - # otherwise. See the discussion of #find for the valid conditions. - def match(conditions) - case conditions - when String - @content == conditions - when Regexp - @content =~ conditions - when Hash - conditions = validate_conditions(conditions) - - # Text nodes only have :content, :parent, :ancestor - unless (conditions.keys - [:content, :parent, :ancestor]).empty? - return false - end - - match(conditions[:content]) - else - nil - end - end - - def ==(node) - return false unless super - content == node.content - end - end - - # A CDATA node is simply a text node with a specialized way of displaying - # itself. - class CDATA < Text #:nodoc: - def to_s - "<![CDATA[#{super}]]>" - end - end - - # A Tag is any node that represents markup. It may be an opening tag, a - # closing tag, or a self-closing tag. It has a name, and may have a hash of - # attributes. - class Tag < Node #:nodoc: - - # Either +nil+, <tt>:close</tt>, or <tt>:self</tt> - attr_reader :closing - - # Either +nil+, or a hash of attributes for this node. - attr_reader :attributes - - # The name of this tag. 
- attr_reader :name - - # Create a new node as a child of the given parent, using the given content - # to describe the node. It will be parsed and the node name, attributes and - # closing status extracted. - def initialize(parent, line, pos, name, attributes, closing) - super(parent, line, pos) - @name = name - @attributes = attributes - @closing = closing - end - - # A convenience for obtaining an attribute of the node. Returns +nil+ if - # the node has no attributes. - def [](attr) - @attributes ? @attributes[attr] : nil - end - - # Returns non-+nil+ if this tag can contain child nodes. - def childless?(xml = false) - return false if xml && @closing.nil? - !@closing.nil? || - @name =~ /^(img|br|hr|link|meta|area|base|basefont| - col|frame|input|isindex|param)$/ox - end - - # Returns a textual representation of the node - def to_s - if @closing == :close - "</#{@name}>" - else - s = "<#{@name}" - @attributes.each do |k,v| - s << " #{k}" - s << "=\"#{v}\"" if String === v - end - s << " /" if @closing == :self - s << ">" - @children.each { |child| s << child.to_s } - s << "</#{@name}>" if @closing != :self && !@children.empty? - s - end - end - - # If either the node or any of its children meet the given conditions, the - # matching node is returned. Otherwise, +nil+ is returned. (See the - # description of the valid conditions in the +match+ method.) - def find(conditions) - match(conditions) && self || super - end - - # Returns +true+, indicating that this node represents an HTML tag. - def tag? - true - end - - # Returns +true+ if the node meets any of the given conditions. The - # +conditions+ parameter must be a hash of any of the following keys - # (all are optional): - # - # * <tt>:tag</tt>: the node name must match the corresponding value - # * <tt>:attributes</tt>: a hash. The node's values must match the - # corresponding values in the hash. - # * <tt>:parent</tt>: a hash. The node's parent must match the - # corresponding hash. 
- # * <tt>:child</tt>: a hash. At least one of the node's immediate children - # must meet the criteria described by the hash. - # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must - # meet the criteria described by the hash. - # * <tt>:descendant</tt>: a hash. At least one of the node's descendants - # must meet the criteria described by the hash. - # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must - # meet the criteria described by the hash. - # * <tt>:after</tt>: a hash. The node must be after any sibling meeting - # the criteria described by the hash, and at least one sibling must match. - # * <tt>:before</tt>: a hash. The node must be before any sibling meeting - # the criteria described by the hash, and at least one sibling must match. - # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the - # keys: - # ** <tt>:count</tt>: either a number or a range which must equal (or - # include) the number of children that match. - # ** <tt>:less_than</tt>: the number of matching children must be less than - # this number. - # ** <tt>:greater_than</tt>: the number of matching children must be - # greater than this number. - # ** <tt>:only</tt>: another hash consisting of the keys to use - # to match on the children, and only matching children will be - # counted. - # - # Conditions are matched using the following algorithm: - # - # * if the condition is a string, it must be a substring of the value. - # * if the condition is a regexp, it must match the value. - # * if the condition is a number, the value must match number.to_s. - # * if the condition is +true+, the value must not be +nil+. - # * if the condition is +false+ or +nil+, the value must be +nil+. 
- # - # Usage: - # - # # test if the node is a "span" tag - # node.match tag: "span" - # - # # test if the node's parent is a "div" - # node.match parent: { tag: "div" } - # - # # test if any of the node's ancestors are "table" tags - # node.match ancestor: { tag: "table" } - # - # # test if any of the node's immediate children are "em" tags - # node.match child: { tag: "em" } - # - # # test if any of the node's descendants are "strong" tags - # node.match descendant: { tag: "strong" } - # - # # test if the node has between 2 and 4 span tags as immediate children - # node.match children: { count: 2..4, only: { tag: "span" } } - # - # # get funky: test to see if the node is a "div", has a "ul" ancestor - # # and an "li" parent (with "class" = "enum"), and whether or not it has - # # a "span" descendant that contains # text matching /hello world/: - # node.match tag: "div", - # ancestor: { tag: "ul" }, - # parent: { tag: "li", - # attributes: { class: "enum" } }, - # descendant: { tag: "span", - # child: /hello world/ } - def match(conditions) - conditions = validate_conditions(conditions) - # check content of child nodes - if conditions[:content] - if children.empty? 
- return false unless match_condition("", conditions[:content]) - else - return false unless children.find { |child| child.match(conditions[:content]) } - end - end - - # test the name - return false unless match_condition(@name, conditions[:tag]) if conditions[:tag] - - # test attributes - (conditions[:attributes] || {}).each do |key, value| - return false unless match_condition(self[key], value) - end - - # test parent - return false unless parent.match(conditions[:parent]) if conditions[:parent] - - # test children - return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child] - - # test ancestors - if conditions[:ancestor] - return false unless catch :found do - p = self - throw :found, true if p.match(conditions[:ancestor]) while p = p.parent - end - end - - # test descendants - if conditions[:descendant] - return false unless children.find do |child| - # test the child - child.match(conditions[:descendant]) || - # test the child's descendants - child.match(:descendant => conditions[:descendant]) - end - end - - # count children - if opts = conditions[:children] - matches = children.select do |c| - (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?)) - end - - matches = matches.select { |c| c.match(opts[:only]) } if opts[:only] - opts.each do |key, value| - next if key == :only - case key - when :count - if Integer === value - return false if matches.length != value - else - return false unless value.include?(matches.length) - end - when :less_than - return false unless matches.length < value - when :greater_than - return false unless matches.length > value - else raise "unknown count condition #{key}" - end - end - end - - # test siblings - if conditions[:sibling] || conditions[:before] || conditions[:after] - siblings = parent ? 
parent.children : [] - self_index = siblings.index(self) - - if conditions[:sibling] - return false unless siblings.detect do |s| - s != self && s.match(conditions[:sibling]) - end - end - - if conditions[:before] - return false unless siblings[self_index+1..-1].detect do |s| - s != self && s.match(conditions[:before]) - end - end - - if conditions[:after] - return false unless siblings[0,self_index].detect do |s| - s != self && s.match(conditions[:after]) - end - end - end - - true - end - - def ==(node) - return false unless super - return false unless closing == node.closing && self.name == node.name - attributes == node.attributes - end - - private - # Match the given value to the given condition. - def match_condition(value, condition) - case condition - when String - value && value == condition - when Regexp - value && value.match(condition) - when Numeric - value == condition.to_s - when true - !value.nil? - when false, nil - value.nil? - else - false - end - end - end -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb deleted file mode 100644 index 36ec3ef6b3..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb +++ /dev/null @@ -1,202 +0,0 @@ -require 'set' -require 'cgi' -require 'active_support/core_ext/module/attribute_accessors' - -module HTML - module Scanner - def full_sanitizer - HTML::FullSanitizer - end - - def link_sanitizer - HTML::LinkSanitizer - end - - def white_list_sanitizer - HTML::WhiteListSanitizer - end - end - - class Sanitizer - def sanitize(text, options = {}) - validate_options(options) - return text unless sanitizeable?(text) - tokenize(text, options).join - end - - def sanitizeable?(text) - !(text.nil? || text.empty? 
|| !text.index("<")) - end - - protected - def tokenize(text, options) - tokenizer = HTML::Tokenizer.new(text) - result = [] - while token = tokenizer.next - node = Node.parse(nil, 0, 0, token, false) - process_node node, result, options - end - result - end - - def process_node(node, result, options) - result << node.to_s - end - - def validate_options(options) - if options[:tags] && !options[:tags].is_a?(Enumerable) - raise ArgumentError, "You should pass :tags as an Enumerable" - end - - if options[:attributes] && !options[:attributes].is_a?(Enumerable) - raise ArgumentError, "You should pass :attributes as an Enumerable" - end - end - end - - class FullSanitizer < Sanitizer - def sanitize(text, options = {}) - result = super - # strip any comments, and if they have a newline at the end (ie. line with - # only a comment) strip that too - result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if (result && result =~ /<!--(.*?)-->[\n]?/m) - # Recurse - handle all dirty nested tags - result == text ? result : sanitize(result, options) - end - - def process_node(node, result, options) - result << node.to_s if node.class == HTML::Text - end - end - - class LinkSanitizer < FullSanitizer - cattr_accessor :included_tags, :instance_writer => false - self.included_tags = Set.new(%w(a href)) - - def sanitizeable?(text) - !(text.nil? || text.empty? 
|| !((text.index("<a") || text.index("<href")) && text.index(">"))) - end - - protected - def process_node(node, result, options) - result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) - end - end - - class WhiteListSanitizer < Sanitizer - [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, - :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| - class_attribute attr, :instance_writer => false - end - - # A regular expression of the valid characters used to separate protocols like - # the ':' in 'http://foo.com' - self.protocol_separator = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i - - # Specifies a Set of HTML attributes that can have URIs. - self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) - - # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed - # to just escaping harmless tags like <font> - self.bad_tags = Set.new(%w(script)) - - # Specifies the default Set of tags that the #sanitize helper will allow unscathed. - self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr - acronym a img blockquote del ins)) - - # Specifies the default Set of html attributes that the #sanitize helper will leave - # in the allowed tag. - self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) - - # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto - feed svn urn aim rsync tag ssh sftp rtsp afs)) - - # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. 
- self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse - border-color border-left-color border-right-color border-top-color clear color cursor direction display - elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height - overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation - speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space - width)) - - # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. - self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center - collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal - nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) - - # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. - self.shorthand_css_properties = Set.new(%w(background border margin padding)) - - # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute - def sanitize_css(style) - # disallow urls - style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') - - # gauntlet - if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ || - style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/ - return '' - end - - clean = [] - style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| - if allowed_css_properties.include?(prop.downcase) - clean << prop + ': ' + val + ';' - elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) - unless val.split().any? 
do |keyword| - !allowed_css_keywords.include?(keyword) && - keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/ - end - clean << prop + ': ' + val + ';' - end - end - end - clean.join(' ') - end - - protected - def tokenize(text, options) - options[:parent] = [] - options[:attributes] ||= allowed_attributes - options[:tags] ||= allowed_tags - super - end - - def process_node(node, result, options) - result << case node - when HTML::Tag - if node.closing == :close - options[:parent].shift - else - options[:parent].unshift node.name - end - - process_attributes_for node, options - - options[:tags].include?(node.name) ? node : nil - else - bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;") - end - end - - def process_attributes_for(node, options) - return unless node.attributes - node.attributes.keys.each do |attr_name| - value = node.attributes[attr_name].to_s - - if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value) - node.attributes.delete(attr_name) - else - node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value)) - end - end - end - - def contains_bad_protocols?(attr_name, value) - uri_attributes.include?(attr_name) && - (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip)) - end - end -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb b/actionview/lib/action_view/vendor/html-scanner/html/selector.rb deleted file mode 100644 index dfdd724b9b..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb +++ /dev/null @@ -1,830 +0,0 @@ -#-- -# Copyright (c) 2006 Assaf Arkin (http://labnotes.org) -# Under MIT and/or CC By license. -#++ - -module HTML - - # Selects HTML elements using CSS 2 selectors. 
- # - # The +Selector+ class uses CSS selector expressions to match and select - # HTML elements. - # - # For example: - # selector = HTML::Selector.new "form.login[action=/login]" - # creates a new selector that matches any +form+ element with the class - # +login+ and an attribute +action+ with the value <tt>/login</tt>. - # - # === Matching Elements - # - # Use the #match method to determine if an element matches the selector. - # - # For simple selectors, the method returns an array with that element, - # or +nil+ if the element does not match. For complex selectors (see below) - # the method returns an array with all matched elements, of +nil+ if no - # match found. - # - # For example: - # if selector.match(element) - # puts "Element is a login form" - # end - # - # === Selecting Elements - # - # Use the #select method to select all matching elements starting with - # one element and going through all children in depth-first order. - # - # This method returns an array of all matching elements, an empty array - # if no match is found - # - # For example: - # selector = HTML::Selector.new "input[type=text]" - # matches = selector.select(element) - # matches.each do |match| - # puts "Found text field with name #{match.attributes['name']}" - # end - # - # === Expressions - # - # Selectors can match elements using any of the following criteria: - # * <tt>name</tt> -- Match an element based on its name (tag name). - # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt> - # to match any element. - # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the - # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>. - # * <tt>.class</tt> -- Match an element based on its class name, all - # class names if more than one specified. - # * <tt>[attr]</tt> -- Match an element that has the specified attribute. - # * <tt>[attr=value]</tt> -- Match an element that has the specified - # attribute and value. 
(More operators are supported see below) - # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class, - # such as <tt>:nth-child</tt> and <tt>:empty</tt>. - # * <tt>:not(expr)</tt> -- Match an element that does not match the - # negation expression. - # - # When using a combination of the above, the element name comes first - # followed by identifier, class names, attributes, pseudo classes and - # negation in any order. Do not separate these parts with spaces! - # Space separation is used for descendant selectors. - # - # For example: - # selector = HTML::Selector.new "form.login[action=/login]" - # The matched element must be of type +form+ and have the class +login+. - # It may have other classes, but the class +login+ is required to match. - # It must also have an attribute called +action+ with the value - # <tt>/login</tt>. - # - # This selector will match the following element: - # <form class="login form" method="post" action="/login"> - # but will not match the element: - # <form method="post" action="/logout"> - # - # === Attribute Values - # - # Several operators are supported for matching attributes: - # * <tt>name</tt> -- The element must have an attribute with that name. - # * <tt>name=value</tt> -- The element must have an attribute with that - # name and value. - # * <tt>name^=value</tt> -- The attribute value must start with the - # specified value. - # * <tt>name$=value</tt> -- The attribute value must end with the - # specified value. - # * <tt>name*=value</tt> -- The attribute value must contain the - # specified value. - # * <tt>name~=word</tt> -- The attribute value must contain the specified - # word (space separated). - # * <tt>name|=word</tt> -- The attribute value must start with specified - # word. 
- # - # For example, the following two selectors match the same element: - # #my_id - # [id=my_id] - # and so do the following two selectors: - # .my_class - # [class~=my_class] - # - # === Alternatives, siblings, children - # - # Complex selectors use a combination of expressions to match elements: - # * <tt>expr1 expr2</tt> -- Match any element against the second expression - # if it has some parent element that matches the first expression. - # * <tt>expr1 > expr2</tt> -- Match any element against the second expression - # if it is the child of an element that matches the first expression. - # * <tt>expr1 + expr2</tt> -- Match any element against the second expression - # if it immediately follows an element that matches the first expression. - # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression - # that comes after an element that matches the first expression. - # * <tt>expr1, expr2</tt> -- Match any element against the first expression, - # or against the second expression. - # - # Since children and sibling selectors may match more than one element given - # the first element, the #match method may return more than one match. - # - # === Pseudo classes - # - # Pseudo classes were introduced in CSS 3. They are most often used to select - # elements in a given position: - # * <tt>:root</tt> -- Match the element only if it is the root element - # (no parent element). - # * <tt>:empty</tt> -- Match the element only if it has no child elements, - # and no text content. - # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt> - # as its text content (ignoring leading and trailing whitespace). - # * <tt>:only-child</tt> -- Match the element if it is the only child (element) - # of its parent element. - # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element) - # of its parent element and its type. 
- # * <tt>:first-child</tt> -- Match the element if it is the first child (element) - # of its parent element. - # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element) - # of its parent element of its type. - # * <tt>:last-child</tt> -- Match the element if it is the last child (element) - # of its parent element. - # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element) - # of its parent element of its type. - # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element) - # of its parent element. The value <tt>b</tt> specifies its index, starting with 1. - # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element) - # in each group of <tt>a</tt> child elements of its parent element. - # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element) - # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child - # elements of its parent element. - # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third). - # Same as <tt>:nth-child(2n+1)</tt>. - # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second, - # fourth). Same as <tt>:nth-child(2n+2)</tt>. - # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type. - # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child. - # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and - # only elements of its type. - # * <tt>:not(selector)</tt> -- Match the element only if the element does not - # match the simple selector. - # - # As you can see, <tt>:nth-child</tt> pseudo class and its variant can get quite - # tricky and the CSS specification doesn't do a much better job explaining it. - # But after reading the examples and trying a few combinations, it's easy to - # figure out. 
- # - # For example: - # table tr:nth-child(odd) - # Selects every second row in the table starting with the first one. - # - # div p:nth-child(4) - # Selects the fourth paragraph in the +div+, but not if the +div+ contains - # other elements, since those are also counted. - # - # div p:nth-of-type(4) - # Selects the fourth paragraph in the +div+, counting only paragraphs, and - # ignoring all other elements. - # - # div p:nth-of-type(-n+4) - # Selects the first four paragraphs, ignoring all others. - # - # And you can always select an element that matches one set of rules but - # not another using <tt>:not</tt>. For example: - # p:not(.post) - # Matches all paragraphs that do not have the class <tt>.post</tt>. - # - # === Substitution Values - # - # You can use substitution with identifiers, class names and element values. - # A substitution takes the form of a question mark (<tt>?</tt>) and uses the - # next value in the argument list following the CSS expression. - # - # The substitution value may be a string or a regular expression. All other - # values are converted to strings. - # - # For example: - # selector = HTML::Selector.new "#?", /^\d+$/ - # matches any element whose identifier consists of one or more digits. - # - # See http://www.w3.org/TR/css3-selectors/ - class Selector - - - # An invalid selector. - class InvalidSelectorError < StandardError #:nodoc: - end - - - class << self - - # :call-seq: - # Selector.for_class(cls) => selector - # - # Creates a new selector for the given class name. - def for_class(cls) - self.new([".?", cls]) - end - - - # :call-seq: - # Selector.for_id(id) => selector - # - # Creates a new selector for the given id. - def for_id(id) - self.new(["#?", id]) - end - - end - - - # :call-seq: - # Selector.new(string, [values ...]) => selector - # - # Creates a new selector from a CSS 2 selector expression. - # - # The first argument is the selector expression. All other arguments - # are used for value substitution. 
- # - # Throws InvalidSelectorError is the selector expression is invalid. - def initialize(selector, *values) - raise ArgumentError, "CSS expression cannot be empty" if selector.empty? - @source = "" - values = values[0] if values.size == 1 && values[0].is_a?(Array) - - # We need a copy to determine if we failed to parse, and also - # preserve the original pass by-ref statement. - statement = selector.strip.dup - - # Create a simple selector, along with negation. - simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) } - - @alternates = [] - @depends = nil - - # Alternative selector. - if statement.sub!(/^\s*,\s*/, "") - second = Selector.new(statement, values) - @alternates << second - # If there are alternate selectors, we group them in the top selector. - if alternates = second.instance_variable_get(:@alternates) - second.instance_variable_set(:@alternates, []) - @alternates.concat alternates - end - @source << " , " << second.to_s - # Sibling selector: create a dependency into second selector that will - # match element immediately following this one. - elsif statement.sub!(/^\s*\+\s*/, "") - second = next_selector(statement, values) - @depends = lambda do |element, first| - if element = next_element(element) - second.match(element, first) - end - end - @source << " + " << second.to_s - # Adjacent selector: create a dependency into second selector that will - # match all elements following this one. - elsif statement.sub!(/^\s*~\s*/, "") - second = next_selector(statement, values) - @depends = lambda do |element, first| - matches = [] - while element = next_element(element) - if subset = second.match(element, first) - if first && !subset.empty? - matches << subset.first - break - else - matches.concat subset - end - end - end - matches.empty? ? nil : matches - end - @source << " ~ " << second.to_s - # Child selector: create a dependency into second selector that will - # match a child element of this one. 
- elsif statement.sub!(/^\s*>\s*/, "") - second = next_selector(statement, values) - @depends = lambda do |element, first| - matches = [] - element.children.each do |child| - if child.tag? && subset = second.match(child, first) - if first && !subset.empty? - matches << subset.first - break - else - matches.concat subset - end - end - end - matches.empty? ? nil : matches - end - @source << " > " << second.to_s - # Descendant selector: create a dependency into second selector that - # will match all descendant elements of this one. Note, - elsif statement =~ /^\s+\S+/ && statement != selector - second = next_selector(statement, values) - @depends = lambda do |element, first| - matches = [] - stack = element.children.reverse - while node = stack.pop - next unless node.tag? - if subset = second.match(node, first) - if first && !subset.empty? - matches << subset.first - break - else - matches.concat subset - end - elsif children = node.children - stack.concat children.reverse - end - end - matches.empty? ? nil : matches - end - @source << " " << second.to_s - else - # The last selector is where we check that we parsed - # all the parts. - unless statement.empty? || statement.strip.empty? - raise ArgumentError, "Invalid selector: #{statement}" - end - end - end - - - # :call-seq: - # match(element, first?) => array or nil - # - # Matches an element against the selector. - # - # For a simple selector this method returns an array with the - # element if the element matches, nil otherwise. - # - # For a complex selector (sibling and descendant) this method - # returns an array with all matching elements, nil if no match is - # found. - # - # Use +first_only=true+ if you are only interested in the first element. 
- # - # For example: - # if selector.match(element) - # puts "Element is a login form" - # end - def match(element, first_only = false) - # Match element if no element name or element name same as element name - if matched = (!@tag_name || @tag_name == element.name) - # No match if one of the attribute matches failed - for attr in @attributes - if element.attributes[attr[0]] !~ attr[1] - matched = false - break - end - end - end - - # Pseudo class matches (nth-child, empty, etc). - if matched - for pseudo in @pseudo - unless pseudo.call(element) - matched = false - break - end - end - end - - # Negation. Same rules as above, but we fail if a match is made. - if matched && @negation - for negation in @negation - if negation[:tag_name] == element.name - matched = false - else - for attr in negation[:attributes] - if element.attributes[attr[0]] =~ attr[1] - matched = false - break - end - end - end - if matched - for pseudo in negation[:pseudo] - if pseudo.call(element) - matched = false - break - end - end - end - break unless matched - end - end - - # If element matched but depends on another element (child, - # sibling, etc), apply the dependent matches instead. - if matched && @depends - matches = @depends.call(element, first_only) - else - matches = matched ? [element] : nil - end - - # If this selector is part of the group, try all the alternative - # selectors (unless first_only). - if !first_only || !matches - @alternates.each do |alternate| - break if matches && first_only - if subset = alternate.match(element, first_only) - if matches - matches.concat subset - else - matches = subset - end - end - end - end - - matches - end - - - # :call-seq: - # select(root) => array - # - # Selects and returns an array with all matching elements, beginning - # with one node and traversing through all children depth-first. - # Returns an empty array if no match is found. - # - # The root node may be any element in the document, or the document - # itself. 
- # - # For example: - # selector = HTML::Selector.new "input[type=text]" - # matches = selector.select(element) - # matches.each do |match| - # puts "Found text field with name #{match.attributes['name']}" - # end - def select(root) - matches = [] - stack = [root] - while node = stack.pop - if node.tag? && subset = match(node, false) - subset.each do |match| - matches << match unless matches.any? { |item| item.equal?(match) } - end - elsif children = node.children - stack.concat children.reverse - end - end - matches - end - - - # Similar to #select but returns the first matching element. Returns +nil+ - # if no element matches the selector. - def select_first(root) - stack = [root] - while node = stack.pop - if node.tag? && subset = match(node, true) - return subset.first if !subset.empty? - elsif children = node.children - stack.concat children.reverse - end - end - nil - end - - - def to_s #:nodoc: - @source - end - - - # Returns the next element after this one. Skips sibling text nodes. - # - # With the +name+ argument, returns the next element with that name, - # skipping other sibling elements. - def next_element(element, name = nil) - if siblings = element.parent.children - found = false - siblings.each do |node| - if node.equal?(element) - found = true - elsif found && node.tag? - return node if (name.nil? || node.name == name) - end - end - end - nil - end - - - protected - - - # Creates a simple selector given the statement and array of - # substitution values. - # - # Returns a hash with the values +tag_name+, +attributes+, - # +pseudo+ (classes) and +negation+. - # - # Called the first time with +can_negate+ true to allow - # negation. Called a second time with false since negation - # cannot be negated. - def simple_selector(statement, values, can_negate = true) - tag_name = nil - attributes = [] - pseudo = [] - negation = [] - - # Element name. (Note that in negation, this can come at - # any order, but for simplicity we allow if only first). 
- statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match| - match.strip! - tag_name = match.downcase unless match == "*" - @source << match - "" # Remove - end - - # Get identifier, class, attribute name, pseudo or negation. - while true - # Element identifier. - next if statement.sub!(/^#(\?|[\w\-]+)/) do - id = $1 - if id == "?" - id = values.shift - end - @source << "##{id}" - id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp) - attributes << ["id", id] - "" # Remove - end - - # Class name. - next if statement.sub!(/^\.([\w\-]+)/) do - class_name = $1 - @source << ".#{class_name}" - class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp) - attributes << ["class", class_name] - "" # Remove - end - - # Attribute value. - next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do - name, equality, value = $1, $2, $3 - if value == "?" - value = values.shift - else - # Handle single and double quotes. - value.strip! - if (value[0] == ?" || value[0] == ?') && value[0] == value[-1] - value = value[1..-2] - end - end - @source << "[#{name}#{equality}'#{value}']" - attributes << [name.downcase.strip, attribute_match(equality, value)] - "" # Remove - end - - # Root element only. - next if statement.sub!(/^:root/) do - pseudo << lambda do |element| - element.parent.nil? || !element.parent.tag? - end - @source << ":root" - "" # Remove - end - - # Nth-child including last and of-type. - next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match| - reverse = $1 == "last-" - of_type = $2 == "of-type" - @source << ":nth-#{$1}#{$2}(" - case $3 - when "odd" - pseudo << nth_child(2, 1, of_type, reverse) - @source << "odd)" - when "even" - pseudo << nth_child(2, 2, of_type, reverse) - @source << "even)" - when /^(\d+|\?)$/ # b only - b = ($1 == "?" ? 
values.shift : $1).to_i - pseudo << nth_child(0, b, of_type, reverse) - @source << "#{b})" - when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/ - a = ($1 == "?" ? values.shift : - $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i - b = ($2 == "?" ? values.shift : $2).to_i - pseudo << nth_child(a, b, of_type, reverse) - @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})") - else - raise ArgumentError, "Invalid nth-child #{match}" - end - "" # Remove - end - # First/last child (of type). - next if statement.sub!(/^:(first|last)-(child|of-type)/) do - reverse = $1 == "last" - of_type = $2 == "of-type" - pseudo << nth_child(0, 1, of_type, reverse) - @source << ":#{$1}-#{$2}" - "" # Remove - end - # Only child (of type). - next if statement.sub!(/^:only-(child|of-type)/) do - of_type = $1 == "of-type" - pseudo << only_child(of_type) - @source << ":only-#{$1}" - "" # Remove - end - - # Empty: no child elements or meaningful content (whitespaces - # are ignored). - next if statement.sub!(/^:empty/) do - pseudo << lambda do |element| - empty = true - for child in element.children - if child.tag? || !child.content.strip.empty? - empty = false - break - end - end - empty - end - @source << ":empty" - "" # Remove - end - # Content: match the text content of the element, stripping - # leading and trailing spaces. - next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do - content = $1 - if content == "?" - content = values.shift - elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1] - content = content[1..-2] - end - @source << ":content('#{content}')" - content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp) - pseudo << lambda do |element| - text = "" - for child in element.children - unless child.tag? - text << child.content - end - end - text.strip =~ content - end - "" # Remove - end - - # Negation. Create another simple selector to handle it. 
- if statement.sub!(/^:not\(\s*/, "") - raise ArgumentError, "Double negatives are not missing feature" unless can_negate - @source << ":not(" - negation << simple_selector(statement, values, false) - raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "") - @source << ")" - next - end - - # No match: moving on. - break - end - - # Return hash. The keys are mapped to instance variables. - {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation} - end - - - # Create a regular expression to match an attribute value based - # on the equality operator (=, ^=, |=, etc). - def attribute_match(equality, value) - regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s) - case equality - when "=" then - # Match the attribute value in full - Regexp.new("^#{regexp}$") - when "~=" then - # Match a space-separated word within the attribute value - Regexp.new("(^|\s)#{regexp}($|\s)") - when "^=" - # Match the beginning of the attribute value - Regexp.new("^#{regexp}") - when "$=" - # Match the end of the attribute value - Regexp.new("#{regexp}$") - when "*=" - # Match substring of the attribute value - regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp) - when "|=" then - # Match the first space-separated item of the attribute value - Regexp.new("^#{regexp}($|\s)") - else - raise InvalidSelectorError, "Invalid operation/value" unless value.empty? - # Match all attributes values (existence check) - // - end - end - - - # Returns a lambda that can match an element against the nth-child - # pseudo class, given the following arguments: - # * +a+ -- Value of a part. - # * +b+ -- Value of b part. - # * +of_type+ -- True to test only elements of this type (of-type). - # * +reverse+ -- True to count in reverse order (last-). 
- def nth_child(a, b, of_type, reverse) - # a = 0 means select at index b, if b = 0 nothing selected - return lambda { |element| false } if a == 0 && b == 0 - # a < 0 and b < 0 will never match against an index - return lambda { |element| false } if a < 0 && b < 0 - b = a + b + 1 if b < 0 # b < 0 just picks last element from each group - b -= 1 unless b == 0 # b == 0 is same as b == 1, otherwise zero based - lambda do |element| - # Element must be inside parent element. - return false unless element.parent && element.parent.tag? - index = 0 - # Get siblings, reverse if counting from last. - siblings = element.parent.children - siblings = siblings.reverse if reverse - # Match element name if of-type, otherwise ignore name. - name = of_type ? element.name : nil - found = false - for child in siblings - # Skip text nodes/comments. - if child.tag? && (name == nil || child.name == name) - if a == 0 - # Shortcut when a == 0 no need to go past count - if index == b - found = child.equal?(element) - break - end - elsif a < 0 - # Only look for first b elements - break if index > b - if child.equal?(element) - found = (index % a) == 0 - break - end - else - # Otherwise, break if child found and count == an+b - if child.equal?(element) - found = (index % a) == b - break - end - end - index += 1 - end - end - found - end - end - - - # Creates a only child lambda. Pass +of-type+ to only look at - # elements of its type. - def only_child(of_type) - lambda do |element| - # Element must be inside parent element. - return false unless element.parent && element.parent.tag? - name = of_type ? element.name : nil - other = false - for child in element.parent.children - # Skip text nodes/comments. - if child.tag? && (name == nil || child.name == name) - unless child.equal?(element) - other = true - break - end - end - end - !other - end - end - - - # Called to create a dependent selector (sibling, descendant, etc). 
- # Passes the remainder of the statement that will be reduced to zero - # eventually, and array of substitution values. - # - # This method is called from four places, so it helps to put it here - # for reuse. The only logic deals with the need to detect comma - # separators (alternate) and apply them to the selector group of the - # top selector. - def next_selector(statement, values) - second = Selector.new(statement, values) - # If there are alternate selectors, we group them in the top selector. - if alternates = second.instance_variable_get(:@alternates) - second.instance_variable_set(:@alternates, []) - @alternates.concat alternates - end - second - end - - end - - - # See HTML::Selector.new - def self.selector(statement, *values) - Selector.new(statement, *values) - end - - - class Tag - - def select(selector, *values) - selector = HTML::Selector.new(selector, values) - selector.select(self) - end - - end - -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb deleted file mode 100644 index adf4e45930..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb +++ /dev/null @@ -1,107 +0,0 @@ -require 'strscan' - -module HTML #:nodoc: - - # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each - # token is a string. Each string represents either "text", or an HTML element. - # - # This currently assumes valid XHTML, which means no free < or > characters. - # - # Usage: - # - # tokenizer = HTML::Tokenizer.new(text) - # while token = tokenizer.next - # p token - # end - class Tokenizer #:nodoc: - - # The current (byte) position in the text - attr_reader :position - - # The current line number - attr_reader :line - - # Create a new Tokenizer for the given text. - def initialize(text) - text.encode! 
- @scanner = StringScanner.new(text) - @position = 0 - @line = 0 - @current_line = 1 - end - - # Returns the next token in the sequence, or +nil+ if there are no more tokens in - # the stream. - def next - return nil if @scanner.eos? - @position = @scanner.pos - @line = @current_line - if @scanner.check(/<\S/) - update_current_line(scan_tag) - else - update_current_line(scan_text) - end - end - - private - - # Treat the text at the current position as a tag, and scan it. Supports - # comments, doctype tags, and regular tags, and ignores less-than and - # greater-than characters within quoted strings. - def scan_tag - tag = @scanner.getch - if @scanner.scan(/!--/) # comment - tag << @scanner.matched - tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/)) - elsif @scanner.scan(/!\[CDATA\[/) - tag << @scanner.matched - tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/)) - elsif @scanner.scan(/!/) # doctype - tag << @scanner.matched - tag << consume_quoted_regions - else - tag << consume_quoted_regions - end - tag - end - - # Scan all text up to the next < character and return it. - def scan_text - "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}" - end - - # Counts the number of newlines in the text and updates the current line - # accordingly. - def update_current_line(text) - text.scan(/\r?\n/) { @current_line += 1 } - end - - # Skips over quoted strings, so that less-than and greater-than characters - # within the strings are ignored. - def consume_quoted_regions - text = "" - loop do - match = @scanner.scan_until(/['"<>]/) or break - - delim = @scanner.matched - if delim == "<" - match = match.chop - @scanner.pos -= 1 - end - - text << match - break if delim == "<" || delim == ">" - - # consume the quoted region - while match = @scanner.scan_until(/[\\#{delim}]/) - text << match - break if @scanner.matched == delim - break if @scanner.eos? 
- text << @scanner.getch # skip the escaped character - end - end - text - end - end - -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/version.rb b/actionview/lib/action_view/vendor/html-scanner/html/version.rb deleted file mode 100644 index 6d645c3e14..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/version.rb +++ /dev/null @@ -1,11 +0,0 @@ -module HTML #:nodoc: - module Version #:nodoc: - - MAJOR = 0 - MINOR = 5 - TINY = 3 - - STRING = [ MAJOR, MINOR, TINY ].join(".") - - end -end diff --git a/actionview/test/template/html-scanner/cdata_node_test.rb b/actionview/test/template/html-scanner/cdata_node_test.rb deleted file mode 100644 index 0bab2bcb33..0000000000 --- a/actionview/test/template/html-scanner/cdata_node_test.rb +++ /dev/null @@ -1,16 +0,0 @@ -require 'abstract_unit' -require 'action_view/vendor/html-scanner/html/node' - -class CDATANodeTest < ActiveSupport::TestCase - def setup - @node = HTML::CDATA.new(nil, 0, 0, "<p>howdy</p>") - end - - def test_to_s - assert_equal "<![CDATA[<p>howdy</p>]]>", @node.to_s - end - - def test_content - assert_equal "<p>howdy</p>", @node.content - end -end diff --git a/actionview/test/template/html-scanner/document_test.rb b/actionview/test/template/html-scanner/document_test.rb deleted file mode 100644 index 7b7518e130..0000000000 --- a/actionview/test/template/html-scanner/document_test.rb +++ /dev/null @@ -1,149 +0,0 @@ -require 'abstract_unit' -require 'action_view/vendor/html-scanner' - -class DocumentTest < ActiveSupport::TestCase - def test_handle_doctype - doc = nil - assert_nothing_raised do - doc = HTML::Document.new <<-HTML.strip - <!DOCTYPE "blah" "blah" "blah"> - <html> - </html> - HTML - end - assert_equal 3, doc.root.children.length - assert_equal %{<!DOCTYPE "blah" "blah" "blah">}, doc.root.children[0].content - assert_match %r{\s+}m, doc.root.children[1].content - assert_equal "html", doc.root.children[2].name - end - - def test_find_img - doc = 
HTML::Document.new <<-HTML.strip - <html> - <body> - <p><img src="hello.gif"></p> - </body> - </html> - HTML - assert doc.find(:tag=>"img", :attributes=>{"src"=>"hello.gif"}) - end - - def test_find_all - doc = HTML::Document.new <<-HTML.strip - <html> - <body> - <p class="test"><img src="hello.gif"></p> - <div class="foo"> - <p class="test">something</p> - <p>here is <em class="test">more</em></p> - </div> - </body> - </html> - HTML - all = doc.find_all :attributes => { :class => "test" } - assert_equal 3, all.length - assert_equal [ "p", "p", "em" ], all.map { |n| n.name } - end - - def test_find_with_text - doc = HTML::Document.new <<-HTML.strip - <html> - <body> - <p>Some text</p> - </body> - </html> - HTML - assert doc.find(:content => "Some text") - assert doc.find(:tag => "p", :child => { :content => "Some text" }) - assert doc.find(:tag => "p", :child => "Some text") - assert doc.find(:tag => "p", :content => "Some text") - end - - def test_parse_xml - assert_nothing_raised { HTML::Document.new("<tags><tag/></tags>", true, true) } - assert_nothing_raised { HTML::Document.new("<outer><link>something</link></outer>", true, true) } - end - - def test_parse_document - doc = HTML::Document.new(<<-HTML) - <div> - <h2>blah</h2> - <table> - </table> - </div> - HTML - assert_not_nil doc.find(:tag => "div", :children => { :count => 1, :only => { :tag => "table" } }) - end - - def test_tag_nesting_nothing_to_s - doc = HTML::Document.new("<tag></tag>") - assert_equal "<tag></tag>", doc.root.to_s - end - - def test_tag_nesting_space_to_s - doc = HTML::Document.new("<tag> </tag>") - assert_equal "<tag> </tag>", doc.root.to_s - end - - def test_tag_nesting_text_to_s - doc = HTML::Document.new("<tag>text</tag>") - assert_equal "<tag>text</tag>", doc.root.to_s - end - - def test_tag_nesting_tag_to_s - doc = HTML::Document.new("<tag><nested /></tag>") - assert_equal "<tag><nested /></tag>", doc.root.to_s - end - - def test_parse_cdata - doc = HTML::Document.new(<<-HTML) 
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> - <head> - <title><![CDATA[<br>]]></title> - </head> - <body> - <p>this document has <br> for a title</p> - </body> -</html> -HTML - - assert_nil doc.find(:tag => "title", :descendant => { :tag => "br" }) - assert doc.find(:tag => "title", :child => "<br>") - end - - def test_find_empty_tag - doc = HTML::Document.new("<div id='map'></div>") - assert_nil doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /./) - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /\A\Z/) - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /^$/) - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => "") - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => nil) - end - - def test_parse_invalid_document - assert_nothing_raised do - HTML::Document.new("<html> - <table> - <tr> - <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About Us</td> - </tr> - </table> - </html>") - end - end - - def test_invalid_document_raises_exception_when_strict - assert_raise RuntimeError do - HTML::Document.new("<html> - <table> - <tr> - <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About 
Us</td> - </tr> - </table> - </html>", true) - end - end - -end diff --git a/actionview/test/template/html-scanner/node_test.rb b/actionview/test/template/html-scanner/node_test.rb deleted file mode 100644 index a2734dfcfe..0000000000 --- a/actionview/test/template/html-scanner/node_test.rb +++ /dev/null @@ -1,90 +0,0 @@ -require 'abstract_unit' -require 'action_view/vendor/html-scanner/html/node' - -class NodeTest < ActiveSupport::TestCase - - class MockNode - def initialize(matched, value) - @matched = matched - @value = value - end - - def find(conditions) - @matched && self - end - - def to_s - @value.to_s - end - end - - def setup - @node = HTML::Node.new("parent") - @node.children.concat [MockNode.new(false,1), MockNode.new(true,"two"), MockNode.new(false,:three)] - end - - def test_match - assert !@node.match("foo") - end - - def test_tag - assert !@node.tag? - end - - def test_to_s - assert_equal "1twothree", @node.to_s - end - - def test_find - assert_equal "two", @node.find('blah').to_s - end - - def test_parse_strict - s = "<b foo='hello'' bar='baz'>" - assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) } - end - - def test_parse_relaxed - s = "<b foo='hello'' bar='baz'>" - node = nil - assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } - assert node.attributes.has_key?("foo") - assert !node.attributes.has_key?("bar") - end - - def test_to_s_with_boolean_attrs - s = "<b foo bar>" - node = HTML::Node.parse(nil,0,0,s) - assert node.attributes.has_key?("foo") - assert node.attributes.has_key?("bar") - assert "<b foo bar>", node.to_s - end - - def test_parse_with_unclosed_tag - s = "<span onmouseover='bang'" - node = nil - assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } - assert node.attributes.has_key?("onmouseover") - end - - def test_parse_with_valid_cdata_section - s = "<![CDATA[<span>contents</span>]]>" - node = nil - assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } - assert_kind_of HTML::CDATA, 
node - assert_equal '<span>contents</span>', node.content - end - - def test_parse_strict_with_unterminated_cdata_section - s = "<![CDATA[neverending..." - assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) } - end - - def test_parse_relaxed_with_unterminated_cdata_section - s = "<![CDATA[neverending..." - node = nil - assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } - assert_kind_of HTML::CDATA, node - assert_equal 'neverending...', node.content - end -end diff --git a/actionview/test/template/html-scanner/tag_node_test.rb b/actionview/test/template/html-scanner/tag_node_test.rb deleted file mode 100644 index 633d15ad2f..0000000000 --- a/actionview/test/template/html-scanner/tag_node_test.rb +++ /dev/null @@ -1,244 +0,0 @@ -require 'abstract_unit' -require 'action_view/vendor/html-scanner/html/node' - -class TagNodeTest < ActiveSupport::TestCase - def test_open_without_attributes - node = tag("<tag>") - assert_equal "tag", node.name - assert_equal Hash.new, node.attributes - assert_nil node.closing - end - - def test_open_with_attributes - node = tag("<TAG1 foo=hey_ho x:bar=\"blah blah\" BAZ='blah blah blah' >") - assert_equal "tag1", node.name - assert_equal "hey_ho", node["foo"] - assert_equal "blah blah", node["x:bar"] - assert_equal "blah blah blah", node["baz"] - end - - def test_self_closing_without_attributes - node = tag("<tag/>") - assert_equal "tag", node.name - assert_equal Hash.new, node.attributes - assert_equal :self, node.closing - end - - def test_self_closing_with_attributes - node = tag("<tag a=b/>") - assert_equal "tag", node.name - assert_equal( { "a" => "b" }, node.attributes ) - assert_equal :self, node.closing - end - - def test_closing_without_attributes - node = tag("</tag>") - assert_equal "tag", node.name - assert_nil node.attributes - assert_equal :close, node.closing - end - - def test_bracket_op_when_no_attributes - node = tag("</tag>") - assert_nil node["foo"] - end - - def test_bracket_op_when_attributes - 
node = tag("<tag a=b/>") - assert_equal "b", node["a"] - end - - def test_attributes_with_escaped_quotes - node = tag("<tag a='b\\'c' b=\"bob \\\"float\\\"\">") - assert_equal "b\\'c", node["a"] - assert_equal "bob \\\"float\\\"", node["b"] - end - - def test_to_s - node = tag("<a b=c d='f' g=\"h 'i'\" />") - node = node.to_s - assert node.include?('a') - assert node.include?('b="c"') - assert node.include?('d="f"') - assert node.include?('g="h') - assert node.include?('i') - end - - def test_tag - assert tag("<tag>").tag? - end - - def test_match_tag_as_string - assert tag("<tag>").match(:tag => "tag") - assert !tag("<tag>").match(:tag => "b") - end - - def test_match_tag_as_regexp - assert tag("<tag>").match(:tag => /t.g/) - assert !tag("<tag>").match(:tag => /t[bqs]g/) - end - - def test_match_attributes_as_string - t = tag("<tag a=something b=else />") - assert t.match(:attributes => {"a" => "something"}) - assert t.match(:attributes => {"b" => "else"}) - end - - def test_match_attributes_as_regexp - t = tag("<tag a=something b=else />") - assert t.match(:attributes => {"a" => /^something$/}) - assert t.match(:attributes => {"b" => /e.*e/}) - assert t.match(:attributes => {"a" => /me..i/, "b" => /.ls.$/}) - end - - def test_match_attributes_as_number - t = tag("<tag a=15 b=3.1415 />") - assert t.match(:attributes => {"a" => 15}) - assert t.match(:attributes => {"b" => 3.1415}) - assert t.match(:attributes => {"a" => 15, "b" => 3.1415}) - end - - def test_match_attributes_exist - t = tag("<tag a=15 b=3.1415 />") - assert t.match(:attributes => {"a" => true}) - assert t.match(:attributes => {"b" => true}) - assert t.match(:attributes => {"a" => true, "b" => true}) - end - - def test_match_attributes_not_exist - t = tag("<tag a=15 b=3.1415 />") - assert t.match(:attributes => {"c" => false}) - assert t.match(:attributes => {"c" => nil}) - assert t.match(:attributes => {"a" => true, "c" => false}) - end - - def test_match_parent_success - t = tag("<tag a=15 
b='hello'>", tag("<foo k='value'>")) - assert t.match(:parent => {:tag => "foo", :attributes => {"k" => /v.l/, "j" => false}}) - end - - def test_match_parent_fail - t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>")) - assert !t.match(:parent => {:tag => /kafka/}) - end - - def test_match_child_success - t = tag("<tag x:k='something'>") - tag("<child v=john a=kelly>", t) - tag("<sib m=vaughn v=james>", t) - assert t.match(:child => { :tag => "sib", :attributes => {"v" => /j/}}) - assert t.match(:child => { :attributes => {"a" => "kelly"}}) - end - - def test_match_child_fail - t = tag("<tag x:k='something'>") - tag("<child v=john a=kelly>", t) - tag("<sib m=vaughn v=james>", t) - assert !t.match(:child => { :tag => "sib", :attributes => {"v" => /r/}}) - assert !t.match(:child => { :attributes => {"v" => false}}) - end - - def test_match_ancestor_success - t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>"))) - assert t.match(:ancestor => {:tag => "parent", :attributes => {"a" => /ll/}}) - assert t.match(:ancestor => {:attributes => {"m" => "vaughn"}}) - end - - def test_match_ancestor_fail - t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>"))) - assert !t.match(:ancestor => {:tag => /^parent/, :attributes => {"v" => /m/}}) - assert !t.match(:ancestor => {:attributes => {"v" => false}}) - end - - def test_match_descendant_success - tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>"))) - assert t.match(:descendant => {:tag => "child", :attributes => {"a" => /ll/}}) - assert t.match(:descendant => {:attributes => {"m" => "vaughn"}}) - end - - def test_match_descendant_fail - tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>"))) - assert !t.match(:descendant => {:tag => /^child/, :attributes => {"v" => /m/}}) - assert !t.match(:descendant => {:attributes => {"v" => 
false}}) - end - - def test_match_child_count - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<child v=john a=kelly>", t) - tag("<sib m=vaughn v=james>", t) - assert t.match(:children => { :count => 2 }) - assert t.match(:children => { :count => 2..4 }) - assert t.match(:children => { :less_than => 4 }) - assert t.match(:children => { :greater_than => 1 }) - assert !t.match(:children => { :count => 3 }) - end - - def test_conditions_as_strings - t = tag("<tag x:k='something'>") - assert t.match("tag" => "tag") - assert t.match("attributes" => { "x:k" => "something" }) - assert !t.match("tag" => "gat") - assert !t.match("attributes" => { "x:j" => "something" }) - end - - def test_attributes_as_symbols - t = tag("<child v=john a=kelly>") - assert t.match(:attributes => { :v => /oh/ }) - assert t.match(:attributes => { :a => /ll/ }) - end - - def test_match_sibling - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<span a=b>", t) - tag("world", t) - m = tag("<span k=r>", t) - tag("<span m=l>", t) - - assert m.match(:sibling => {:tag => "span", :attributes => {:a => true}}) - assert m.match(:sibling => {:tag => "span", :attributes => {:m => true}}) - assert !m.match(:sibling => {:tag => "span", :attributes => {:k => true}}) - end - - def test_match_sibling_before - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<span a=b>", t) - tag("world", t) - m = tag("<span k=r>", t) - tag("<span m=l>", t) - - assert m.match(:before => {:tag => "span", :attributes => {:m => true}}) - assert !m.match(:before => {:tag => "span", :attributes => {:a => true}}) - assert !m.match(:before => {:tag => "span", :attributes => {:k => true}}) - end - - def test_match_sibling_after - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<span a=b>", t) - tag("world", t) - m = tag("<span k=r>", t) - tag("<span m=l>", t) - - assert m.match(:after => {:tag => "span", :attributes => {:a => true}}) - assert !m.match(:after => {:tag => "span", :attributes => {:m 
=> true}}) - assert !m.match(:after => {:tag => "span", :attributes => {:k => true}}) - end - - def test_tag_to_s - t = tag("<b x='foo'>") - tag("hello", t) - tag("<hr />", t) - assert_equal %(<b x="foo">hello<hr /></b>), t.to_s - end - - private - - def tag(content, parent=nil) - node = HTML::Node.parse(parent,0,0,content) - parent.children << node if parent - node - end -end diff --git a/actionview/test/template/html-scanner/text_node_test.rb b/actionview/test/template/html-scanner/text_node_test.rb deleted file mode 100644 index d8ab667adf..0000000000 --- a/actionview/test/template/html-scanner/text_node_test.rb +++ /dev/null @@ -1,51 +0,0 @@ -require 'abstract_unit' -require 'action_view/vendor/html-scanner/html/node' - -class TextNodeTest < ActiveSupport::TestCase - def setup - @node = HTML::Text.new(nil, 0, 0, "hello, howdy, aloha, annyeong") - end - - def test_to_s - assert_equal "hello, howdy, aloha, annyeong", @node.to_s - end - - def test_find_string - assert_equal @node, @node.find("hello, howdy, aloha, annyeong") - assert_equal false, @node.find("bogus") - end - - def test_find_regexp - assert_equal @node, @node.find(/an+y/) - assert_nil @node.find(/b/) - end - - def test_find_hash - assert_equal @node, @node.find(:content => /howdy/) - assert_nil @node.find(:content => /^howdy$/) - assert_equal false, @node.find(:content => "howdy") - end - - def test_find_other - assert_nil @node.find(:hello) - end - - def test_match_string - assert @node.match("hello, howdy, aloha, annyeong") - assert_equal false, @node.match("bogus") - end - - def test_match_regexp - assert_not_nil @node, @node.match(/an+y/) - assert_nil @node.match(/b/) - end - - def test_match_hash - assert_not_nil @node, @node.match(:content => "howdy") - assert_nil @node.match(:content => /^howdy$/) - end - - def test_match_other - assert_nil @node.match(:hello) - end -end diff --git a/actionview/test/template/html-scanner/tokenizer_test.rb 
b/actionview/test/template/html-scanner/tokenizer_test.rb deleted file mode 100644 index d1cdd53211..0000000000 --- a/actionview/test/template/html-scanner/tokenizer_test.rb +++ /dev/null @@ -1,132 +0,0 @@ -require 'abstract_unit' -require 'action_view/vendor/html-scanner/html/tokenizer' - -class TokenizerTest < ActiveSupport::TestCase - - def test_blank - tokenize "" - assert_end - end - - def test_space - tokenize " " - assert_next " " - assert_end - end - - def test_tag_simple_open - tokenize "<tag>" - assert_next "<tag>" - assert_end - end - - def test_tag_simple_self_closing - tokenize "<tag />" - assert_next "<tag />" - assert_end - end - - def test_tag_simple_closing - tokenize "</tag>" - assert_next "</tag>" - end - - def test_tag_with_single_quoted_attribute - tokenize %{<tag a='hello'>x} - assert_next %{<tag a='hello'>} - end - - def test_tag_with_single_quoted_attribute_with_escape - tokenize %{<tag a='hello\\''>x} - assert_next %{<tag a='hello\\''>} - end - - def test_tag_with_double_quoted_attribute - tokenize %{<tag a="hello">x} - assert_next %{<tag a="hello">} - end - - def test_tag_with_double_quoted_attribute_with_escape - tokenize %{<tag a="hello\\"">x} - assert_next %{<tag a="hello\\"">} - end - - def test_tag_with_unquoted_attribute - tokenize %{<tag a=hello>x} - assert_next %{<tag a=hello>} - end - - def test_tag_with_lt_char_in_attribute - tokenize %{<tag a="x < y">x} - assert_next %{<tag a="x < y">} - end - - def test_tag_with_gt_char_in_attribute - tokenize %{<tag a="x > y">x} - assert_next %{<tag a="x > y">} - end - - def test_doctype_tag - tokenize %{<!DOCTYPE "blah" "blah" "blah">\n <html>} - assert_next %{<!DOCTYPE "blah" "blah" "blah">} - assert_next %{\n } - assert_next %{<html>} - end - - def test_cdata_tag - tokenize %{<![CDATA[<br>]]>} - assert_next %{<![CDATA[<br>]]>} - assert_end - end - - def test_unterminated_cdata_tag - tokenize %{<content:encoded><![CDATA[ neverending...} - assert_next %{<content:encoded>} - assert_next 
%{<![CDATA[ neverending...} - assert_end - end - - def test_less_than_with_space - tokenize %{original < hello > world} - assert_next %{original } - assert_next %{< hello > world} - end - - def test_less_than_without_matching_greater_than - tokenize %{hello <span onmouseover="gotcha"\n<b>foo</b>\nbar</span>} - assert_next %{hello } - assert_next %{<span onmouseover="gotcha"\n} - assert_next %{<b>} - assert_next %{foo} - assert_next %{</b>} - assert_next %{\nbar} - assert_next %{</span>} - assert_end - end - - def test_unterminated_comment - tokenize %{hello <!-- neverending...} - assert_next %{hello } - assert_next %{<!-- neverending...} - assert_end - end - - private - - def tokenize(text) - @tokenizer = HTML::Tokenizer.new(text) - end - - def assert_next(expected, message=nil) - token = @tokenizer.next - assert_equal expected, token, message - end - - def assert_sequence(*expected) - assert_next expected.shift until expected.empty? - end - - def assert_end(message=nil) - assert_nil @tokenizer.next, message - end -end |