diff options
author | Rafael Mendonça França <rafaelmfranca@gmail.com> | 2014-07-10 16:52:00 -0300 |
---|---|---|
committer | Rafael Mendonça França <rafaelmfranca@gmail.com> | 2014-07-10 16:52:00 -0300 |
commit | 3229eda00c2a38526787fddb6cc307cfee5c5ac6 (patch) | |
tree | 9b45bf0c52e5b20ab403a5ac0025b9b1b18a5ce3 /actionview/lib/action_view/vendor/html-scanner | |
parent | 6e76031e8f1f815b390f966cb21e25c66e5ded50 (diff) | |
parent | ff1b7e75357eb7797f25aeab33994765130db67f (diff) | |
download | rails-3229eda00c2a38526787fddb6cc307cfee5c5ac6.tar.gz rails-3229eda00c2a38526787fddb6cc307cfee5c5ac6.tar.bz2 rails-3229eda00c2a38526787fddb6cc307cfee5c5ac6.zip |
Merge pull request #11218 from kaspth/loofah-integration
Loofah-integration
Conflicts:
actionpack/CHANGELOG.md
actionview/CHANGELOG.md
Diffstat (limited to 'actionview/lib/action_view/vendor/html-scanner')
6 files changed, 0 insertions, 1736 deletions
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/document.rb b/actionview/lib/action_view/vendor/html-scanner/html/document.rb deleted file mode 100644 index 386820300a..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/document.rb +++ /dev/null @@ -1,68 +0,0 @@ -require 'html/tokenizer' -require 'html/node' -require 'html/selector' -require 'html/sanitizer' - -module HTML #:nodoc: - # A top-level HTML document. You give it a body of text, and it will parse that - # text into a tree of nodes. - class Document #:nodoc: - - # The root of the parsed document. - attr_reader :root - - # Create a new Document from the given text. - def initialize(text, strict=false, xml=false) - tokenizer = Tokenizer.new(text) - @root = Node.new(nil) - node_stack = [ @root ] - while token = tokenizer.next - node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict) - - node_stack.last.children << node unless node.tag? && node.closing == :close - if node.tag? - if node_stack.length > 1 && node.closing == :close - if node_stack.last.name == node.name - if node_stack.last.children.empty? - node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "") - end - node_stack.pop - else - open_start = node_stack.last.position - 20 - open_start = 0 if open_start < 0 - close_start = node.position - 20 - close_start = 0 if close_start < 0 - msg = <<EOF.strip -ignoring attempt to close #{node_stack.last.name} with #{node.name} - opened at byte #{node_stack.last.position}, line #{node_stack.last.line} - closed at byte #{node.position}, line #{node.line} - attributes at open: #{node_stack.last.attributes.inspect} - text around open: #{text[open_start,40].inspect} - text around close: #{text[close_start,40].inspect} -EOF - strict ? raise(msg) : warn(msg) - end - elsif !node.childless?(xml) && node.closing != :close - node_stack.push node - end - end - end - end - - # Search the tree for (and return) the first node that matches the given - # conditions. The conditions are interpreted differently for different node - # types, see HTML::Text#find and HTML::Tag#find. - def find(conditions) - @root.find(conditions) - end - - # Search the tree for (and return) all nodes that match the given - # conditions. The conditions are interpreted differently for different node - # types, see HTML::Text#find and HTML::Tag#find. - def find_all(conditions) - @root.find_all(conditions) - end - - end - -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/node.rb b/actionview/lib/action_view/vendor/html-scanner/html/node.rb deleted file mode 100644 index 27f0f2f6f8..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/node.rb +++ /dev/null @@ -1,532 +0,0 @@ -require 'strscan' - -module HTML #:nodoc: - - class Conditions < Hash #:nodoc: - def initialize(hash) - super() - hash = { :content => hash } unless Hash === hash - hash = keys_to_symbols(hash) - hash.each do |k,v| - case k - when :tag, :content then - # keys are valid, and require no further processing - when :attributes then - hash[k] = keys_to_strings(v) - when :parent, :child, :ancestor, :descendant, :sibling, :before, - :after - hash[k] = Conditions.new(v) - when :children - hash[k] = v = keys_to_symbols(v) - v.each do |key,value| - case key - when :count, :greater_than, :less_than - # keys are valid, and require no further processing - when :only - v[key] = Conditions.new(value) - else - raise "illegal key #{key.inspect} => #{value.inspect}" - end - end - else - raise "illegal key #{k.inspect} => #{v.inspect}" - end - end - update hash - end - - private - - def keys_to_strings(hash) - Hash[hash.keys.map {|k| [k.to_s, hash[k]]}] - end - - def keys_to_symbols(hash) - Hash[hash.keys.map do |k| - raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym) - [k.to_sym, hash[k]] - end] - end - end - - # The base class of all nodes, textual and otherwise, in an HTML document. - class Node #:nodoc: - # The array of children of this node. Not all nodes have children. - attr_reader :children - - # The parent node of this node. All nodes have a parent, except for the - # root node. - attr_reader :parent - - # The line number of the input where this node was begun - attr_reader :line - - # The byte position in the input where this node was begun - attr_reader :position - - # Create a new node as a child of the given parent. - def initialize(parent, line=0, pos=0) - @parent = parent - @children = [] - @line, @position = line, pos - end - - # Returns a textual representation of the node. - def to_s - @children.join() - end - - # Returns false (subclasses must override this to provide specific matching - # behavior.) +conditions+ may be of any type. - def match(conditions) - false - end - - # Search the children of this node for the first node for which #find - # returns non +nil+. Returns the result of the #find call that succeeded. - def find(conditions) - conditions = validate_conditions(conditions) - @children.each do |child| - node = child.find(conditions) - return node if node - end - nil - end - - # Search for all nodes that match the given conditions, and return them - # as an array. - def find_all(conditions) - conditions = validate_conditions(conditions) - - matches = [] - matches << self if match(conditions) - @children.each do |child| - matches.concat child.find_all(conditions) - end - matches - end - - # Returns +false+. Subclasses may override this if they define a kind of - # tag. - def tag? - false - end - - def validate_conditions(conditions) - Conditions === conditions ? conditions : Conditions.new(conditions) - end - - def ==(node) - return false unless self.class == node.class && children.size == node.children.size - - equivalent = true - - children.size.times do |i| - equivalent &&= children[i] == node.children[i] - end - - equivalent - end - - class <<self - def parse(parent, line, pos, content, strict=true) - if content !~ /^<\S/ - Text.new(parent, line, pos, content) - else - scanner = StringScanner.new(content) - - unless scanner.skip(/</) - if strict - raise "expected <" - else - return Text.new(parent, line, pos, content) - end - end - - if scanner.skip(/!\[CDATA\[/) - unless scanner.skip_until(/\]\]>/) - if strict - raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" - else - scanner.skip_until(/\Z/) - end - end - - return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, '')) - end - - closing = ( scanner.scan(/\//) ? :close : nil ) - return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/) - name.downcase! - - unless closing - scanner.skip(/\s*/) - attributes = {} - while attr = scanner.scan(/[-\w:]+/) - value = true - if scanner.scan(/\s*=\s*/) - if delim = scanner.scan(/['"]/) - value = "" - while text = scanner.scan(/[^#{delim}\\]+|./) - case text - when "\\" then - value << text - break if scanner.eos? - value << scanner.getch - when delim - break - else value << text - end - end - else - value = scanner.scan(/[^\s>\/]+/) - end - end - attributes[attr.downcase] = value - scanner.skip(/\s*/) - end - - closing = ( scanner.scan(/\//) ? :self : nil ) - end - - unless scanner.scan(/\s*>/) - if strict - raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" - else - # throw away all text until we find what we're looking for - scanner.skip_until(/>/) or scanner.terminate - end - end - - Tag.new(parent, line, pos, name, attributes, closing) - end - end - end - end - - # A node that represents text, rather than markup. - class Text < Node #:nodoc: - - attr_reader :content - - # Creates a new text node as a child of the given parent, with the given - # content. - def initialize(parent, line, pos, content) - super(parent, line, pos) - @content = content - end - - # Returns the content of this node. - def to_s - @content - end - - # Returns +self+ if this node meets the given conditions. Text nodes support - # conditions of the following kinds: - # - # * if +conditions+ is a string, it must be a substring of the node's - # content - # * if +conditions+ is a regular expression, it must match the node's - # content - # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that - # is either a string or a regexp, and which is interpreted as described - # above. - def find(conditions) - match(conditions) && self - end - - # Returns non-+nil+ if this node meets the given conditions, or +nil+ - # otherwise. See the discussion of #find for the valid conditions. - def match(conditions) - case conditions - when String - @content == conditions - when Regexp - @content =~ conditions - when Hash - conditions = validate_conditions(conditions) - - # Text nodes only have :content, :parent, :ancestor - unless (conditions.keys - [:content, :parent, :ancestor]).empty? - return false - end - - match(conditions[:content]) - else - nil - end - end - - def ==(node) - return false unless super - content == node.content - end - end - - # A CDATA node is simply a text node with a specialized way of displaying - # itself. - class CDATA < Text #:nodoc: - def to_s - "<![CDATA[#{super}]]>" - end - end - - # A Tag is any node that represents markup. It may be an opening tag, a - # closing tag, or a self-closing tag. It has a name, and may have a hash of - # attributes. - class Tag < Node #:nodoc: - - # Either +nil+, <tt>:close</tt>, or <tt>:self</tt> - attr_reader :closing - - # Either +nil+, or a hash of attributes for this node. - attr_reader :attributes - - # The name of this tag. - attr_reader :name - - # Create a new node as a child of the given parent, using the given content - # to describe the node. It will be parsed and the node name, attributes and - # closing status extracted. - def initialize(parent, line, pos, name, attributes, closing) - super(parent, line, pos) - @name = name - @attributes = attributes - @closing = closing - end - - # A convenience for obtaining an attribute of the node. Returns +nil+ if - # the node has no attributes. - def [](attr) - @attributes ? @attributes[attr] : nil - end - - # Returns non-+nil+ if this tag can contain child nodes. - def childless?(xml = false) - return false if xml && @closing.nil? - !@closing.nil? || - @name =~ /^(img|br|hr|link|meta|area|base|basefont| - col|frame|input|isindex|param)$/ox - end - - # Returns a textual representation of the node - def to_s - if @closing == :close - "</#{@name}>" - else - s = "<#{@name}" - @attributes.each do |k,v| - s << " #{k}" - s << "=\"#{v}\"" if String === v - end - s << " /" if @closing == :self - s << ">" - @children.each { |child| s << child.to_s } - s << "</#{@name}>" if @closing != :self && !@children.empty? - s - end - end - - # If either the node or any of its children meet the given conditions, the - # matching node is returned. Otherwise, +nil+ is returned. (See the - # description of the valid conditions in the +match+ method.) - def find(conditions) - match(conditions) && self || super - end - - # Returns +true+, indicating that this node represents an HTML tag. - def tag? - true - end - - # Returns +true+ if the node meets any of the given conditions. The - # +conditions+ parameter must be a hash of any of the following keys - # (all are optional): - # - # * <tt>:tag</tt>: the node name must match the corresponding value - # * <tt>:attributes</tt>: a hash. The node's values must match the - # corresponding values in the hash. - # * <tt>:parent</tt>: a hash. The node's parent must match the - # corresponding hash. - # * <tt>:child</tt>: a hash. At least one of the node's immediate children - # must meet the criteria described by the hash. - # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must - # meet the criteria described by the hash. - # * <tt>:descendant</tt>: a hash. At least one of the node's descendants - # must meet the criteria described by the hash. - # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must - # meet the criteria described by the hash. - # * <tt>:after</tt>: a hash. The node must be after any sibling meeting - # the criteria described by the hash, and at least one sibling must match. - # * <tt>:before</tt>: a hash. The node must be before any sibling meeting - # the criteria described by the hash, and at least one sibling must match. - # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the - # keys: - # ** <tt>:count</tt>: either a number or a range which must equal (or - # include) the number of children that match. - # ** <tt>:less_than</tt>: the number of matching children must be less than - # this number. - # ** <tt>:greater_than</tt>: the number of matching children must be - # greater than this number. - # ** <tt>:only</tt>: another hash consisting of the keys to use - # to match on the children, and only matching children will be - # counted. - # - # Conditions are matched using the following algorithm: - # - # * if the condition is a string, it must be a substring of the value. - # * if the condition is a regexp, it must match the value. - # * if the condition is a number, the value must match number.to_s. - # * if the condition is +true+, the value must not be +nil+. - # * if the condition is +false+ or +nil+, the value must be +nil+. - # - # Usage: - # - # # test if the node is a "span" tag - # node.match tag: "span" - # - # # test if the node's parent is a "div" - # node.match parent: { tag: "div" } - # - # # test if any of the node's ancestors are "table" tags - # node.match ancestor: { tag: "table" } - # - # # test if any of the node's immediate children are "em" tags - # node.match child: { tag: "em" } - # - # # test if any of the node's descendants are "strong" tags - # node.match descendant: { tag: "strong" } - # - # # test if the node has between 2 and 4 span tags as immediate children - # node.match children: { count: 2..4, only: { tag: "span" } } - # - # # get funky: test to see if the node is a "div", has a "ul" ancestor - # # and an "li" parent (with "class" = "enum"), and whether or not it has - # # a "span" descendant that contains # text matching /hello world/: - # node.match tag: "div", - # ancestor: { tag: "ul" }, - # parent: { tag: "li", - # attributes: { class: "enum" } }, - # descendant: { tag: "span", - # child: /hello world/ } - def match(conditions) - conditions = validate_conditions(conditions) - # check content of child nodes - if conditions[:content] - if children.empty? - return false unless match_condition("", conditions[:content]) - else - return false unless children.find { |child| child.match(conditions[:content]) } - end - end - - # test the name - return false unless match_condition(@name, conditions[:tag]) if conditions[:tag] - - # test attributes - (conditions[:attributes] || {}).each do |key, value| - return false unless match_condition(self[key], value) - end - - # test parent - return false unless parent.match(conditions[:parent]) if conditions[:parent] - - # test children - return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child] - - # test ancestors - if conditions[:ancestor] - return false unless catch :found do - p = self - throw :found, true if p.match(conditions[:ancestor]) while p = p.parent - end - end - - # test descendants - if conditions[:descendant] - return false unless children.find do |child| - # test the child - child.match(conditions[:descendant]) || - # test the child's descendants - child.match(:descendant => conditions[:descendant]) - end - end - - # count children - if opts = conditions[:children] - matches = children.select do |c| - (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?)) - end - - matches = matches.select { |c| c.match(opts[:only]) } if opts[:only] - opts.each do |key, value| - next if key == :only - case key - when :count - if Integer === value - return false if matches.length != value - else - return false unless value.include?(matches.length) - end - when :less_than - return false unless matches.length < value - when :greater_than - return false unless matches.length > value - else raise "unknown count condition #{key}" - end - end - end - - # test siblings - if conditions[:sibling] || conditions[:before] || conditions[:after] - siblings = parent ? parent.children : [] - self_index = siblings.index(self) - - if conditions[:sibling] - return false unless siblings.detect do |s| - s != self && s.match(conditions[:sibling]) - end - end - - if conditions[:before] - return false unless siblings[self_index+1..-1].detect do |s| - s != self && s.match(conditions[:before]) - end - end - - if conditions[:after] - return false unless siblings[0,self_index].detect do |s| - s != self && s.match(conditions[:after]) - end - end - end - - true - end - - def ==(node) - return false unless super - return false unless closing == node.closing && self.name == node.name - attributes == node.attributes - end - - private - # Match the given value to the given condition. - def match_condition(value, condition) - case condition - when String - value && value == condition - when Regexp - value && value.match(condition) - when Numeric - value == condition.to_s - when true - !value.nil? - when false, nil - value.nil? - else - false - end - end - end -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb deleted file mode 100644 index ed34eecf55..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb +++ /dev/null @@ -1,188 +0,0 @@ -require 'set' -require 'cgi' -require 'active_support/core_ext/module/attribute_accessors' - -module HTML - class Sanitizer - def sanitize(text, options = {}) - validate_options(options) - return text unless sanitizeable?(text) - tokenize(text, options).join - end - - def sanitizeable?(text) - !(text.nil? || text.empty? || !text.index("<")) - end - - protected - def tokenize(text, options) - tokenizer = HTML::Tokenizer.new(text) - result = [] - while token = tokenizer.next - node = Node.parse(nil, 0, 0, token, false) - process_node node, result, options - end - result - end - - def process_node(node, result, options) - result << node.to_s - end - - def validate_options(options) - if options[:tags] && !options[:tags].is_a?(Enumerable) - raise ArgumentError, "You should pass :tags as an Enumerable" - end - - if options[:attributes] && !options[:attributes].is_a?(Enumerable) - raise ArgumentError, "You should pass :attributes as an Enumerable" - end - end - end - - class FullSanitizer < Sanitizer - def sanitize(text, options = {}) - result = super - # strip any comments, and if they have a newline at the end (ie. line with - # only a comment) strip that too - result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if (result && result =~ /<!--(.*?)-->[\n]?/m) - # Recurse - handle all dirty nested tags - result == text ? result : sanitize(result, options) - end - - def process_node(node, result, options) - result << node.to_s if node.class == HTML::Text - end - end - - class LinkSanitizer < FullSanitizer - cattr_accessor :included_tags, :instance_writer => false - self.included_tags = Set.new(%w(a href)) - - def sanitizeable?(text) - !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">"))) - end - - protected - def process_node(node, result, options) - result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) - end - end - - class WhiteListSanitizer < Sanitizer - [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, - :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| - class_attribute attr, :instance_writer => false - end - - # A regular expression of the valid characters used to separate protocols like - # the ':' in 'http://foo.com' - self.protocol_separator = /:|(�*58)|(p)|(�*3a)|(%|%)3A/i - - # Specifies a Set of HTML attributes that can have URIs. - self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) - - # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed - # to just escaping harmless tags like <font> - self.bad_tags = Set.new(%w(script)) - - # Specifies the default Set of tags that the #sanitize helper will allow unscathed. - self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr - acronym a img blockquote del ins)) - - # Specifies the default Set of html attributes that the #sanitize helper will leave - # in the allowed tag. - self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) - - # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto - feed svn urn aim rsync tag ssh sftp rtsp afs)) - - # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse - border-color border-left-color border-right-color border-top-color clear color cursor direction display - elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height - overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation - speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space - width)) - - # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. - self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center - collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal - nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) - - # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. - self.shorthand_css_properties = Set.new(%w(background border margin padding)) - - # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute - def sanitize_css(style) - # disallow urls - style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') - - # gauntlet - if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ || - style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/ - return '' - end - - clean = [] - style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| - if allowed_css_properties.include?(prop.downcase) - clean << prop + ': ' + val + ';' - elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) - unless val.split().any? do |keyword| - !allowed_css_keywords.include?(keyword) && - keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/ - end - clean << prop + ': ' + val + ';' - end - end - end - clean.join(' ') - end - - protected - def tokenize(text, options) - options[:parent] = [] - options[:attributes] ||= allowed_attributes - options[:tags] ||= allowed_tags - super - end - - def process_node(node, result, options) - result << case node - when HTML::Tag - if node.closing == :close - options[:parent].shift - else - options[:parent].unshift node.name - end - - process_attributes_for node, options - - options[:tags].include?(node.name) ? node : nil - else - bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "<") - end - end - - def process_attributes_for(node, options) - return unless node.attributes - node.attributes.keys.each do |attr_name| - value = node.attributes[attr_name].to_s - - if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value) - node.attributes.delete(attr_name) - else - node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value)) - end - end - end - - def contains_bad_protocols?(attr_name, value) - uri_attributes.include?(attr_name) && - (value =~ /(^[^\/:]*):|(�*58)|(p)|(�*3a)|(%|%)3A/i && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip)) - end - end -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb b/actionview/lib/action_view/vendor/html-scanner/html/selector.rb deleted file mode 100644 index dfdd724b9b..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb +++ /dev/null @@ -1,830 +0,0 @@ -#-- -# Copyright (c) 2006 Assaf Arkin (http://labnotes.org) -# Under MIT and/or CC By license. -#++ - -module HTML - - # Selects HTML elements using CSS 2 selectors. - # - # The +Selector+ class uses CSS selector expressions to match and select - # HTML elements. - # - # For example: - # selector = HTML::Selector.new "form.login[action=/login]" - # creates a new selector that matches any +form+ element with the class - # +login+ and an attribute +action+ with the value <tt>/login</tt>. - # - # === Matching Elements - # - # Use the #match method to determine if an element matches the selector. - # - # For simple selectors, the method returns an array with that element, - # or +nil+ if the element does not match. For complex selectors (see below) - # the method returns an array with all matched elements, of +nil+ if no - # match found. - # - # For example: - # if selector.match(element) - # puts "Element is a login form" - # end - # - # === Selecting Elements - # - # Use the #select method to select all matching elements starting with - # one element and going through all children in depth-first order. - # - # This method returns an array of all matching elements, an empty array - # if no match is found - # - # For example: - # selector = HTML::Selector.new "input[type=text]" - # matches = selector.select(element) - # matches.each do |match| - # puts "Found text field with name #{match.attributes['name']}" - # end - # - # === Expressions - # - # Selectors can match elements using any of the following criteria: - # * <tt>name</tt> -- Match an element based on its name (tag name). - # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt> - # to match any element. - # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the - # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>. - # * <tt>.class</tt> -- Match an element based on its class name, all - # class names if more than one specified. - # * <tt>[attr]</tt> -- Match an element that has the specified attribute. - # * <tt>[attr=value]</tt> -- Match an element that has the specified - # attribute and value. (More operators are supported see below) - # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class, - # such as <tt>:nth-child</tt> and <tt>:empty</tt>. - # * <tt>:not(expr)</tt> -- Match an element that does not match the - # negation expression. - # - # When using a combination of the above, the element name comes first - # followed by identifier, class names, attributes, pseudo classes and - # negation in any order. Do not separate these parts with spaces! - # Space separation is used for descendant selectors. - # - # For example: - # selector = HTML::Selector.new "form.login[action=/login]" - # The matched element must be of type +form+ and have the class +login+. - # It may have other classes, but the class +login+ is required to match. - # It must also have an attribute called +action+ with the value - # <tt>/login</tt>. - # - # This selector will match the following element: - # <form class="login form" method="post" action="/login"> - # but will not match the element: - # <form method="post" action="/logout"> - # - # === Attribute Values - # - # Several operators are supported for matching attributes: - # * <tt>name</tt> -- The element must have an attribute with that name. - # * <tt>name=value</tt> -- The element must have an attribute with that - # name and value. - # * <tt>name^=value</tt> -- The attribute value must start with the - # specified value. - # * <tt>name$=value</tt> -- The attribute value must end with the - # specified value. - # * <tt>name*=value</tt> -- The attribute value must contain the - # specified value. - # * <tt>name~=word</tt> -- The attribute value must contain the specified - # word (space separated). - # * <tt>name|=word</tt> -- The attribute value must start with specified - # word. - # - # For example, the following two selectors match the same element: - # #my_id - # [id=my_id] - # and so do the following two selectors: - # .my_class - # [class~=my_class] - # - # === Alternatives, siblings, children - # - # Complex selectors use a combination of expressions to match elements: - # * <tt>expr1 expr2</tt> -- Match any element against the second expression - # if it has some parent element that matches the first expression. - # * <tt>expr1 > expr2</tt> -- Match any element against the second expression - # if it is the child of an element that matches the first expression. - # * <tt>expr1 + expr2</tt> -- Match any element against the second expression - # if it immediately follows an element that matches the first expression. - # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression - # that comes after an element that matches the first expression. - # * <tt>expr1, expr2</tt> -- Match any element against the first expression, - # or against the second expression. - # - # Since children and sibling selectors may match more than one element given - # the first element, the #match method may return more than one match. - # - # === Pseudo classes - # - # Pseudo classes were introduced in CSS 3. They are most often used to select - # elements in a given position: - # * <tt>:root</tt> -- Match the element only if it is the root element - # (no parent element). - # * <tt>:empty</tt> -- Match the element only if it has no child elements, - # and no text content. - # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt> - # as its text content (ignoring leading and trailing whitespace). - # * <tt>:only-child</tt> -- Match the element if it is the only child (element) - # of its parent element. - # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element) - # of its parent element and its type. - # * <tt>:first-child</tt> -- Match the element if it is the first child (element) - # of its parent element. - # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element) - # of its parent element of its type. - # * <tt>:last-child</tt> -- Match the element if it is the last child (element) - # of its parent element. - # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element) - # of its parent element of its type. - # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element) - # of its parent element. The value <tt>b</tt> specifies its index, starting with 1. - # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element) - # in each group of <tt>a</tt> child elements of its parent element. - # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element) - # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child - # elements of its parent element. - # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third). - # Same as <tt>:nth-child(2n+1)</tt>. - # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second, - # fourth). Same as <tt>:nth-child(2n+2)</tt>. - # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type. - # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child. - # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and - # only elements of its type. - # * <tt>:not(selector)</tt> -- Match the element only if the element does not - # match the simple selector. - # - # As you can see, <tt>:nth-child</tt> pseudo class and its variant can get quite - # tricky and the CSS specification doesn't do a much better job explaining it. - # But after reading the examples and trying a few combinations, it's easy to - # figure out. - # - # For example: - # table tr:nth-child(odd) - # Selects every second row in the table starting with the first one. - # - # div p:nth-child(4) - # Selects the fourth paragraph in the +div+, but not if the +div+ contains - # other elements, since those are also counted. - # - # div p:nth-of-type(4) - # Selects the fourth paragraph in the +div+, counting only paragraphs, and - # ignoring all other elements. - # - # div p:nth-of-type(-n+4) - # Selects the first four paragraphs, ignoring all others. - # - # And you can always select an element that matches one set of rules but - # not another using <tt>:not</tt>. For example: - # p:not(.post) - # Matches all paragraphs that do not have the class <tt>.post</tt>. - # - # === Substitution Values - # - # You can use substitution with identifiers, class names and element values. - # A substitution takes the form of a question mark (<tt>?</tt>) and uses the - # next value in the argument list following the CSS expression. - # - # The substitution value may be a string or a regular expression. All other - # values are converted to strings. - # - # For example: - # selector = HTML::Selector.new "#?", /^\d+$/ - # matches any element whose identifier consists of one or more digits. - # - # See http://www.w3.org/TR/css3-selectors/ - class Selector - - - # An invalid selector. - class InvalidSelectorError < StandardError #:nodoc: - end - - - class << self - - # :call-seq: - # Selector.for_class(cls) => selector - # - # Creates a new selector for the given class name. - def for_class(cls) - self.new([".?", cls]) - end - - - # :call-seq: - # Selector.for_id(id) => selector - # - # Creates a new selector for the given id. - def for_id(id) - self.new(["#?", id]) - end - - end - - - # :call-seq: - # Selector.new(string, [values ...]) => selector - # - # Creates a new selector from a CSS 2 selector expression. - # - # The first argument is the selector expression. All other arguments - # are used for value substitution. - # - # Throws InvalidSelectorError is the selector expression is invalid. - def initialize(selector, *values) - raise ArgumentError, "CSS expression cannot be empty" if selector.empty? - @source = "" - values = values[0] if values.size == 1 && values[0].is_a?(Array) - - # We need a copy to determine if we failed to parse, and also - # preserve the original pass by-ref statement. - statement = selector.strip.dup - - # Create a simple selector, along with negation. - simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) } - - @alternates = [] - @depends = nil - - # Alternative selector. - if statement.sub!(/^\s*,\s*/, "") - second = Selector.new(statement, values) - @alternates << second - # If there are alternate selectors, we group them in the top selector. - if alternates = second.instance_variable_get(:@alternates) - second.instance_variable_set(:@alternates, []) - @alternates.concat alternates - end - @source << " , " << second.to_s - # Sibling selector: create a dependency into second selector that will - # match element immediately following this one. - elsif statement.sub!(/^\s*\+\s*/, "") - second = next_selector(statement, values) - @depends = lambda do |element, first| - if element = next_element(element) - second.match(element, first) - end - end - @source << " + " << second.to_s - # Adjacent selector: create a dependency into second selector that will - # match all elements following this one. - elsif statement.sub!(/^\s*~\s*/, "") - second = next_selector(statement, values) - @depends = lambda do |element, first| - matches = [] - while element = next_element(element) - if subset = second.match(element, first) - if first && !subset.empty? - matches << subset.first - break - else - matches.concat subset - end - end - end - matches.empty? ? nil : matches - end - @source << " ~ " << second.to_s - # Child selector: create a dependency into second selector that will - # match a child element of this one. - elsif statement.sub!(/^\s*>\s*/, "") - second = next_selector(statement, values) - @depends = lambda do |element, first| - matches = [] - element.children.each do |child| - if child.tag? && subset = second.match(child, first) - if first && !subset.empty? - matches << subset.first - break - else - matches.concat subset - end - end - end - matches.empty? ? nil : matches - end - @source << " > " << second.to_s - # Descendant selector: create a dependency into second selector that - # will match all descendant elements of this one. Note, - elsif statement =~ /^\s+\S+/ && statement != selector - second = next_selector(statement, values) - @depends = lambda do |element, first| - matches = [] - stack = element.children.reverse - while node = stack.pop - next unless node.tag? - if subset = second.match(node, first) - if first && !subset.empty? - matches << subset.first - break - else - matches.concat subset - end - elsif children = node.children - stack.concat children.reverse - end - end - matches.empty? ? nil : matches - end - @source << " " << second.to_s - else - # The last selector is where we check that we parsed - # all the parts. - unless statement.empty? || statement.strip.empty? - raise ArgumentError, "Invalid selector: #{statement}" - end - end - end - - - # :call-seq: - # match(element, first?) => array or nil - # - # Matches an element against the selector. - # - # For a simple selector this method returns an array with the - # element if the element matches, nil otherwise. - # - # For a complex selector (sibling and descendant) this method - # returns an array with all matching elements, nil if no match is - # found. - # - # Use +first_only=true+ if you are only interested in the first element. - # - # For example: - # if selector.match(element) - # puts "Element is a login form" - # end - def match(element, first_only = false) - # Match element if no element name or element name same as element name - if matched = (!@tag_name || @tag_name == element.name) - # No match if one of the attribute matches failed - for attr in @attributes - if element.attributes[attr[0]] !~ attr[1] - matched = false - break - end - end - end - - # Pseudo class matches (nth-child, empty, etc). - if matched - for pseudo in @pseudo - unless pseudo.call(element) - matched = false - break - end - end - end - - # Negation. Same rules as above, but we fail if a match is made. - if matched && @negation - for negation in @negation - if negation[:tag_name] == element.name - matched = false - else - for attr in negation[:attributes] - if element.attributes[attr[0]] =~ attr[1] - matched = false - break - end - end - end - if matched - for pseudo in negation[:pseudo] - if pseudo.call(element) - matched = false - break - end - end - end - break unless matched - end - end - - # If element matched but depends on another element (child, - # sibling, etc), apply the dependent matches instead. - if matched && @depends - matches = @depends.call(element, first_only) - else - matches = matched ? [element] : nil - end - - # If this selector is part of the group, try all the alternative - # selectors (unless first_only). - if !first_only || !matches - @alternates.each do |alternate| - break if matches && first_only - if subset = alternate.match(element, first_only) - if matches - matches.concat subset - else - matches = subset - end - end - end - end - - matches - end - - - # :call-seq: - # select(root) => array - # - # Selects and returns an array with all matching elements, beginning - # with one node and traversing through all children depth-first. - # Returns an empty array if no match is found. - # - # The root node may be any element in the document, or the document - # itself. - # - # For example: - # selector = HTML::Selector.new "input[type=text]" - # matches = selector.select(element) - # matches.each do |match| - # puts "Found text field with name #{match.attributes['name']}" - # end - def select(root) - matches = [] - stack = [root] - while node = stack.pop - if node.tag? && subset = match(node, false) - subset.each do |match| - matches << match unless matches.any? { |item| item.equal?(match) } - end - elsif children = node.children - stack.concat children.reverse - end - end - matches - end - - - # Similar to #select but returns the first matching element. Returns +nil+ - # if no element matches the selector. - def select_first(root) - stack = [root] - while node = stack.pop - if node.tag? && subset = match(node, true) - return subset.first if !subset.empty? - elsif children = node.children - stack.concat children.reverse - end - end - nil - end - - - def to_s #:nodoc: - @source - end - - - # Returns the next element after this one. Skips sibling text nodes. - # - # With the +name+ argument, returns the next element with that name, - # skipping other sibling elements. - def next_element(element, name = nil) - if siblings = element.parent.children - found = false - siblings.each do |node| - if node.equal?(element) - found = true - elsif found && node.tag? - return node if (name.nil? || node.name == name) - end - end - end - nil - end - - - protected - - - # Creates a simple selector given the statement and array of - # substitution values. - # - # Returns a hash with the values +tag_name+, +attributes+, - # +pseudo+ (classes) and +negation+. - # - # Called the first time with +can_negate+ true to allow - # negation. Called a second time with false since negation - # cannot be negated. - def simple_selector(statement, values, can_negate = true) - tag_name = nil - attributes = [] - pseudo = [] - negation = [] - - # Element name. (Note that in negation, this can come at - # any order, but for simplicity we allow if only first). - statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match| - match.strip! - tag_name = match.downcase unless match == "*" - @source << match - "" # Remove - end - - # Get identifier, class, attribute name, pseudo or negation. - while true - # Element identifier. - next if statement.sub!(/^#(\?|[\w\-]+)/) do - id = $1 - if id == "?" - id = values.shift - end - @source << "##{id}" - id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp) - attributes << ["id", id] - "" # Remove - end - - # Class name. - next if statement.sub!(/^\.([\w\-]+)/) do - class_name = $1 - @source << ".#{class_name}" - class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp) - attributes << ["class", class_name] - "" # Remove - end - - # Attribute value. - next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do - name, equality, value = $1, $2, $3 - if value == "?" - value = values.shift - else - # Handle single and double quotes. - value.strip! - if (value[0] == ?" || value[0] == ?') && value[0] == value[-1] - value = value[1..-2] - end - end - @source << "[#{name}#{equality}'#{value}']" - attributes << [name.downcase.strip, attribute_match(equality, value)] - "" # Remove - end - - # Root element only. - next if statement.sub!(/^:root/) do - pseudo << lambda do |element| - element.parent.nil? || !element.parent.tag? - end - @source << ":root" - "" # Remove - end - - # Nth-child including last and of-type. - next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match| - reverse = $1 == "last-" - of_type = $2 == "of-type" - @source << ":nth-#{$1}#{$2}(" - case $3 - when "odd" - pseudo << nth_child(2, 1, of_type, reverse) - @source << "odd)" - when "even" - pseudo << nth_child(2, 2, of_type, reverse) - @source << "even)" - when /^(\d+|\?)$/ # b only - b = ($1 == "?" ? values.shift : $1).to_i - pseudo << nth_child(0, b, of_type, reverse) - @source << "#{b})" - when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/ - a = ($1 == "?" ? values.shift : - $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i - b = ($2 == "?" ? values.shift : $2).to_i - pseudo << nth_child(a, b, of_type, reverse) - @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})") - else - raise ArgumentError, "Invalid nth-child #{match}" - end - "" # Remove - end - # First/last child (of type). - next if statement.sub!(/^:(first|last)-(child|of-type)/) do - reverse = $1 == "last" - of_type = $2 == "of-type" - pseudo << nth_child(0, 1, of_type, reverse) - @source << ":#{$1}-#{$2}" - "" # Remove - end - # Only child (of type). - next if statement.sub!(/^:only-(child|of-type)/) do - of_type = $1 == "of-type" - pseudo << only_child(of_type) - @source << ":only-#{$1}" - "" # Remove - end - - # Empty: no child elements or meaningful content (whitespaces - # are ignored). - next if statement.sub!(/^:empty/) do - pseudo << lambda do |element| - empty = true - for child in element.children - if child.tag? || !child.content.strip.empty? - empty = false - break - end - end - empty - end - @source << ":empty" - "" # Remove - end - # Content: match the text content of the element, stripping - # leading and trailing spaces. - next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do - content = $1 - if content == "?" - content = values.shift - elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1] - content = content[1..-2] - end - @source << ":content('#{content}')" - content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp) - pseudo << lambda do |element| - text = "" - for child in element.children - unless child.tag? - text << child.content - end - end - text.strip =~ content - end - "" # Remove - end - - # Negation. Create another simple selector to handle it. - if statement.sub!(/^:not\(\s*/, "") - raise ArgumentError, "Double negatives are not missing feature" unless can_negate - @source << ":not(" - negation << simple_selector(statement, values, false) - raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "") - @source << ")" - next - end - - # No match: moving on. - break - end - - # Return hash. The keys are mapped to instance variables. - {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation} - end - - - # Create a regular expression to match an attribute value based - # on the equality operator (=, ^=, |=, etc). - def attribute_match(equality, value) - regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s) - case equality - when "=" then - # Match the attribute value in full - Regexp.new("^#{regexp}$") - when "~=" then - # Match a space-separated word within the attribute value - Regexp.new("(^|\s)#{regexp}($|\s)") - when "^=" - # Match the beginning of the attribute value - Regexp.new("^#{regexp}") - when "$=" - # Match the end of the attribute value - Regexp.new("#{regexp}$") - when "*=" - # Match substring of the attribute value - regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp) - when "|=" then - # Match the first space-separated item of the attribute value - Regexp.new("^#{regexp}($|\s)") - else - raise InvalidSelectorError, "Invalid operation/value" unless value.empty? - # Match all attributes values (existence check) - // - end - end - - - # Returns a lambda that can match an element against the nth-child - # pseudo class, given the following arguments: - # * +a+ -- Value of a part. - # * +b+ -- Value of b part. - # * +of_type+ -- True to test only elements of this type (of-type). - # * +reverse+ -- True to count in reverse order (last-). - def nth_child(a, b, of_type, reverse) - # a = 0 means select at index b, if b = 0 nothing selected - return lambda { |element| false } if a == 0 && b == 0 - # a < 0 and b < 0 will never match against an index - return lambda { |element| false } if a < 0 && b < 0 - b = a + b + 1 if b < 0 # b < 0 just picks last element from each group - b -= 1 unless b == 0 # b == 0 is same as b == 1, otherwise zero based - lambda do |element| - # Element must be inside parent element. - return false unless element.parent && element.parent.tag? - index = 0 - # Get siblings, reverse if counting from last. - siblings = element.parent.children - siblings = siblings.reverse if reverse - # Match element name if of-type, otherwise ignore name. - name = of_type ? element.name : nil - found = false - for child in siblings - # Skip text nodes/comments. - if child.tag? && (name == nil || child.name == name) - if a == 0 - # Shortcut when a == 0 no need to go past count - if index == b - found = child.equal?(element) - break - end - elsif a < 0 - # Only look for first b elements - break if index > b - if child.equal?(element) - found = (index % a) == 0 - break - end - else - # Otherwise, break if child found and count == an+b - if child.equal?(element) - found = (index % a) == b - break - end - end - index += 1 - end - end - found - end - end - - - # Creates a only child lambda. Pass +of-type+ to only look at - # elements of its type. - def only_child(of_type) - lambda do |element| - # Element must be inside parent element. - return false unless element.parent && element.parent.tag? - name = of_type ? element.name : nil - other = false - for child in element.parent.children - # Skip text nodes/comments. - if child.tag? && (name == nil || child.name == name) - unless child.equal?(element) - other = true - break - end - end - end - !other - end - end - - - # Called to create a dependent selector (sibling, descendant, etc). - # Passes the remainder of the statement that will be reduced to zero - # eventually, and array of substitution values. - # - # This method is called from four places, so it helps to put it here - # for reuse. The only logic deals with the need to detect comma - # separators (alternate) and apply them to the selector group of the - # top selector. - def next_selector(statement, values) - second = Selector.new(statement, values) - # If there are alternate selectors, we group them in the top selector. - if alternates = second.instance_variable_get(:@alternates) - second.instance_variable_set(:@alternates, []) - @alternates.concat alternates - end - second - end - - end - - - # See HTML::Selector.new - def self.selector(statement, *values) - Selector.new(statement, *values) - end - - - class Tag - - def select(selector, *values) - selector = HTML::Selector.new(selector, values) - selector.select(self) - end - - end - -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb deleted file mode 100644 index adf4e45930..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb +++ /dev/null @@ -1,107 +0,0 @@ -require 'strscan' - -module HTML #:nodoc: - - # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each - # token is a string. Each string represents either "text", or an HTML element. - # - # This currently assumes valid XHTML, which means no free < or > characters. - # - # Usage: - # - # tokenizer = HTML::Tokenizer.new(text) - # while token = tokenizer.next - # p token - # end - class Tokenizer #:nodoc: - - # The current (byte) position in the text - attr_reader :position - - # The current line number - attr_reader :line - - # Create a new Tokenizer for the given text. - def initialize(text) - text.encode! - @scanner = StringScanner.new(text) - @position = 0 - @line = 0 - @current_line = 1 - end - - # Returns the next token in the sequence, or +nil+ if there are no more tokens in - # the stream. - def next - return nil if @scanner.eos? - @position = @scanner.pos - @line = @current_line - if @scanner.check(/<\S/) - update_current_line(scan_tag) - else - update_current_line(scan_text) - end - end - - private - - # Treat the text at the current position as a tag, and scan it. Supports - # comments, doctype tags, and regular tags, and ignores less-than and - # greater-than characters within quoted strings. - def scan_tag - tag = @scanner.getch - if @scanner.scan(/!--/) # comment - tag << @scanner.matched - tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/)) - elsif @scanner.scan(/!\[CDATA\[/) - tag << @scanner.matched - tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/)) - elsif @scanner.scan(/!/) # doctype - tag << @scanner.matched - tag << consume_quoted_regions - else - tag << consume_quoted_regions - end - tag - end - - # Scan all text up to the next < character and return it. - def scan_text - "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}" - end - - # Counts the number of newlines in the text and updates the current line - # accordingly. - def update_current_line(text) - text.scan(/\r?\n/) { @current_line += 1 } - end - - # Skips over quoted strings, so that less-than and greater-than characters - # within the strings are ignored. - def consume_quoted_regions - text = "" - loop do - match = @scanner.scan_until(/['"<>]/) or break - - delim = @scanner.matched - if delim == "<" - match = match.chop - @scanner.pos -= 1 - end - - text << match - break if delim == "<" || delim == ">" - - # consume the quoted region - while match = @scanner.scan_until(/[\\#{delim}]/) - text << match - break if @scanner.matched == delim - break if @scanner.eos? - text << @scanner.getch # skip the escaped character - end - end - text - end - end - -end diff --git a/actionview/lib/action_view/vendor/html-scanner/html/version.rb b/actionview/lib/action_view/vendor/html-scanner/html/version.rb deleted file mode 100644 index 6d645c3e14..0000000000 --- a/actionview/lib/action_view/vendor/html-scanner/html/version.rb +++ /dev/null @@ -1,11 +0,0 @@ -module HTML #:nodoc: - module Version #:nodoc: - - MAJOR = 0 - MINOR = 5 - TINY = 3 - - STRING = [ MAJOR, MINOR, TINY ].join(".") - - end -end |