diff options
Diffstat (limited to 'actionpack/lib/action_controller/vendor/html-scanner/html/node.rb')
-rw-r--r-- | actionpack/lib/action_controller/vendor/html-scanner/html/node.rb | 532 |
1 files changed, 0 insertions, 532 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb deleted file mode 100644 index 4e1f016431..0000000000 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb +++ /dev/null @@ -1,532 +0,0 @@ -require 'strscan' - -module HTML #:nodoc: - - class Conditions < Hash #:nodoc: - def initialize(hash) - super() - hash = { :content => hash } unless Hash === hash - hash = keys_to_symbols(hash) - hash.each do |k,v| - case k - when :tag, :content then - # keys are valid, and require no further processing - when :attributes then - hash[k] = keys_to_strings(v) - when :parent, :child, :ancestor, :descendant, :sibling, :before, - :after - hash[k] = Conditions.new(v) - when :children - hash[k] = v = keys_to_symbols(v) - v.each do |key,value| - case key - when :count, :greater_than, :less_than - # keys are valid, and require no further processing - when :only - v[key] = Conditions.new(value) - else - raise "illegal key #{key.inspect} => #{value.inspect}" - end - end - else - raise "illegal key #{k.inspect} => #{v.inspect}" - end - end - update hash - end - - private - - def keys_to_strings(hash) - Hash[hash.keys.map {|k| [k.to_s, hash[k]]}] - end - - def keys_to_symbols(hash) - Hash[hash.keys.map do |k| - raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym) - [k.to_sym, hash[k]] - end] - end - end - - # The base class of all nodes, textual and otherwise, in an HTML document. - class Node #:nodoc: - # The array of children of this node. Not all nodes have children. - attr_reader :children - - # The parent node of this node. All nodes have a parent, except for the - # root node. - attr_reader :parent - - # The line number of the input where this node was begun - attr_reader :line - - # The byte position in the input where this node was begun - attr_reader :position - - # Create a new node as a child of the given parent. - def initialize(parent, line=0, pos=0) - @parent = parent - @children = [] - @line, @position = line, pos - end - - # Return a textual representation of the node. - def to_s - @children.join() - end - - # Return false (subclasses must override this to provide specific matching - # behavior.) +conditions+ may be of any type. - def match(conditions) - false - end - - # Search the children of this node for the first node for which #find - # returns non +nil+. Returns the result of the #find call that succeeded. - def find(conditions) - conditions = validate_conditions(conditions) - @children.each do |child| - node = child.find(conditions) - return node if node - end - nil - end - - # Search for all nodes that match the given conditions, and return them - # as an array. - def find_all(conditions) - conditions = validate_conditions(conditions) - - matches = [] - matches << self if match(conditions) - @children.each do |child| - matches.concat child.find_all(conditions) - end - matches - end - - # Returns +false+. Subclasses may override this if they define a kind of - # tag. - def tag? - false - end - - def validate_conditions(conditions) - Conditions === conditions ? conditions : Conditions.new(conditions) - end - - def ==(node) - return false unless self.class == node.class && children.size == node.children.size - - equivalent = true - - children.size.times do |i| - equivalent &&= children[i] == node.children[i] - end - - equivalent - end - - class <<self - def parse(parent, line, pos, content, strict=true) - if content !~ /^<\S/ - Text.new(parent, line, pos, content) - else - scanner = StringScanner.new(content) - - unless scanner.skip(/</) - if strict - raise "expected <" - else - return Text.new(parent, line, pos, content) - end - end - - if scanner.skip(/!\[CDATA\[/) - unless scanner.skip_until(/\]\]>/) - if strict - raise "expected ]]> (got #{scanner.rest.inspect} for #{content})" - else - scanner.skip_until(/\Z/) - end - end - - return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, '')) - end - - closing = ( scanner.scan(/\//) ? :close : nil ) - return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/) - name.downcase! - - unless closing - scanner.skip(/\s*/) - attributes = {} - while attr = scanner.scan(/[-\w:]+/) - value = true - if scanner.scan(/\s*=\s*/) - if delim = scanner.scan(/['"]/) - value = "" - while text = scanner.scan(/[^#{delim}\\]+|./) - case text - when "\\" then - value << text - break if scanner.eos? - value << scanner.getch - when delim - break - else value << text - end - end - else - value = scanner.scan(/[^\s>\/]+/) - end - end - attributes[attr.downcase] = value - scanner.skip(/\s*/) - end - - closing = ( scanner.scan(/\//) ? :self : nil ) - end - - unless scanner.scan(/\s*>/) - if strict - raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" - else - # throw away all text until we find what we're looking for - scanner.skip_until(/>/) or scanner.terminate - end - end - - Tag.new(parent, line, pos, name, attributes, closing) - end - end - end - end - - # A node that represents text, rather than markup. - class Text < Node #:nodoc: - - attr_reader :content - - # Creates a new text node as a child of the given parent, with the given - # content. - def initialize(parent, line, pos, content) - super(parent, line, pos) - @content = content - end - - # Returns the content of this node. - def to_s - @content - end - - # Returns +self+ if this node meets the given conditions. Text nodes support - # conditions of the following kinds: - # - # * if +conditions+ is a string, it must be a substring of the node's - # content - # * if +conditions+ is a regular expression, it must match the node's - # content - # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that - # is either a string or a regexp, and which is interpreted as described - # above. - def find(conditions) - match(conditions) && self - end - - # Returns non-+nil+ if this node meets the given conditions, or +nil+ - # otherwise. See the discussion of #find for the valid conditions. - def match(conditions) - case conditions - when String - @content == conditions - when Regexp - @content =~ conditions - when Hash - conditions = validate_conditions(conditions) - - # Text nodes only have :content, :parent, :ancestor - unless (conditions.keys - [:content, :parent, :ancestor]).empty? - return false - end - - match(conditions[:content]) - else - nil - end - end - - def ==(node) - return false unless super - content == node.content - end - end - - # A CDATA node is simply a text node with a specialized way of displaying - # itself. - class CDATA < Text #:nodoc: - def to_s - "<![CDATA[#{super}]]>" - end - end - - # A Tag is any node that represents markup. It may be an opening tag, a - # closing tag, or a self-closing tag. It has a name, and may have a hash of - # attributes. - class Tag < Node #:nodoc: - - # Either +nil+, <tt>:close</tt>, or <tt>:self</tt> - attr_reader :closing - - # Either +nil+, or a hash of attributes for this node. - attr_reader :attributes - - # The name of this tag. - attr_reader :name - - # Create a new node as a child of the given parent, using the given content - # to describe the node. It will be parsed and the node name, attributes and - # closing status extracted. - def initialize(parent, line, pos, name, attributes, closing) - super(parent, line, pos) - @name = name - @attributes = attributes - @closing = closing - end - - # A convenience for obtaining an attribute of the node. Returns +nil+ if - # the node has no attributes. - def [](attr) - @attributes ? @attributes[attr] : nil - end - - # Returns non-+nil+ if this tag can contain child nodes. - def childless?(xml = false) - return false if xml && @closing.nil? - !@closing.nil? || - @name =~ /^(img|br|hr|link|meta|area|base|basefont| - col|frame|input|isindex|param)$/ox - end - - # Returns a textual representation of the node - def to_s - if @closing == :close - "</#{@name}>" - else - s = "<#{@name}" - @attributes.each do |k,v| - s << " #{k}" - s << "=\"#{v}\"" if String === v - end - s << " /" if @closing == :self - s << ">" - @children.each { |child| s << child.to_s } - s << "</#{@name}>" if @closing != :self && !@children.empty? - s - end - end - - # If either the node or any of its children meet the given conditions, the - # matching node is returned. Otherwise, +nil+ is returned. (See the - # description of the valid conditions in the +match+ method.) - def find(conditions) - match(conditions) && self || super - end - - # Returns +true+, indicating that this node represents an HTML tag. - def tag? - true - end - - # Returns +true+ if the node meets any of the given conditions. The - # +conditions+ parameter must be a hash of any of the following keys - # (all are optional): - # - # * <tt>:tag</tt>: the node name must match the corresponding value - # * <tt>:attributes</tt>: a hash. The node's values must match the - # corresponding values in the hash. - # * <tt>:parent</tt>: a hash. The node's parent must match the - # corresponding hash. - # * <tt>:child</tt>: a hash. At least one of the node's immediate children - # must meet the criteria described by the hash. - # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must - # meet the criteria described by the hash. - # * <tt>:descendant</tt>: a hash. At least one of the node's descendants - # must meet the criteria described by the hash. - # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must - # meet the criteria described by the hash. - # * <tt>:after</tt>: a hash. The node must be after any sibling meeting - # the criteria described by the hash, and at least one sibling must match. - # * <tt>:before</tt>: a hash. The node must be before any sibling meeting - # the criteria described by the hash, and at least one sibling must match. - # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the - # keys: - # ** <tt>:count</tt>: either a number or a range which must equal (or - # include) the number of children that match. - # ** <tt>:less_than</tt>: the number of matching children must be less than - # this number. - # ** <tt>:greater_than</tt>: the number of matching children must be - # greater than this number. - # ** <tt>:only</tt>: another hash consisting of the keys to use - # to match on the children, and only matching children will be - # counted. - # - # Conditions are matched using the following algorithm: - # - # * if the condition is a string, it must be a substring of the value. - # * if the condition is a regexp, it must match the value. - # * if the condition is a number, the value must match number.to_s. - # * if the condition is +true+, the value must not be +nil+. - # * if the condition is +false+ or +nil+, the value must be +nil+. - # - # Usage: - # - # # test if the node is a "span" tag - # node.match :tag => "span" - # - # # test if the node's parent is a "div" - # node.match :parent => { :tag => "div" } - # - # # test if any of the node's ancestors are "table" tags - # node.match :ancestor => { :tag => "table" } - # - # # test if any of the node's immediate children are "em" tags - # node.match :child => { :tag => "em" } - # - # # test if any of the node's descendants are "strong" tags - # node.match :descendant => { :tag => "strong" } - # - # # test if the node has between 2 and 4 span tags as immediate children - # node.match :children => { :count => 2..4, :only => { :tag => "span" } } - # - # # get funky: test to see if the node is a "div", has a "ul" ancestor - # # and an "li" parent (with "class" = "enum"), and whether or not it has - # # a "span" descendant that contains # text matching /hello world/: - # node.match :tag => "div", - # :ancestor => { :tag => "ul" }, - # :parent => { :tag => "li", - # :attributes => { :class => "enum" } }, - # :descendant => { :tag => "span", - # :child => /hello world/ } - def match(conditions) - conditions = validate_conditions(conditions) - # check content of child nodes - if conditions[:content] - if children.empty? - return false unless match_condition("", conditions[:content]) - else - return false unless children.find { |child| child.match(conditions[:content]) } - end - end - - # test the name - return false unless match_condition(@name, conditions[:tag]) if conditions[:tag] - - # test attributes - (conditions[:attributes] || {}).each do |key, value| - return false unless match_condition(self[key], value) - end - - # test parent - return false unless parent.match(conditions[:parent]) if conditions[:parent] - - # test children - return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child] - - # test ancestors - if conditions[:ancestor] - return false unless catch :found do - p = self - throw :found, true if p.match(conditions[:ancestor]) while p = p.parent - end - end - - # test descendants - if conditions[:descendant] - return false unless children.find do |child| - # test the child - child.match(conditions[:descendant]) || - # test the child's descendants - child.match(:descendant => conditions[:descendant]) - end - end - - # count children - if opts = conditions[:children] - matches = children.select do |c| - (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?)) - end - - matches = matches.select { |c| c.match(opts[:only]) } if opts[:only] - opts.each do |key, value| - next if key == :only - case key - when :count - if Integer === value - return false if matches.length != value - else - return false unless value.include?(matches.length) - end - when :less_than - return false unless matches.length < value - when :greater_than - return false unless matches.length > value - else raise "unknown count condition #{key}" - end - end - end - - # test siblings - if conditions[:sibling] || conditions[:before] || conditions[:after] - siblings = parent ? parent.children : [] - self_index = siblings.index(self) - - if conditions[:sibling] - return false unless siblings.detect do |s| - s != self && s.match(conditions[:sibling]) - end - end - - if conditions[:before] - return false unless siblings[self_index+1..-1].detect do |s| - s != self && s.match(conditions[:before]) - end - end - - if conditions[:after] - return false unless siblings[0,self_index].detect do |s| - s != self && s.match(conditions[:after]) - end - end - end - - true - end - - def ==(node) - return false unless super - return false unless closing == node.closing && self.name == node.name - attributes == node.attributes - end - - private - # Match the given value to the given condition. - def match_condition(value, condition) - case condition - when String - value && value == condition - when Regexp - value && value.match(condition) - when Numeric - value == condition.to_s - when true - !value.nil? - when false, nil - value.nil? - else - false - end - end - end -end |