aboutsummaryrefslogtreecommitdiffstats
path: root/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
diff options
context:
space:
mode:
Diffstat (limited to 'actionpack/lib/action_controller/vendor/html-scanner/html/node.rb')
-rw-r--r--actionpack/lib/action_controller/vendor/html-scanner/html/node.rb532
1 files changed, 0 insertions, 532 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
deleted file mode 100644
index 4e1f016431..0000000000
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
+++ /dev/null
@@ -1,532 +0,0 @@
-require 'strscan'
-
-module HTML #:nodoc:
-
- class Conditions < Hash #:nodoc:
- def initialize(hash)
- super()
- hash = { :content => hash } unless Hash === hash
- hash = keys_to_symbols(hash)
- hash.each do |k,v|
- case k
- when :tag, :content then
- # keys are valid, and require no further processing
- when :attributes then
- hash[k] = keys_to_strings(v)
- when :parent, :child, :ancestor, :descendant, :sibling, :before,
- :after
- hash[k] = Conditions.new(v)
- when :children
- hash[k] = v = keys_to_symbols(v)
- v.each do |key,value|
- case key
- when :count, :greater_than, :less_than
- # keys are valid, and require no further processing
- when :only
- v[key] = Conditions.new(value)
- else
- raise "illegal key #{key.inspect} => #{value.inspect}"
- end
- end
- else
- raise "illegal key #{k.inspect} => #{v.inspect}"
- end
- end
- update hash
- end
-
- private
-
- def keys_to_strings(hash)
- Hash[hash.keys.map {|k| [k.to_s, hash[k]]}]
- end
-
- def keys_to_symbols(hash)
- Hash[hash.keys.map do |k|
- raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
- [k.to_sym, hash[k]]
- end]
- end
- end
-
- # The base class of all nodes, textual and otherwise, in an HTML document.
- class Node #:nodoc:
- # The array of children of this node. Not all nodes have children.
- attr_reader :children
-
- # The parent node of this node. All nodes have a parent, except for the
- # root node.
- attr_reader :parent
-
- # The line number of the input where this node was begun
- attr_reader :line
-
- # The byte position in the input where this node was begun
- attr_reader :position
-
- # Create a new node as a child of the given parent.
- def initialize(parent, line=0, pos=0)
- @parent = parent
- @children = []
- @line, @position = line, pos
- end
-
- # Return a textual representation of the node.
- def to_s
- @children.join()
- end
-
- # Return false (subclasses must override this to provide specific matching
- # behavior.) +conditions+ may be of any type.
- def match(conditions)
- false
- end
-
- # Search the children of this node for the first node for which #find
- # returns non +nil+. Returns the result of the #find call that succeeded.
- def find(conditions)
- conditions = validate_conditions(conditions)
- @children.each do |child|
- node = child.find(conditions)
- return node if node
- end
- nil
- end
-
- # Search for all nodes that match the given conditions, and return them
- # as an array.
- def find_all(conditions)
- conditions = validate_conditions(conditions)
-
- matches = []
- matches << self if match(conditions)
- @children.each do |child|
- matches.concat child.find_all(conditions)
- end
- matches
- end
-
- # Returns +false+. Subclasses may override this if they define a kind of
- # tag.
- def tag?
- false
- end
-
- def validate_conditions(conditions)
- Conditions === conditions ? conditions : Conditions.new(conditions)
- end
-
- def ==(node)
- return false unless self.class == node.class && children.size == node.children.size
-
- equivalent = true
-
- children.size.times do |i|
- equivalent &&= children[i] == node.children[i]
- end
-
- equivalent
- end
-
- class <<self
- def parse(parent, line, pos, content, strict=true)
- if content !~ /^<\S/
- Text.new(parent, line, pos, content)
- else
- scanner = StringScanner.new(content)
-
- unless scanner.skip(/</)
- if strict
- raise "expected <"
- else
- return Text.new(parent, line, pos, content)
- end
- end
-
- if scanner.skip(/!\[CDATA\[/)
- unless scanner.skip_until(/\]\]>/)
- if strict
- raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
- else
- scanner.skip_until(/\Z/)
- end
- end
-
- return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
- end
-
- closing = ( scanner.scan(/\//) ? :close : nil )
- return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/)
- name.downcase!
-
- unless closing
- scanner.skip(/\s*/)
- attributes = {}
- while attr = scanner.scan(/[-\w:]+/)
- value = true
- if scanner.scan(/\s*=\s*/)
- if delim = scanner.scan(/['"]/)
- value = ""
- while text = scanner.scan(/[^#{delim}\\]+|./)
- case text
- when "\\" then
- value << text
- break if scanner.eos?
- value << scanner.getch
- when delim
- break
- else value << text
- end
- end
- else
- value = scanner.scan(/[^\s>\/]+/)
- end
- end
- attributes[attr.downcase] = value
- scanner.skip(/\s*/)
- end
-
- closing = ( scanner.scan(/\//) ? :self : nil )
- end
-
- unless scanner.scan(/\s*>/)
- if strict
- raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
- else
- # throw away all text until we find what we're looking for
- scanner.skip_until(/>/) or scanner.terminate
- end
- end
-
- Tag.new(parent, line, pos, name, attributes, closing)
- end
- end
- end
- end
-
- # A node that represents text, rather than markup.
- class Text < Node #:nodoc:
-
- attr_reader :content
-
- # Creates a new text node as a child of the given parent, with the given
- # content.
- def initialize(parent, line, pos, content)
- super(parent, line, pos)
- @content = content
- end
-
- # Returns the content of this node.
- def to_s
- @content
- end
-
- # Returns +self+ if this node meets the given conditions. Text nodes support
- # conditions of the following kinds:
- #
- # * if +conditions+ is a string, it must be a substring of the node's
- # content
- # * if +conditions+ is a regular expression, it must match the node's
- # content
- # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
- # is either a string or a regexp, and which is interpreted as described
- # above.
- def find(conditions)
- match(conditions) && self
- end
-
- # Returns non-+nil+ if this node meets the given conditions, or +nil+
- # otherwise. See the discussion of #find for the valid conditions.
- def match(conditions)
- case conditions
- when String
- @content == conditions
- when Regexp
- @content =~ conditions
- when Hash
- conditions = validate_conditions(conditions)
-
- # Text nodes only have :content, :parent, :ancestor
- unless (conditions.keys - [:content, :parent, :ancestor]).empty?
- return false
- end
-
- match(conditions[:content])
- else
- nil
- end
- end
-
- def ==(node)
- return false unless super
- content == node.content
- end
- end
-
- # A CDATA node is simply a text node with a specialized way of displaying
- # itself.
- class CDATA < Text #:nodoc:
- def to_s
- "<![CDATA[#{super}]]>"
- end
- end
-
- # A Tag is any node that represents markup. It may be an opening tag, a
- # closing tag, or a self-closing tag. It has a name, and may have a hash of
- # attributes.
- class Tag < Node #:nodoc:
-
- # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
- attr_reader :closing
-
- # Either +nil+, or a hash of attributes for this node.
- attr_reader :attributes
-
- # The name of this tag.
- attr_reader :name
-
- # Create a new node as a child of the given parent, using the given content
- # to describe the node. It will be parsed and the node name, attributes and
- # closing status extracted.
- def initialize(parent, line, pos, name, attributes, closing)
- super(parent, line, pos)
- @name = name
- @attributes = attributes
- @closing = closing
- end
-
- # A convenience for obtaining an attribute of the node. Returns +nil+ if
- # the node has no attributes.
- def [](attr)
- @attributes ? @attributes[attr] : nil
- end
-
- # Returns non-+nil+ if this tag can contain child nodes.
- def childless?(xml = false)
- return false if xml && @closing.nil?
- !@closing.nil? ||
- @name =~ /^(img|br|hr|link|meta|area|base|basefont|
- col|frame|input|isindex|param)$/ox
- end
-
- # Returns a textual representation of the node
- def to_s
- if @closing == :close
- "</#{@name}>"
- else
- s = "<#{@name}"
- @attributes.each do |k,v|
- s << " #{k}"
- s << "=\"#{v}\"" if String === v
- end
- s << " /" if @closing == :self
- s << ">"
- @children.each { |child| s << child.to_s }
- s << "</#{@name}>" if @closing != :self && !@children.empty?
- s
- end
- end
-
- # If either the node or any of its children meet the given conditions, the
- # matching node is returned. Otherwise, +nil+ is returned. (See the
- # description of the valid conditions in the +match+ method.)
- def find(conditions)
- match(conditions) && self || super
- end
-
- # Returns +true+, indicating that this node represents an HTML tag.
- def tag?
- true
- end
-
- # Returns +true+ if the node meets any of the given conditions. The
- # +conditions+ parameter must be a hash of any of the following keys
- # (all are optional):
- #
- # * <tt>:tag</tt>: the node name must match the corresponding value
- # * <tt>:attributes</tt>: a hash. The node's values must match the
- # corresponding values in the hash.
- # * <tt>:parent</tt>: a hash. The node's parent must match the
- # corresponding hash.
- # * <tt>:child</tt>: a hash. At least one of the node's immediate children
- # must meet the criteria described by the hash.
- # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
- # meet the criteria described by the hash.
- # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
- # must meet the criteria described by the hash.
- # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
- # meet the criteria described by the hash.
- # * <tt>:after</tt>: a hash. The node must be after any sibling meeting
- # the criteria described by the hash, and at least one sibling must match.
- # * <tt>:before</tt>: a hash. The node must be before any sibling meeting
- # the criteria described by the hash, and at least one sibling must match.
- # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
- # keys:
- # ** <tt>:count</tt>: either a number or a range which must equal (or
- # include) the number of children that match.
- # ** <tt>:less_than</tt>: the number of matching children must be less than
- # this number.
- # ** <tt>:greater_than</tt>: the number of matching children must be
- # greater than this number.
- # ** <tt>:only</tt>: another hash consisting of the keys to use
- # to match on the children, and only matching children will be
- # counted.
- #
- # Conditions are matched using the following algorithm:
- #
- # * if the condition is a string, it must be a substring of the value.
- # * if the condition is a regexp, it must match the value.
- # * if the condition is a number, the value must match number.to_s.
- # * if the condition is +true+, the value must not be +nil+.
- # * if the condition is +false+ or +nil+, the value must be +nil+.
- #
- # Usage:
- #
- # # test if the node is a "span" tag
- # node.match :tag => "span"
- #
- # # test if the node's parent is a "div"
- # node.match :parent => { :tag => "div" }
- #
- # # test if any of the node's ancestors are "table" tags
- # node.match :ancestor => { :tag => "table" }
- #
- # # test if any of the node's immediate children are "em" tags
- # node.match :child => { :tag => "em" }
- #
- # # test if any of the node's descendants are "strong" tags
- # node.match :descendant => { :tag => "strong" }
- #
- # # test if the node has between 2 and 4 span tags as immediate children
- # node.match :children => { :count => 2..4, :only => { :tag => "span" } }
- #
- # # get funky: test to see if the node is a "div", has a "ul" ancestor
- # # and an "li" parent (with "class" = "enum"), and whether or not it has
- # # a "span" descendant that contains # text matching /hello world/:
- # node.match :tag => "div",
- # :ancestor => { :tag => "ul" },
- # :parent => { :tag => "li",
- # :attributes => { :class => "enum" } },
- # :descendant => { :tag => "span",
- # :child => /hello world/ }
- def match(conditions)
- conditions = validate_conditions(conditions)
- # check content of child nodes
- if conditions[:content]
- if children.empty?
- return false unless match_condition("", conditions[:content])
- else
- return false unless children.find { |child| child.match(conditions[:content]) }
- end
- end
-
- # test the name
- return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
-
- # test attributes
- (conditions[:attributes] || {}).each do |key, value|
- return false unless match_condition(self[key], value)
- end
-
- # test parent
- return false unless parent.match(conditions[:parent]) if conditions[:parent]
-
- # test children
- return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
-
- # test ancestors
- if conditions[:ancestor]
- return false unless catch :found do
- p = self
- throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
- end
- end
-
- # test descendants
- if conditions[:descendant]
- return false unless children.find do |child|
- # test the child
- child.match(conditions[:descendant]) ||
- # test the child's descendants
- child.match(:descendant => conditions[:descendant])
- end
- end
-
- # count children
- if opts = conditions[:children]
- matches = children.select do |c|
- (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
- end
-
- matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
- opts.each do |key, value|
- next if key == :only
- case key
- when :count
- if Integer === value
- return false if matches.length != value
- else
- return false unless value.include?(matches.length)
- end
- when :less_than
- return false unless matches.length < value
- when :greater_than
- return false unless matches.length > value
- else raise "unknown count condition #{key}"
- end
- end
- end
-
- # test siblings
- if conditions[:sibling] || conditions[:before] || conditions[:after]
- siblings = parent ? parent.children : []
- self_index = siblings.index(self)
-
- if conditions[:sibling]
- return false unless siblings.detect do |s|
- s != self && s.match(conditions[:sibling])
- end
- end
-
- if conditions[:before]
- return false unless siblings[self_index+1..-1].detect do |s|
- s != self && s.match(conditions[:before])
- end
- end
-
- if conditions[:after]
- return false unless siblings[0,self_index].detect do |s|
- s != self && s.match(conditions[:after])
- end
- end
- end
-
- true
- end
-
- def ==(node)
- return false unless super
- return false unless closing == node.closing && self.name == node.name
- attributes == node.attributes
- end
-
- private
- # Match the given value to the given condition.
- def match_condition(value, condition)
- case condition
- when String
- value && value == condition
- when Regexp
- value && value.match(condition)
- when Numeric
- value == condition.to_s
- when true
- !value.nil?
- when false, nil
- value.nil?
- else
- false
- end
- end
- end
-end