aboutsummaryrefslogtreecommitdiffstats
path: root/actionpack/lib/action_view
diff options
context:
space:
mode:
Diffstat (limited to 'actionpack/lib/action_view')
-rw-r--r--actionpack/lib/action_view/helpers/sanitize_helper.rb2
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner.rb20
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner/html/document.rb68
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner/html/node.rb532
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner/html/sanitizer.rb188
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner/html/selector.rb830
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner/html/tokenizer.rb107
-rw-r--r--actionpack/lib/action_view/vendor/html-scanner/html/version.rb11
8 files changed, 1757 insertions, 1 deletions
diff --git a/actionpack/lib/action_view/helpers/sanitize_helper.rb b/actionpack/lib/action_view/helpers/sanitize_helper.rb
index aaf0e0344a..9c76c26ace 100644
--- a/actionpack/lib/action_view/helpers/sanitize_helper.rb
+++ b/actionpack/lib/action_view/helpers/sanitize_helper.rb
@@ -1,5 +1,5 @@
require 'active_support/core_ext/object/try'
-require 'action_controller/vendor/html-scanner'
+require 'action_view/vendor/html-scanner'
module ActionView
# = Action View Sanitize Helpers
diff --git a/actionpack/lib/action_view/vendor/html-scanner.rb b/actionpack/lib/action_view/vendor/html-scanner.rb
new file mode 100644
index 0000000000..879b31e60e
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner.rb
@@ -0,0 +1,20 @@
+$LOAD_PATH << "#{File.dirname(__FILE__)}/html-scanner"
+
+module HTML
+ extend ActiveSupport::Autoload
+
+ eager_autoload do
+ autoload :CDATA, 'html/node'
+ autoload :Document, 'html/document'
+ autoload :FullSanitizer, 'html/sanitizer'
+ autoload :LinkSanitizer, 'html/sanitizer'
+ autoload :Node, 'html/node'
+ autoload :Sanitizer, 'html/sanitizer'
+ autoload :Selector, 'html/selector'
+ autoload :Tag, 'html/node'
+ autoload :Text, 'html/node'
+ autoload :Tokenizer, 'html/tokenizer'
+ autoload :Version, 'html/version'
+ autoload :WhiteListSanitizer, 'html/sanitizer'
+ end
+end
diff --git a/actionpack/lib/action_view/vendor/html-scanner/html/document.rb b/actionpack/lib/action_view/vendor/html-scanner/html/document.rb
new file mode 100644
index 0000000000..386820300a
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner/html/document.rb
@@ -0,0 +1,68 @@
+require 'html/tokenizer'
+require 'html/node'
+require 'html/selector'
+require 'html/sanitizer'
+
+module HTML #:nodoc:
+ # A top-level HTML document. You give it a body of text, and it will parse that
+ # text into a tree of nodes.
+ class Document #:nodoc:
+
+ # The root of the parsed document.
+ attr_reader :root
+
+ # Create a new Document from the given text.
+ def initialize(text, strict=false, xml=false)
+ tokenizer = Tokenizer.new(text)
+ @root = Node.new(nil)
+ node_stack = [ @root ]
+ while token = tokenizer.next
+ node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict)
+
+ node_stack.last.children << node unless node.tag? && node.closing == :close
+ if node.tag?
+ if node_stack.length > 1 && node.closing == :close
+ if node_stack.last.name == node.name
+ if node_stack.last.children.empty?
+ node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "")
+ end
+ node_stack.pop
+ else
+ open_start = node_stack.last.position - 20
+ open_start = 0 if open_start < 0
+ close_start = node.position - 20
+ close_start = 0 if close_start < 0
+ msg = <<EOF.strip
+ignoring attempt to close #{node_stack.last.name} with #{node.name}
+ opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
+ closed at byte #{node.position}, line #{node.line}
+ attributes at open: #{node_stack.last.attributes.inspect}
+ text around open: #{text[open_start,40].inspect}
+ text around close: #{text[close_start,40].inspect}
+EOF
+ strict ? raise(msg) : warn(msg)
+ end
+ elsif !node.childless?(xml) && node.closing != :close
+ node_stack.push node
+ end
+ end
+ end
+ end
+
+ # Search the tree for (and return) the first node that matches the given
+ # conditions. The conditions are interpreted differently for different node
+ # types, see HTML::Text#find and HTML::Tag#find.
+ def find(conditions)
+ @root.find(conditions)
+ end
+
+ # Search the tree for (and return) all nodes that match the given
+ # conditions. The conditions are interpreted differently for different node
+ # types, see HTML::Text#find and HTML::Tag#find.
+ def find_all(conditions)
+ @root.find_all(conditions)
+ end
+
+ end
+
+end
diff --git a/actionpack/lib/action_view/vendor/html-scanner/html/node.rb b/actionpack/lib/action_view/vendor/html-scanner/html/node.rb
new file mode 100644
index 0000000000..4e1f016431
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner/html/node.rb
@@ -0,0 +1,532 @@
+require 'strscan'
+
+module HTML #:nodoc:
+
+ class Conditions < Hash #:nodoc:
+ def initialize(hash)
+ super()
+ hash = { :content => hash } unless Hash === hash
+ hash = keys_to_symbols(hash)
+ hash.each do |k,v|
+ case k
+ when :tag, :content then
+ # keys are valid, and require no further processing
+ when :attributes then
+ hash[k] = keys_to_strings(v)
+ when :parent, :child, :ancestor, :descendant, :sibling, :before,
+ :after
+ hash[k] = Conditions.new(v)
+ when :children
+ hash[k] = v = keys_to_symbols(v)
+ v.each do |key,value|
+ case key
+ when :count, :greater_than, :less_than
+ # keys are valid, and require no further processing
+ when :only
+ v[key] = Conditions.new(value)
+ else
+ raise "illegal key #{key.inspect} => #{value.inspect}"
+ end
+ end
+ else
+ raise "illegal key #{k.inspect} => #{v.inspect}"
+ end
+ end
+ update hash
+ end
+
+ private
+
+ def keys_to_strings(hash)
+ Hash[hash.keys.map {|k| [k.to_s, hash[k]]}]
+ end
+
+ def keys_to_symbols(hash)
+ Hash[hash.keys.map do |k|
+ raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
+ [k.to_sym, hash[k]]
+ end]
+ end
+ end
+
+ # The base class of all nodes, textual and otherwise, in an HTML document.
+ class Node #:nodoc:
+ # The array of children of this node. Not all nodes have children.
+ attr_reader :children
+
+ # The parent node of this node. All nodes have a parent, except for the
+ # root node.
+ attr_reader :parent
+
+ # The line number of the input where this node was begun
+ attr_reader :line
+
+ # The byte position in the input where this node was begun
+ attr_reader :position
+
+ # Create a new node as a child of the given parent.
+ def initialize(parent, line=0, pos=0)
+ @parent = parent
+ @children = []
+ @line, @position = line, pos
+ end
+
+ # Return a textual representation of the node.
+ def to_s
+ @children.join()
+ end
+
+ # Return false (subclasses must override this to provide specific matching
+ # behavior.) +conditions+ may be of any type.
+ def match(conditions)
+ false
+ end
+
+ # Search the children of this node for the first node for which #find
+ # returns non +nil+. Returns the result of the #find call that succeeded.
+ def find(conditions)
+ conditions = validate_conditions(conditions)
+ @children.each do |child|
+ node = child.find(conditions)
+ return node if node
+ end
+ nil
+ end
+
+ # Search for all nodes that match the given conditions, and return them
+ # as an array.
+ def find_all(conditions)
+ conditions = validate_conditions(conditions)
+
+ matches = []
+ matches << self if match(conditions)
+ @children.each do |child|
+ matches.concat child.find_all(conditions)
+ end
+ matches
+ end
+
+ # Returns +false+. Subclasses may override this if they define a kind of
+ # tag.
+ def tag?
+ false
+ end
+
+ def validate_conditions(conditions)
+ Conditions === conditions ? conditions : Conditions.new(conditions)
+ end
+
+ def ==(node)
+ return false unless self.class == node.class && children.size == node.children.size
+
+ equivalent = true
+
+ children.size.times do |i|
+ equivalent &&= children[i] == node.children[i]
+ end
+
+ equivalent
+ end
+
+ class <<self
+ def parse(parent, line, pos, content, strict=true)
+ if content !~ /^<\S/
+ Text.new(parent, line, pos, content)
+ else
+ scanner = StringScanner.new(content)
+
+ unless scanner.skip(/</)
+ if strict
+ raise "expected <"
+ else
+ return Text.new(parent, line, pos, content)
+ end
+ end
+
+ if scanner.skip(/!\[CDATA\[/)
+ unless scanner.skip_until(/\]\]>/)
+ if strict
+ raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
+ else
+ scanner.skip_until(/\Z/)
+ end
+ end
+
+ return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
+ end
+
+ closing = ( scanner.scan(/\//) ? :close : nil )
+ return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/)
+ name.downcase!
+
+ unless closing
+ scanner.skip(/\s*/)
+ attributes = {}
+ while attr = scanner.scan(/[-\w:]+/)
+ value = true
+ if scanner.scan(/\s*=\s*/)
+ if delim = scanner.scan(/['"]/)
+ value = ""
+ while text = scanner.scan(/[^#{delim}\\]+|./)
+ case text
+ when "\\" then
+ value << text
+ break if scanner.eos?
+ value << scanner.getch
+ when delim
+ break
+ else value << text
+ end
+ end
+ else
+ value = scanner.scan(/[^\s>\/]+/)
+ end
+ end
+ attributes[attr.downcase] = value
+ scanner.skip(/\s*/)
+ end
+
+ closing = ( scanner.scan(/\//) ? :self : nil )
+ end
+
+ unless scanner.scan(/\s*>/)
+ if strict
+ raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
+ else
+ # throw away all text until we find what we're looking for
+ scanner.skip_until(/>/) or scanner.terminate
+ end
+ end
+
+ Tag.new(parent, line, pos, name, attributes, closing)
+ end
+ end
+ end
+ end
+
+ # A node that represents text, rather than markup.
+ class Text < Node #:nodoc:
+
+ attr_reader :content
+
+ # Creates a new text node as a child of the given parent, with the given
+ # content.
+ def initialize(parent, line, pos, content)
+ super(parent, line, pos)
+ @content = content
+ end
+
+ # Returns the content of this node.
+ def to_s
+ @content
+ end
+
+ # Returns +self+ if this node meets the given conditions. Text nodes support
+ # conditions of the following kinds:
+ #
+ # * if +conditions+ is a string, it must be a substring of the node's
+ # content
+ # * if +conditions+ is a regular expression, it must match the node's
+ # content
+ # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
+ # is either a string or a regexp, and which is interpreted as described
+ # above.
+ def find(conditions)
+ match(conditions) && self
+ end
+
+ # Returns non-+nil+ if this node meets the given conditions, or +nil+
+ # otherwise. See the discussion of #find for the valid conditions.
+ def match(conditions)
+ case conditions
+ when String
+ @content == conditions
+ when Regexp
+ @content =~ conditions
+ when Hash
+ conditions = validate_conditions(conditions)
+
+ # Text nodes only have :content, :parent, :ancestor
+ unless (conditions.keys - [:content, :parent, :ancestor]).empty?
+ return false
+ end
+
+ match(conditions[:content])
+ else
+ nil
+ end
+ end
+
+ def ==(node)
+ return false unless super
+ content == node.content
+ end
+ end
+
+ # A CDATA node is simply a text node with a specialized way of displaying
+ # itself.
+ class CDATA < Text #:nodoc:
+ def to_s
+ "<![CDATA[#{super}]]>"
+ end
+ end
+
+ # A Tag is any node that represents markup. It may be an opening tag, a
+ # closing tag, or a self-closing tag. It has a name, and may have a hash of
+ # attributes.
+ class Tag < Node #:nodoc:
+
+ # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
+ attr_reader :closing
+
+ # Either +nil+, or a hash of attributes for this node.
+ attr_reader :attributes
+
+ # The name of this tag.
+ attr_reader :name
+
+ # Create a new node as a child of the given parent, using the given content
+ # to describe the node. It will be parsed and the node name, attributes and
+ # closing status extracted.
+ def initialize(parent, line, pos, name, attributes, closing)
+ super(parent, line, pos)
+ @name = name
+ @attributes = attributes
+ @closing = closing
+ end
+
+ # A convenience for obtaining an attribute of the node. Returns +nil+ if
+ # the node has no attributes.
+ def [](attr)
+ @attributes ? @attributes[attr] : nil
+ end
+
+ # Returns non-+nil+ if this tag can contain child nodes.
+ def childless?(xml = false)
+ return false if xml && @closing.nil?
+ !@closing.nil? ||
+ @name =~ /^(img|br|hr|link|meta|area|base|basefont|
+ col|frame|input|isindex|param)$/ox
+ end
+
+ # Returns a textual representation of the node
+ def to_s
+ if @closing == :close
+ "</#{@name}>"
+ else
+ s = "<#{@name}"
+ @attributes.each do |k,v|
+ s << " #{k}"
+ s << "=\"#{v}\"" if String === v
+ end
+ s << " /" if @closing == :self
+ s << ">"
+ @children.each { |child| s << child.to_s }
+ s << "</#{@name}>" if @closing != :self && !@children.empty?
+ s
+ end
+ end
+
+ # If either the node or any of its children meet the given conditions, the
+ # matching node is returned. Otherwise, +nil+ is returned. (See the
+ # description of the valid conditions in the +match+ method.)
+ def find(conditions)
+ match(conditions) && self || super
+ end
+
+ # Returns +true+, indicating that this node represents an HTML tag.
+ def tag?
+ true
+ end
+
+ # Returns +true+ if the node meets any of the given conditions. The
+ # +conditions+ parameter must be a hash of any of the following keys
+ # (all are optional):
+ #
+ # * <tt>:tag</tt>: the node name must match the corresponding value
+ # * <tt>:attributes</tt>: a hash. The node's values must match the
+ # corresponding values in the hash.
+ # * <tt>:parent</tt>: a hash. The node's parent must match the
+ # corresponding hash.
+ # * <tt>:child</tt>: a hash. At least one of the node's immediate children
+ # must meet the criteria described by the hash.
+ # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
+ # meet the criteria described by the hash.
+ # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
+ # must meet the criteria described by the hash.
+ # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
+ # meet the criteria described by the hash.
+ # * <tt>:after</tt>: a hash. The node must be after any sibling meeting
+ # the criteria described by the hash, and at least one sibling must match.
+ # * <tt>:before</tt>: a hash. The node must be before any sibling meeting
+ # the criteria described by the hash, and at least one sibling must match.
+ # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
+ # keys:
+ # ** <tt>:count</tt>: either a number or a range which must equal (or
+ # include) the number of children that match.
+ # ** <tt>:less_than</tt>: the number of matching children must be less than
+ # this number.
+ # ** <tt>:greater_than</tt>: the number of matching children must be
+ # greater than this number.
+ # ** <tt>:only</tt>: another hash consisting of the keys to use
+ # to match on the children, and only matching children will be
+ # counted.
+ #
+ # Conditions are matched using the following algorithm:
+ #
+ # * if the condition is a string, it must be a substring of the value.
+ # * if the condition is a regexp, it must match the value.
+ # * if the condition is a number, the value must match number.to_s.
+ # * if the condition is +true+, the value must not be +nil+.
+ # * if the condition is +false+ or +nil+, the value must be +nil+.
+ #
+ # Usage:
+ #
+ # # test if the node is a "span" tag
+ # node.match :tag => "span"
+ #
+ # # test if the node's parent is a "div"
+ # node.match :parent => { :tag => "div" }
+ #
+ # # test if any of the node's ancestors are "table" tags
+ # node.match :ancestor => { :tag => "table" }
+ #
+ # # test if any of the node's immediate children are "em" tags
+ # node.match :child => { :tag => "em" }
+ #
+ # # test if any of the node's descendants are "strong" tags
+ # node.match :descendant => { :tag => "strong" }
+ #
+ # # test if the node has between 2 and 4 span tags as immediate children
+ # node.match :children => { :count => 2..4, :only => { :tag => "span" } }
+ #
+ # # get funky: test to see if the node is a "div", has a "ul" ancestor
+ # # and an "li" parent (with "class" = "enum"), and whether or not it has
+ # # a "span" descendant that contains # text matching /hello world/:
+ # node.match :tag => "div",
+ # :ancestor => { :tag => "ul" },
+ # :parent => { :tag => "li",
+ # :attributes => { :class => "enum" } },
+ # :descendant => { :tag => "span",
+ # :child => /hello world/ }
+ def match(conditions)
+ conditions = validate_conditions(conditions)
+ # check content of child nodes
+ if conditions[:content]
+ if children.empty?
+ return false unless match_condition("", conditions[:content])
+ else
+ return false unless children.find { |child| child.match(conditions[:content]) }
+ end
+ end
+
+ # test the name
+ return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
+
+ # test attributes
+ (conditions[:attributes] || {}).each do |key, value|
+ return false unless match_condition(self[key], value)
+ end
+
+ # test parent
+ return false unless parent.match(conditions[:parent]) if conditions[:parent]
+
+ # test children
+ return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
+
+ # test ancestors
+ if conditions[:ancestor]
+ return false unless catch :found do
+ p = self
+ throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
+ end
+ end
+
+ # test descendants
+ if conditions[:descendant]
+ return false unless children.find do |child|
+ # test the child
+ child.match(conditions[:descendant]) ||
+ # test the child's descendants
+ child.match(:descendant => conditions[:descendant])
+ end
+ end
+
+ # count children
+ if opts = conditions[:children]
+ matches = children.select do |c|
+ (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
+ end
+
+ matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
+ opts.each do |key, value|
+ next if key == :only
+ case key
+ when :count
+ if Integer === value
+ return false if matches.length != value
+ else
+ return false unless value.include?(matches.length)
+ end
+ when :less_than
+ return false unless matches.length < value
+ when :greater_than
+ return false unless matches.length > value
+ else raise "unknown count condition #{key}"
+ end
+ end
+ end
+
+ # test siblings
+ if conditions[:sibling] || conditions[:before] || conditions[:after]
+ siblings = parent ? parent.children : []
+ self_index = siblings.index(self)
+
+ if conditions[:sibling]
+ return false unless siblings.detect do |s|
+ s != self && s.match(conditions[:sibling])
+ end
+ end
+
+ if conditions[:before]
+ return false unless siblings[self_index+1..-1].detect do |s|
+ s != self && s.match(conditions[:before])
+ end
+ end
+
+ if conditions[:after]
+ return false unless siblings[0,self_index].detect do |s|
+ s != self && s.match(conditions[:after])
+ end
+ end
+ end
+
+ true
+ end
+
+ def ==(node)
+ return false unless super
+ return false unless closing == node.closing && self.name == node.name
+ attributes == node.attributes
+ end
+
+ private
+ # Match the given value to the given condition.
+ def match_condition(value, condition)
+ case condition
+ when String
+ value && value == condition
+ when Regexp
+ value && value.match(condition)
+ when Numeric
+ value == condition.to_s
+ when true
+ !value.nil?
+ when false, nil
+ value.nil?
+ else
+ false
+ end
+ end
+ end
+end
diff --git a/actionpack/lib/action_view/vendor/html-scanner/html/sanitizer.rb b/actionpack/lib/action_view/vendor/html-scanner/html/sanitizer.rb
new file mode 100644
index 0000000000..6b4ececda2
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner/html/sanitizer.rb
@@ -0,0 +1,188 @@
+require 'set'
+require 'cgi'
+require 'active_support/core_ext/class/attribute_accessors'
+
+module HTML
+ class Sanitizer
+ def sanitize(text, options = {})
+ validate_options(options)
+ return text unless sanitizeable?(text)
+ tokenize(text, options).join
+ end
+
+ def sanitizeable?(text)
+ !(text.nil? || text.empty? || !text.index("<"))
+ end
+
+ protected
+ def tokenize(text, options)
+ tokenizer = HTML::Tokenizer.new(text)
+ result = []
+ while token = tokenizer.next
+ node = Node.parse(nil, 0, 0, token, false)
+ process_node node, result, options
+ end
+ result
+ end
+
+ def process_node(node, result, options)
+ result << node.to_s
+ end
+
+ def validate_options(options)
+ if options[:tags] && !options[:tags].is_a?(Enumerable)
+ raise ArgumentError, "You should pass :tags as an Enumerable"
+ end
+
+ if options[:attributes] && !options[:attributes].is_a?(Enumerable)
+ raise ArgumentError, "You should pass :attributes as an Enumerable"
+ end
+ end
+ end
+
+ class FullSanitizer < Sanitizer
+ def sanitize(text, options = {})
+ result = super
+ # strip any comments, and if they have a newline at the end (ie. line with
+ # only a comment) strip that too
+ result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if (result && result =~ /<!--(.*?)-->[\n]?/m)
+ # Recurse - handle all dirty nested tags
+ result == text ? result : sanitize(result, options)
+ end
+
+ def process_node(node, result, options)
+ result << node.to_s if node.class == HTML::Text
+ end
+ end
+
+ class LinkSanitizer < FullSanitizer
+ cattr_accessor :included_tags, :instance_writer => false
+ self.included_tags = Set.new(%w(a href))
+
+ def sanitizeable?(text)
+ !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
+ end
+
+ protected
+ def process_node(node, result, options)
+ result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
+ end
+ end
+
+ class WhiteListSanitizer < Sanitizer
+ [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
+ :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
+ class_attribute attr, :instance_writer => false
+ end
+
+ # A regular expression of the valid characters used to separate protocols like
+ # the ':' in 'http://foo.com'
+ self.protocol_separator = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/
+
+ # Specifies a Set of HTML attributes that can have URIs.
+ self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
+
+ # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
+ # to just escaping harmless tags like &lt;font&gt;
+ self.bad_tags = Set.new(%w(script))
+
+ # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
+ self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
+ acronym a img blockquote del ins))
+
+ # Specifies the default Set of html attributes that the #sanitize helper will leave
+ # in the allowed tag.
+ self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
+
+ # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
+ self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
+ feed svn urn aim rsync tag ssh sftp rtsp afs))
+
+ # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
+ self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
+ border-color border-left-color border-right-color border-top-color clear color cursor direction display
+ elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
+ overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
+ speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
+ width))
+
+ # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
+ self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
+ collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
+ nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
+
+ # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+ self.shorthand_css_properties = Set.new(%w(background border margin padding))
+
+ # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
+ def sanitize_css(style)
+ # disallow urls
+ style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+
+ # gauntlet
+ if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ ||
+ style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*$/
+ return ''
+ end
+
+ clean = []
+ style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
+ if allowed_css_properties.include?(prop.downcase)
+ clean << prop + ': ' + val + ';'
+ elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
+ unless val.split().any? do |keyword|
+ !allowed_css_keywords.include?(keyword) &&
+ keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
+ end
+ clean << prop + ': ' + val + ';'
+ end
+ end
+ end
+ clean.join(' ')
+ end
+
+ protected
+ def tokenize(text, options)
+ options[:parent] = []
+ options[:attributes] ||= allowed_attributes
+ options[:tags] ||= allowed_tags
+ super
+ end
+
+ def process_node(node, result, options)
+ result << case node
+ when HTML::Tag
+ if node.closing == :close
+ options[:parent].shift
+ else
+ options[:parent].unshift node.name
+ end
+
+ process_attributes_for node, options
+
+ options[:tags].include?(node.name) ? node : nil
+ else
+ bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
+ end
+ end
+
+ def process_attributes_for(node, options)
+ return unless node.attributes
+ node.attributes.keys.each do |attr_name|
+ value = node.attributes[attr_name].to_s
+
+ if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
+ node.attributes.delete(attr_name)
+ else
+ node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value))
+ end
+ end
+ end
+
+ def contains_bad_protocols?(attr_name, value)
+ uri_attributes.include?(attr_name) &&
+ (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(%|&#37;)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip))
+ end
+ end
+end
diff --git a/actionpack/lib/action_view/vendor/html-scanner/html/selector.rb b/actionpack/lib/action_view/vendor/html-scanner/html/selector.rb
new file mode 100644
index 0000000000..1eadfc0390
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner/html/selector.rb
@@ -0,0 +1,830 @@
+#--
+# Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
+# Under MIT and/or CC By license.
+#++
+
+module HTML
+
+ # Selects HTML elements using CSS 2 selectors.
+ #
+ # The +Selector+ class uses CSS selector expressions to match and select
+ # HTML elements.
+ #
+ # For example:
+ # selector = HTML::Selector.new "form.login[action=/login]"
+ # creates a new selector that matches any +form+ element with the class
+ # +login+ and an attribute +action+ with the value <tt>/login</tt>.
+ #
+ # === Matching Elements
+ #
+ # Use the #match method to determine if an element matches the selector.
+ #
+ # For simple selectors, the method returns an array with that element,
+ # or +nil+ if the element does not match. For complex selectors (see below)
+ # the method returns an array with all matched elements, of +nil+ if no
+ # match found.
+ #
+ # For example:
+ # if selector.match(element)
+ # puts "Element is a login form"
+ # end
+ #
+ # === Selecting Elements
+ #
+ # Use the #select method to select all matching elements starting with
+ # one element and going through all children in depth-first order.
+ #
+ # This method returns an array of all matching elements, an empty array
+ # if no match is found
+ #
+ # For example:
+ # selector = HTML::Selector.new "input[type=text]"
+ # matches = selector.select(element)
+ # matches.each do |match|
+ # puts "Found text field with name #{match.attributes['name']}"
+ # end
+ #
+ # === Expressions
+ #
+ # Selectors can match elements using any of the following criteria:
+ # * <tt>name</tt> -- Match an element based on its name (tag name).
+ # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
+ # to match any element.
+ # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
+ # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
+ # * <tt>.class</tt> -- Match an element based on its class name, all
+ # class names if more than one specified.
+ # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
+ # * <tt>[attr=value]</tt> -- Match an element that has the specified
+ # attribute and value. (More operators are supported see below)
+ # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
+ # such as <tt>:nth-child</tt> and <tt>:empty</tt>.
+ # * <tt>:not(expr)</tt> -- Match an element that does not match the
+ # negation expression.
+ #
+ # When using a combination of the above, the element name comes first
+ # followed by identifier, class names, attributes, pseudo classes and
+ # negation in any order. Do not separate these parts with spaces!
+ # Space separation is used for descendant selectors.
+ #
+ # For example:
+ # selector = HTML::Selector.new "form.login[action=/login]"
+ # The matched element must be of type +form+ and have the class +login+.
+ # It may have other classes, but the class +login+ is required to match.
+ # It must also have an attribute called +action+ with the value
+ # <tt>/login</tt>.
+ #
+ # This selector will match the following element:
+ # <form class="login form" method="post" action="/login">
+ # but will not match the element:
+ # <form method="post" action="/logout">
+ #
+ # === Attribute Values
+ #
+ # Several operators are supported for matching attributes:
+ # * <tt>name</tt> -- The element must have an attribute with that name.
+ # * <tt>name=value</tt> -- The element must have an attribute with that
+ # name and value.
+ # * <tt>name^=value</tt> -- The attribute value must start with the
+ # specified value.
+ # * <tt>name$=value</tt> -- The attribute value must end with the
+ # specified value.
+ # * <tt>name*=value</tt> -- The attribute value must contain the
+ # specified value.
+ # * <tt>name~=word</tt> -- The attribute value must contain the specified
+ # word (space separated).
+ # * <tt>name|=word</tt> -- The attribute value must start with specified
+ # word.
+ #
+ # For example, the following two selectors match the same element:
+ # #my_id
+ # [id=my_id]
+ # and so do the following two selectors:
+ # .my_class
+ # [class~=my_class]
+ #
+ # === Alternatives, siblings, children
+ #
+ # Complex selectors use a combination of expressions to match elements:
+ # * <tt>expr1 expr2</tt> -- Match any element against the second expression
+ # if it has some parent element that matches the first expression.
+ # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
+ # if it is the child of an element that matches the first expression.
+ # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
+ # if it immediately follows an element that matches the first expression.
+ # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
+ # that comes after an element that matches the first expression.
+ # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
+ # or against the second expression.
+ #
+ # Since children and sibling selectors may match more than one element given
+ # the first element, the #match method may return more than one match.
+ #
+ # === Pseudo classes
+ #
+ # Pseudo classes were introduced in CSS 3. They are most often used to select
+ # elements in a given position:
+ # * <tt>:root</tt> -- Match the element only if it is the root element
+ # (no parent element).
+ # * <tt>:empty</tt> -- Match the element only if it has no child elements,
+ # and no text content.
+ # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt>
+ # as its text content (ignoring leading and trailing whitespace).
+ # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
+ # of its parent element.
+ # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
+ # of its parent element and its type.
+ # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
+ # of its parent element.
+ # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
+ # of its parent element of its type.
+ # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
+ # of its parent element.
+ # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
+ # of its parent element of its type.
+ # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
+ # of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
+ # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
+ # in each group of <tt>a</tt> child elements of its parent element.
+ # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
+ # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
+ # elements of its parent element.
+ # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
+ # Same as <tt>:nth-child(2n+1)</tt>.
+ # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
+ # fourth). Same as <tt>:nth-child(2n+2)</tt>.
+ # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
+ # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
+ # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
+ # only elements of its type.
+ # * <tt>:not(selector)</tt> -- Match the element only if the element does not
+ # match the simple selector.
+ #
+ # As you can see, <tt>:nth-child<tt> pseudo class and its variant can get quite
+ # tricky and the CSS specification doesn't do a much better job explaining it.
+ # But after reading the examples and trying a few combinations, it's easy to
+ # figure out.
+ #
+ # For example:
+ # table tr:nth-child(odd)
+ # Selects every second row in the table starting with the first one.
+ #
+ # div p:nth-child(4)
+ # Selects the fourth paragraph in the +div+, but not if the +div+ contains
+ # other elements, since those are also counted.
+ #
+ # div p:nth-of-type(4)
+ # Selects the fourth paragraph in the +div+, counting only paragraphs, and
+ # ignoring all other elements.
+ #
+ # div p:nth-of-type(-n+4)
+ # Selects the first four paragraphs, ignoring all others.
+ #
+ # And you can always select an element that matches one set of rules but
+ # not another using <tt>:not</tt>. For example:
+ # p:not(.post)
+ # Matches all paragraphs that do not have the class <tt>.post</tt>.
+ #
+ # === Substitution Values
+ #
+ # You can use substitution with identifiers, class names and element values.
+ # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
+ # next value in the argument list following the CSS expression.
+ #
+ # The substitution value may be a string or a regular expression. All other
+ # values are converted to strings.
+ #
+ # For example:
+ # selector = HTML::Selector.new "#?", /^\d+$/
+ # matches any element whose identifier consists of one or more digits.
+ #
+ # See http://www.w3.org/TR/css3-selectors/
+ class Selector
+
+
+ # An invalid selector.
+ class InvalidSelectorError < StandardError #:nodoc:
+ end
+
+
+ class << self
+
+ # :call-seq:
+ # Selector.for_class(cls) => selector
+ #
+ # Creates a new selector for the given class name.
+ def for_class(cls)
+ self.new([".?", cls])
+ end
+
+
+ # :call-seq:
+ # Selector.for_id(id) => selector
+ #
+ # Creates a new selector for the given id.
+ def for_id(id)
+ self.new(["#?", id])
+ end
+
+ end
+
+
+ # :call-seq:
+ # Selector.new(string, [values ...]) => selector
+ #
+ # Creates a new selector from a CSS 2 selector expression.
+ #
+ # The first argument is the selector expression. All other arguments
+ # are used for value substitution.
+ #
+ # Throws InvalidSelectorError is the selector expression is invalid.
+ def initialize(selector, *values)
+ raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
+ @source = ""
+ values = values[0] if values.size == 1 && values[0].is_a?(Array)
+
+ # We need a copy to determine if we failed to parse, and also
+ # preserve the original pass by-ref statement.
+ statement = selector.strip.dup
+
+ # Create a simple selector, along with negation.
+ simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
+
+ @alternates = []
+ @depends = nil
+
+ # Alternative selector.
+ if statement.sub!(/^\s*,\s*/, "")
+ second = Selector.new(statement, values)
+ @alternates << second
+ # If there are alternate selectors, we group them in the top selector.
+ if alternates = second.instance_variable_get(:@alternates)
+ second.instance_variable_set(:@alternates, [])
+ @alternates.concat alternates
+ end
+ @source << " , " << second.to_s
+ # Sibling selector: create a dependency into second selector that will
+ # match element immediately following this one.
+ elsif statement.sub!(/^\s*\+\s*/, "")
+ second = next_selector(statement, values)
+ @depends = lambda do |element, first|
+ if element = next_element(element)
+ second.match(element, first)
+ end
+ end
+ @source << " + " << second.to_s
+ # Adjacent selector: create a dependency into second selector that will
+ # match all elements following this one.
+ elsif statement.sub!(/^\s*~\s*/, "")
+ second = next_selector(statement, values)
+ @depends = lambda do |element, first|
+ matches = []
+ while element = next_element(element)
+ if subset = second.match(element, first)
+ if first && !subset.empty?
+ matches << subset.first
+ break
+ else
+ matches.concat subset
+ end
+ end
+ end
+ matches.empty? ? nil : matches
+ end
+ @source << " ~ " << second.to_s
+ # Child selector: create a dependency into second selector that will
+ # match a child element of this one.
+ elsif statement.sub!(/^\s*>\s*/, "")
+ second = next_selector(statement, values)
+ @depends = lambda do |element, first|
+ matches = []
+ element.children.each do |child|
+ if child.tag? && subset = second.match(child, first)
+ if first && !subset.empty?
+ matches << subset.first
+ break
+ else
+ matches.concat subset
+ end
+ end
+ end
+ matches.empty? ? nil : matches
+ end
+ @source << " > " << second.to_s
+ # Descendant selector: create a dependency into second selector that
+ # will match all descendant elements of this one. Note,
+ elsif statement =~ /^\s+\S+/ && statement != selector
+ second = next_selector(statement, values)
+ @depends = lambda do |element, first|
+ matches = []
+ stack = element.children.reverse
+ while node = stack.pop
+ next unless node.tag?
+ if subset = second.match(node, first)
+ if first && !subset.empty?
+ matches << subset.first
+ break
+ else
+ matches.concat subset
+ end
+ elsif children = node.children
+ stack.concat children.reverse
+ end
+ end
+ matches.empty? ? nil : matches
+ end
+ @source << " " << second.to_s
+ else
+ # The last selector is where we check that we parsed
+ # all the parts.
+ unless statement.empty? || statement.strip.empty?
+ raise ArgumentError, "Invalid selector: #{statement}"
+ end
+ end
+ end
+
+
+ # :call-seq:
+ # match(element, first?) => array or nil
+ #
+ # Matches an element against the selector.
+ #
+ # For a simple selector this method returns an array with the
+ # element if the element matches, nil otherwise.
+ #
+ # For a complex selector (sibling and descendant) this method
+ # returns an array with all matching elements, nil if no match is
+ # found.
+ #
+ # Use +first_only=true+ if you are only interested in the first element.
+ #
+ # For example:
+ # if selector.match(element)
+ # puts "Element is a login form"
+ # end
+ def match(element, first_only = false)
+ # Match element if no element name or element name same as element name
+ if matched = (!@tag_name || @tag_name == element.name)
+ # No match if one of the attribute matches failed
+ for attr in @attributes
+ if element.attributes[attr[0]] !~ attr[1]
+ matched = false
+ break
+ end
+ end
+ end
+
+ # Pseudo class matches (nth-child, empty, etc).
+ if matched
+ for pseudo in @pseudo
+ unless pseudo.call(element)
+ matched = false
+ break
+ end
+ end
+ end
+
+ # Negation. Same rules as above, but we fail if a match is made.
+ if matched && @negation
+ for negation in @negation
+ if negation[:tag_name] == element.name
+ matched = false
+ else
+ for attr in negation[:attributes]
+ if element.attributes[attr[0]] =~ attr[1]
+ matched = false
+ break
+ end
+ end
+ end
+ if matched
+ for pseudo in negation[:pseudo]
+ if pseudo.call(element)
+ matched = false
+ break
+ end
+ end
+ end
+ break unless matched
+ end
+ end
+
+ # If element matched but depends on another element (child,
+ # sibling, etc), apply the dependent matches instead.
+ if matched && @depends
+ matches = @depends.call(element, first_only)
+ else
+ matches = matched ? [element] : nil
+ end
+
+ # If this selector is part of the group, try all the alternative
+ # selectors (unless first_only).
+ if !first_only || !matches
+ @alternates.each do |alternate|
+ break if matches && first_only
+ if subset = alternate.match(element, first_only)
+ if matches
+ matches.concat subset
+ else
+ matches = subset
+ end
+ end
+ end
+ end
+
+ matches
+ end
+
+
+ # :call-seq:
+ # select(root) => array
+ #
+ # Selects and returns an array with all matching elements, beginning
+ # with one node and traversing through all children depth-first.
+ # Returns an empty array if no match is found.
+ #
+ # The root node may be any element in the document, or the document
+ # itself.
+ #
+ # For example:
+ # selector = HTML::Selector.new "input[type=text]"
+ # matches = selector.select(element)
+ # matches.each do |match|
+ # puts "Found text field with name #{match.attributes['name']}"
+ # end
+ def select(root)
+ matches = []
+ stack = [root]
+ while node = stack.pop
+ if node.tag? && subset = match(node, false)
+ subset.each do |match|
+ matches << match unless matches.any? { |item| item.equal?(match) }
+ end
+ elsif children = node.children
+ stack.concat children.reverse
+ end
+ end
+ matches
+ end
+
+
+ # Similar to #select but returns the first matching element. Returns +nil+
+ # if no element matches the selector.
+ def select_first(root)
+ stack = [root]
+ while node = stack.pop
+ if node.tag? && subset = match(node, true)
+ return subset.first if !subset.empty?
+ elsif children = node.children
+ stack.concat children.reverse
+ end
+ end
+ nil
+ end
+
+
+ def to_s #:nodoc:
+ @source
+ end
+
+
+ # Return the next element after this one. Skips sibling text nodes.
+ #
+ # With the +name+ argument, returns the next element with that name,
+ # skipping other sibling elements.
+ def next_element(element, name = nil)
+ if siblings = element.parent.children
+ found = false
+ siblings.each do |node|
+ if node.equal?(element)
+ found = true
+ elsif found && node.tag?
+ return node if (name.nil? || node.name == name)
+ end
+ end
+ end
+ nil
+ end
+
+
+ protected
+
+
+ # Creates a simple selector given the statement and array of
+ # substitution values.
+ #
+ # Returns a hash with the values +tag_name+, +attributes+,
+ # +pseudo+ (classes) and +negation+.
+ #
+ # Called the first time with +can_negate+ true to allow
+ # negation. Called a second time with false since negation
+ # cannot be negated.
+ def simple_selector(statement, values, can_negate = true)
+ tag_name = nil
+ attributes = []
+ pseudo = []
+ negation = []
+
+ # Element name. (Note that in negation, this can come at
+ # any order, but for simplicity we allow if only first).
+ statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
+ match.strip!
+ tag_name = match.downcase unless match == "*"
+ @source << match
+ "" # Remove
+ end
+
+ # Get identifier, class, attribute name, pseudo or negation.
+ while true
+ # Element identifier.
+ next if statement.sub!(/^#(\?|[\w\-]+)/) do |match|
+ id = $1
+ if id == "?"
+ id = values.shift
+ end
+ @source << "##{id}"
+ id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
+ attributes << ["id", id]
+ "" # Remove
+ end
+
+ # Class name.
+ next if statement.sub!(/^\.([\w\-]+)/) do |match|
+ class_name = $1
+ @source << ".#{class_name}"
+ class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp)
+ attributes << ["class", class_name]
+ "" # Remove
+ end
+
+ # Attribute value.
+ next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do |match|
+ name, equality, value = $1, $2, $3
+ if value == "?"
+ value = values.shift
+ else
+ # Handle single and double quotes.
+ value.strip!
+ if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
+ value = value[1..-2]
+ end
+ end
+ @source << "[#{name}#{equality}'#{value}']"
+ attributes << [name.downcase.strip, attribute_match(equality, value)]
+ "" # Remove
+ end
+
+ # Root element only.
+ next if statement.sub!(/^:root/) do |match|
+ pseudo << lambda do |element|
+ element.parent.nil? || !element.parent.tag?
+ end
+ @source << ":root"
+ "" # Remove
+ end
+
+ # Nth-child including last and of-type.
+ next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
+ reverse = $1 == "last-"
+ of_type = $2 == "of-type"
+ @source << ":nth-#{$1}#{$2}("
+ case $3
+ when "odd"
+ pseudo << nth_child(2, 1, of_type, reverse)
+ @source << "odd)"
+ when "even"
+ pseudo << nth_child(2, 2, of_type, reverse)
+ @source << "even)"
+ when /^(\d+|\?)$/ # b only
+ b = ($1 == "?" ? values.shift : $1).to_i
+ pseudo << nth_child(0, b, of_type, reverse)
+ @source << "#{b})"
+ when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
+ a = ($1 == "?" ? values.shift :
+ $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
+ b = ($2 == "?" ? values.shift : $2).to_i
+ pseudo << nth_child(a, b, of_type, reverse)
+ @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
+ else
+ raise ArgumentError, "Invalid nth-child #{match}"
+ end
+ "" # Remove
+ end
+ # First/last child (of type).
+ next if statement.sub!(/^:(first|last)-(child|of-type)/) do |match|
+ reverse = $1 == "last"
+ of_type = $2 == "of-type"
+ pseudo << nth_child(0, 1, of_type, reverse)
+ @source << ":#{$1}-#{$2}"
+ "" # Remove
+ end
+ # Only child (of type).
+ next if statement.sub!(/^:only-(child|of-type)/) do |match|
+ of_type = $1 == "of-type"
+ pseudo << only_child(of_type)
+ @source << ":only-#{$1}"
+ "" # Remove
+ end
+
+ # Empty: no child elements or meaningful content (whitespaces
+ # are ignored).
+ next if statement.sub!(/^:empty/) do |match|
+ pseudo << lambda do |element|
+ empty = true
+ for child in element.children
+ if child.tag? || !child.content.strip.empty?
+ empty = false
+ break
+ end
+ end
+ empty
+ end
+ @source << ":empty"
+ "" # Remove
+ end
+ # Content: match the text content of the element, stripping
+ # leading and trailing spaces.
+ next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do |match|
+ content = $1
+ if content == "?"
+ content = values.shift
+ elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
+ content = content[1..-2]
+ end
+ @source << ":content('#{content}')"
+ content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
+ pseudo << lambda do |element|
+ text = ""
+ for child in element.children
+ unless child.tag?
+ text << child.content
+ end
+ end
+ text.strip =~ content
+ end
+ "" # Remove
+ end
+
+ # Negation. Create another simple selector to handle it.
+ if statement.sub!(/^:not\(\s*/, "")
+ raise ArgumentError, "Double negatives are not missing feature" unless can_negate
+ @source << ":not("
+ negation << simple_selector(statement, values, false)
+ raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
+ @source << ")"
+ next
+ end
+
+ # No match: moving on.
+ break
+ end
+
+ # Return hash. The keys are mapped to instance variables.
+ {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
+ end
+
+
+ # Create a regular expression to match an attribute value based
+ # on the equality operator (=, ^=, |=, etc).
+ def attribute_match(equality, value)
+ regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
+ case equality
+ when "=" then
+ # Match the attribute value in full
+ Regexp.new("^#{regexp}$")
+ when "~=" then
+ # Match a space-separated word within the attribute value
+ Regexp.new("(^|\s)#{regexp}($|\s)")
+ when "^="
+ # Match the beginning of the attribute value
+ Regexp.new("^#{regexp}")
+ when "$="
+ # Match the end of the attribute value
+ Regexp.new("#{regexp}$")
+ when "*="
+ # Match substring of the attribute value
+ regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
+ when "|=" then
+ # Match the first space-separated item of the attribute value
+ Regexp.new("^#{regexp}($|\s)")
+ else
+ raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
+ # Match all attributes values (existence check)
+ //
+ end
+ end
+
+
+ # Returns a lambda that can match an element against the nth-child
+ # pseudo class, given the following arguments:
+ # * +a+ -- Value of a part.
+ # * +b+ -- Value of b part.
+ # * +of_type+ -- True to test only elements of this type (of-type).
+ # * +reverse+ -- True to count in reverse order (last-).
+ def nth_child(a, b, of_type, reverse)
+ # a = 0 means select at index b, if b = 0 nothing selected
+ return lambda { |element| false } if a == 0 && b == 0
+ # a < 0 and b < 0 will never match against an index
+ return lambda { |element| false } if a < 0 && b < 0
+ b = a + b + 1 if b < 0 # b < 0 just picks last element from each group
+ b -= 1 unless b == 0 # b == 0 is same as b == 1, otherwise zero based
+ lambda do |element|
+ # Element must be inside parent element.
+ return false unless element.parent && element.parent.tag?
+ index = 0
+ # Get siblings, reverse if counting from last.
+ siblings = element.parent.children
+ siblings = siblings.reverse if reverse
+ # Match element name if of-type, otherwise ignore name.
+ name = of_type ? element.name : nil
+ found = false
+ for child in siblings
+ # Skip text nodes/comments.
+ if child.tag? && (name == nil || child.name == name)
+ if a == 0
+ # Shortcut when a == 0 no need to go past count
+ if index == b
+ found = child.equal?(element)
+ break
+ end
+ elsif a < 0
+ # Only look for first b elements
+ break if index > b
+ if child.equal?(element)
+ found = (index % a) == 0
+ break
+ end
+ else
+ # Otherwise, break if child found and count == an+b
+ if child.equal?(element)
+ found = (index % a) == b
+ break
+ end
+ end
+ index += 1
+ end
+ end
+ found
+ end
+ end
+
+
+ # Creates a only child lambda. Pass +of-type+ to only look at
+ # elements of its type.
+ def only_child(of_type)
+ lambda do |element|
+ # Element must be inside parent element.
+ return false unless element.parent && element.parent.tag?
+ name = of_type ? element.name : nil
+ other = false
+ for child in element.parent.children
+ # Skip text nodes/comments.
+ if child.tag? && (name == nil || child.name == name)
+ unless child.equal?(element)
+ other = true
+ break
+ end
+ end
+ end
+ !other
+ end
+ end
+
+
+ # Called to create a dependent selector (sibling, descendant, etc).
+ # Passes the remainder of the statement that will be reduced to zero
+ # eventually, and array of substitution values.
+ #
+ # This method is called from four places, so it helps to put it here
+ # for reuse. The only logic deals with the need to detect comma
+ # separators (alternate) and apply them to the selector group of the
+ # top selector.
+ def next_selector(statement, values)
+ second = Selector.new(statement, values)
+ # If there are alternate selectors, we group them in the top selector.
+ if alternates = second.instance_variable_get(:@alternates)
+ second.instance_variable_set(:@alternates, [])
+ @alternates.concat alternates
+ end
+ second
+ end
+
+ end
+
+
+ # See HTML::Selector.new
+ def self.selector(statement, *values)
+ Selector.new(statement, *values)
+ end
+
+
+ class Tag
+
+ def select(selector, *values)
+ selector = HTML::Selector.new(selector, values)
+ selector.select(self)
+ end
+
+ end
+
+end
diff --git a/actionpack/lib/action_view/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_view/vendor/html-scanner/html/tokenizer.rb
new file mode 100644
index 0000000000..8ac8d34430
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner/html/tokenizer.rb
@@ -0,0 +1,107 @@
+require 'strscan'
+
+module HTML #:nodoc:
+
+ # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
+ # token is a string. Each string represents either "text", or an HTML element.
+ #
+ # This currently assumes valid XHTML, which means no free < or > characters.
+ #
+ # Usage:
+ #
+ # tokenizer = HTML::Tokenizer.new(text)
+ # while token = tokenizer.next
+ # p token
+ # end
+ class Tokenizer #:nodoc:
+
+ # The current (byte) position in the text
+ attr_reader :position
+
+ # The current line number
+ attr_reader :line
+
+ # Create a new Tokenizer for the given text.
+ def initialize(text)
+ text.encode!
+ @scanner = StringScanner.new(text)
+ @position = 0
+ @line = 0
+ @current_line = 1
+ end
+
+ # Return the next token in the sequence, or +nil+ if there are no more tokens in
+ # the stream.
+ def next
+ return nil if @scanner.eos?
+ @position = @scanner.pos
+ @line = @current_line
+ if @scanner.check(/<\S/)
+ update_current_line(scan_tag)
+ else
+ update_current_line(scan_text)
+ end
+ end
+
+ private
+
+ # Treat the text at the current position as a tag, and scan it. Supports
+ # comments, doctype tags, and regular tags, and ignores less-than and
+ # greater-than characters within quoted strings.
+ def scan_tag
+ tag = @scanner.getch
+ if @scanner.scan(/!--/) # comment
+ tag << @scanner.matched
+ tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
+ elsif @scanner.scan(/!\[CDATA\[/)
+ tag << @scanner.matched
+ tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/))
+ elsif @scanner.scan(/!/) # doctype
+ tag << @scanner.matched
+ tag << consume_quoted_regions
+ else
+ tag << consume_quoted_regions
+ end
+ tag
+ end
+
+ # Scan all text up to the next < character and return it.
+ def scan_text
+ "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
+ end
+
+ # Counts the number of newlines in the text and updates the current line
+ # accordingly.
+ def update_current_line(text)
+ text.scan(/\r?\n/) { @current_line += 1 }
+ end
+
+ # Skips over quoted strings, so that less-than and greater-than characters
+ # within the strings are ignored.
+ def consume_quoted_regions
+ text = ""
+ loop do
+ match = @scanner.scan_until(/['"<>]/) or break
+
+ delim = @scanner.matched
+ if delim == "<"
+ match = match.chop
+ @scanner.pos -= 1
+ end
+
+ text << match
+ break if delim == "<" || delim == ">"
+
+ # consume the quoted region
+ while match = @scanner.scan_until(/[\\#{delim}]/)
+ text << match
+ break if @scanner.matched == delim
+ break if @scanner.eos?
+ text << @scanner.getch # skip the escaped character
+ end
+ end
+ text
+ end
+ end
+
+end
diff --git a/actionpack/lib/action_view/vendor/html-scanner/html/version.rb b/actionpack/lib/action_view/vendor/html-scanner/html/version.rb
new file mode 100644
index 0000000000..6d645c3e14
--- /dev/null
+++ b/actionpack/lib/action_view/vendor/html-scanner/html/version.rb
@@ -0,0 +1,11 @@
+module HTML #:nodoc:
+ module Version #:nodoc:
+
+ MAJOR = 0
+ MINOR = 5
+ TINY = 3
+
+ STRING = [ MAJOR, MINOR, TINY ].join(".")
+
+ end
+end