aboutsummaryrefslogtreecommitdiffstats
path: root/actionview/lib
diff options
context:
space:
mode:
authorRafael Mendonça França <rafaelmfranca@gmail.com>2014-07-10 16:52:00 -0300
committerRafael Mendonça França <rafaelmfranca@gmail.com>2014-07-10 16:52:00 -0300
commit3229eda00c2a38526787fddb6cc307cfee5c5ac6 (patch)
tree9b45bf0c52e5b20ab403a5ac0025b9b1b18a5ce3 /actionview/lib
parent6e76031e8f1f815b390f966cb21e25c66e5ded50 (diff)
parentff1b7e75357eb7797f25aeab33994765130db67f (diff)
downloadrails-3229eda00c2a38526787fddb6cc307cfee5c5ac6.tar.gz
rails-3229eda00c2a38526787fddb6cc307cfee5c5ac6.tar.bz2
rails-3229eda00c2a38526787fddb6cc307cfee5c5ac6.zip
Merge pull request #11218 from kaspth/loofah-integration
Loofah-integration Conflicts: actionpack/CHANGELOG.md actionview/CHANGELOG.md
Diffstat (limited to 'actionview/lib')
-rw-r--r--actionview/lib/action_view.rb1
-rw-r--r--actionview/lib/action_view/helpers/sanitize_helper.rb165
-rw-r--r--actionview/lib/action_view/test_case.rb18
-rw-r--r--actionview/lib/action_view/vendor/html-scanner.rb20
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/document.rb68
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/node.rb532
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb188
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/selector.rb830
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb107
-rw-r--r--actionview/lib/action_view/vendor/html-scanner/html/version.rb11
10 files changed, 71 insertions, 1869 deletions
diff --git a/actionview/lib/action_view.rb b/actionview/lib/action_view.rb
index 50712e0830..6a1837c6e2 100644
--- a/actionview/lib/action_view.rb
+++ b/actionview/lib/action_view.rb
@@ -86,7 +86,6 @@ module ActionView
super
ActionView::Helpers.eager_load!
ActionView::Template.eager_load!
- HTML.eager_load!
end
end
diff --git a/actionview/lib/action_view/helpers/sanitize_helper.rb b/actionview/lib/action_view/helpers/sanitize_helper.rb
index 049af275b6..168f327a87 100644
--- a/actionview/lib/action_view/helpers/sanitize_helper.rb
+++ b/actionview/lib/action_view/helpers/sanitize_helper.rb
@@ -1,5 +1,6 @@
require 'active_support/core_ext/object/try'
-require 'action_view/vendor/html-scanner'
+require 'active_support/deprecation'
+require 'rails-html-sanitizer'
module ActionView
# = Action View Sanitize Helpers
@@ -27,7 +28,29 @@ module ActionView
#
# <%= sanitize @article.body %>
#
- # Custom Use (only the mentioned tags and attributes are allowed, nothing else)
+ # Custom Use - Custom Scrubber
+ # (supply a Loofah::Scrubber that does the sanitization)
+ #
+ # scrubber can either wrap a block:
+ # scrubber = Loofah::Scrubber.new do |node|
+ # node.text = "dawn of cats"
+ # end
+ #
+ # or be a subclass of Loofah::Scrubber which responds to scrub:
+ # class KittyApocalypse < Loofah::Scrubber
+ # def scrub(node)
+ # node.text = "dawn of cats"
+ # end
+ # end
+ # scrubber = KittyApocalypse.new
+ #
+ # <%= sanitize @article.body, scrubber: scrubber %>
+ #
+ # A custom scrubber takes precedence over custom tags and attributes
+ # Learn more about scrubbers here: https://github.com/flavorjones/loofah
+ #
+ # Custom Use - tags and attributes
+ # (only the mentioned tags and attributes are allowed, nothing else)
#
# <%= sanitize @article.body, tags: %w(table tr td), attributes: %w(id class style) %>
#
@@ -65,9 +88,9 @@ module ActionView
self.class.white_list_sanitizer.sanitize_css(style)
end
- # Strips all HTML tags from the +html+, including comments. This uses the
- # html-scanner tokenizer and so its HTML parsing ability is limited by
- # that of html-scanner.
+ # Strips all HTML tags from the +html+, including comments. This uses
+ # Nokogiri for tokenization (via Loofah) and so its HTML parsing ability
+ # is limited by that of Nokogiri.
#
# strip_tags("Strip <i>these</i> tags!")
# # => Strip these tags!
@@ -98,47 +121,37 @@ module ActionView
module ClassMethods #:nodoc:
attr_writer :full_sanitizer, :link_sanitizer, :white_list_sanitizer
- def sanitized_protocol_separator
- white_list_sanitizer.protocol_separator
- end
+ [:protocol_separator,
+ :uri_attributes,
+ :bad_tags,
+ :allowed_css_properties,
+ :allowed_css_keywords,
+ :shorthand_css_properties,
+ :allowed_protocols].each do |meth|
+ meth_name = "sanitized_#{meth}"
+ imp = lambda do |name|
+ ActiveSupport::Deprecation.warn("#{name} is deprecated and has no effect.")
+ end
- def sanitized_uri_attributes
- white_list_sanitizer.uri_attributes
+ define_method(meth_name) { imp.(meth_name) }
+ define_method("#{meth_name}=") { |value| imp.("#{meth_name}=") }
end
- def sanitized_bad_tags
- white_list_sanitizer.bad_tags
+ # Vendors the full, link and white list sanitizers.
+ # Strictly for backwards compatibility with html-scanner.
+ def sanitizer_vendor
+ Rails::Html::Sanitizer
end
def sanitized_allowed_tags
- white_list_sanitizer.allowed_tags
+ Rails::Html::WhiteListSanitizer.allowed_tags
end
def sanitized_allowed_attributes
- white_list_sanitizer.allowed_attributes
- end
-
- def sanitized_allowed_css_properties
- white_list_sanitizer.allowed_css_properties
- end
-
- def sanitized_allowed_css_keywords
- white_list_sanitizer.allowed_css_keywords
- end
-
- def sanitized_shorthand_css_properties
- white_list_sanitizer.shorthand_css_properties
- end
-
- def sanitized_allowed_protocols
- white_list_sanitizer.allowed_protocols
- end
-
- def sanitized_protocol_separator=(value)
- white_list_sanitizer.protocol_separator = value
+ Rails::Html::WhiteListSanitizer.allowed_attributes
end
- # Gets the HTML::FullSanitizer instance used by +strip_tags+. Replace with
+ # Gets the Rails::Html::FullSanitizer instance used by +strip_tags+. Replace with
# any object that responds to +sanitize+.
#
# class Application < Rails::Application
@@ -146,21 +159,21 @@ module ActionView
# end
#
def full_sanitizer
- @full_sanitizer ||= HTML::FullSanitizer.new
+ @full_sanitizer ||= sanitizer_vendor.full_sanitizer.new
end
- # Gets the HTML::LinkSanitizer instance used by +strip_links+. Replace with
- # any object that responds to +sanitize+.
+ # Gets the Rails::Html::LinkSanitizer instance used by +strip_links+.
+ # Replace with any object that responds to +sanitize+.
#
# class Application < Rails::Application
# config.action_view.link_sanitizer = MySpecialSanitizer.new
# end
#
def link_sanitizer
- @link_sanitizer ||= HTML::LinkSanitizer.new
+ @link_sanitizer ||= sanitizer_vendor.link_sanitizer.new
end
- # Gets the HTML::WhiteListSanitizer instance used by sanitize and +sanitize_css+.
+ # Gets the Rails::Html::WhiteListSanitizer instance used by sanitize and +sanitize_css+.
# Replace with any object that responds to +sanitize+.
#
# class Application < Rails::Application
@@ -168,87 +181,27 @@ module ActionView
# end
#
def white_list_sanitizer
- @white_list_sanitizer ||= HTML::WhiteListSanitizer.new
- end
-
- # Adds valid HTML attributes that the +sanitize+ helper checks for URIs.
- #
- # class Application < Rails::Application
- # config.action_view.sanitized_uri_attributes = 'lowsrc', 'target'
- # end
- #
- def sanitized_uri_attributes=(attributes)
- HTML::WhiteListSanitizer.uri_attributes.merge(attributes)
+ @white_list_sanitizer ||= sanitizer_vendor.white_list_sanitizer.new
end
- # Adds to the Set of 'bad' tags for the +sanitize+ helper.
- #
- # class Application < Rails::Application
- # config.action_view.sanitized_bad_tags = 'embed', 'object'
- # end
- #
- def sanitized_bad_tags=(attributes)
- HTML::WhiteListSanitizer.bad_tags.merge(attributes)
- end
-
- # Adds to the Set of allowed tags for the +sanitize+ helper.
+ # Replaces the allowed tags for the +sanitize+ helper.
#
# class Application < Rails::Application
# config.action_view.sanitized_allowed_tags = 'table', 'tr', 'td'
# end
#
- def sanitized_allowed_tags=(attributes)
- HTML::WhiteListSanitizer.allowed_tags.merge(attributes)
+ def sanitized_allowed_tags=(tags)
+ Rails::Html::WhiteListSanitizer.allowed_tags = tags
end
- # Adds to the Set of allowed HTML attributes for the +sanitize+ helper.
+ # Replaces the allowed HTML attributes for the +sanitize+ helper.
#
# class Application < Rails::Application
# config.action_view.sanitized_allowed_attributes = ['onclick', 'longdesc']
# end
#
def sanitized_allowed_attributes=(attributes)
- HTML::WhiteListSanitizer.allowed_attributes.merge(attributes)
- end
-
- # Adds to the Set of allowed CSS properties for the #sanitize and +sanitize_css+ helpers.
- #
- # class Application < Rails::Application
- # config.action_view.sanitized_allowed_css_properties = 'expression'
- # end
- #
- def sanitized_allowed_css_properties=(attributes)
- HTML::WhiteListSanitizer.allowed_css_properties.merge(attributes)
- end
-
- # Adds to the Set of allowed CSS keywords for the +sanitize+ and +sanitize_css+ helpers.
- #
- # class Application < Rails::Application
- # config.action_view.sanitized_allowed_css_keywords = 'expression'
- # end
- #
- def sanitized_allowed_css_keywords=(attributes)
- HTML::WhiteListSanitizer.allowed_css_keywords.merge(attributes)
- end
-
- # Adds to the Set of allowed shorthand CSS properties for the +sanitize+ and +sanitize_css+ helpers.
- #
- # class Application < Rails::Application
- # config.action_view.sanitized_shorthand_css_properties = 'expression'
- # end
- #
- def sanitized_shorthand_css_properties=(attributes)
- HTML::WhiteListSanitizer.shorthand_css_properties.merge(attributes)
- end
-
- # Adds to the Set of allowed protocols for the +sanitize+ helper.
- #
- # class Application < Rails::Application
- # config.action_view.sanitized_allowed_protocols = 'ssh', 'feed'
- # end
- #
- def sanitized_allowed_protocols=(attributes)
- HTML::WhiteListSanitizer.allowed_protocols.merge(attributes)
+ Rails::Html::WhiteListSanitizer.allowed_attributes = attributes
end
end
end
diff --git a/actionview/lib/action_view/test_case.rb b/actionview/lib/action_view/test_case.rb
index 9e8e6f43d5..2d54b90570 100644
--- a/actionview/lib/action_view/test_case.rb
+++ b/actionview/lib/action_view/test_case.rb
@@ -3,6 +3,9 @@ require 'action_controller'
require 'action_controller/test_case'
require 'action_view'
+require 'loofah'
+require 'rails-dom-testing'
+
module ActionView
# = Action View Test Case
class TestCase < ActiveSupport::TestCase
@@ -34,6 +37,7 @@ module ActionView
extend ActiveSupport::Concern
include ActionDispatch::Assertions, ActionDispatch::TestProcess
+ include Rails::Dom::Testing::Assertions
include ActionController::TemplateAssertions
include ActionView::Context
@@ -99,7 +103,9 @@ module ActionView
def setup_with_controller
@controller = ActionView::TestCase::TestController.new
@request = @controller.request
- @output_buffer = ActiveSupport::SafeBuffer.new
+ # empty string ensures buffer has UTF-8 encoding as
+ # new without arguments returns ASCII-8BIT encoded buffer like String#new
+ @output_buffer = ActiveSupport::SafeBuffer.new ''
@rendered = ''
make_test_case_available_to_view!
@@ -151,11 +157,10 @@ module ActionView
private
- # Support the selector assertions
- #
# Need to experiment if this priority is the best one: rendered => output_buffer
- def response_from_page
- HTML::Document.new(@rendered.blank? ? @output_buffer : @rendered).root
+ def document_root_element
+ @html_document ||= Loofah.document(@rendered.blank? ? @output_buffer : @rendered)
+ @html_document.root
end
def say_no_to_protect_against_forgery!
@@ -236,7 +241,8 @@ module ActionView
:@test_passed,
:@view,
:@view_context_class,
- :@_subscribers
+ :@_subscribers,
+ :@html_document
]
def _user_defined_ivars
diff --git a/actionview/lib/action_view/vendor/html-scanner.rb b/actionview/lib/action_view/vendor/html-scanner.rb
deleted file mode 100644
index 775b827529..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner.rb
+++ /dev/null
@@ -1,20 +0,0 @@
-$LOAD_PATH.unshift "#{File.dirname(__FILE__)}/html-scanner"
-
-module HTML
- extend ActiveSupport::Autoload
-
- eager_autoload do
- autoload :CDATA, 'html/node'
- autoload :Document, 'html/document'
- autoload :FullSanitizer, 'html/sanitizer'
- autoload :LinkSanitizer, 'html/sanitizer'
- autoload :Node, 'html/node'
- autoload :Sanitizer, 'html/sanitizer'
- autoload :Selector, 'html/selector'
- autoload :Tag, 'html/node'
- autoload :Text, 'html/node'
- autoload :Tokenizer, 'html/tokenizer'
- autoload :Version, 'html/version'
- autoload :WhiteListSanitizer, 'html/sanitizer'
- end
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/document.rb b/actionview/lib/action_view/vendor/html-scanner/html/document.rb
deleted file mode 100644
index 386820300a..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/document.rb
+++ /dev/null
@@ -1,68 +0,0 @@
-require 'html/tokenizer'
-require 'html/node'
-require 'html/selector'
-require 'html/sanitizer'
-
-module HTML #:nodoc:
- # A top-level HTML document. You give it a body of text, and it will parse that
- # text into a tree of nodes.
- class Document #:nodoc:
-
- # The root of the parsed document.
- attr_reader :root
-
- # Create a new Document from the given text.
- def initialize(text, strict=false, xml=false)
- tokenizer = Tokenizer.new(text)
- @root = Node.new(nil)
- node_stack = [ @root ]
- while token = tokenizer.next
- node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict)
-
- node_stack.last.children << node unless node.tag? && node.closing == :close
- if node.tag?
- if node_stack.length > 1 && node.closing == :close
- if node_stack.last.name == node.name
- if node_stack.last.children.empty?
- node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "")
- end
- node_stack.pop
- else
- open_start = node_stack.last.position - 20
- open_start = 0 if open_start < 0
- close_start = node.position - 20
- close_start = 0 if close_start < 0
- msg = <<EOF.strip
-ignoring attempt to close #{node_stack.last.name} with #{node.name}
- opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
- closed at byte #{node.position}, line #{node.line}
- attributes at open: #{node_stack.last.attributes.inspect}
- text around open: #{text[open_start,40].inspect}
- text around close: #{text[close_start,40].inspect}
-EOF
- strict ? raise(msg) : warn(msg)
- end
- elsif !node.childless?(xml) && node.closing != :close
- node_stack.push node
- end
- end
- end
- end
-
- # Search the tree for (and return) the first node that matches the given
- # conditions. The conditions are interpreted differently for different node
- # types, see HTML::Text#find and HTML::Tag#find.
- def find(conditions)
- @root.find(conditions)
- end
-
- # Search the tree for (and return) all nodes that match the given
- # conditions. The conditions are interpreted differently for different node
- # types, see HTML::Text#find and HTML::Tag#find.
- def find_all(conditions)
- @root.find_all(conditions)
- end
-
- end
-
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/node.rb b/actionview/lib/action_view/vendor/html-scanner/html/node.rb
deleted file mode 100644
index 27f0f2f6f8..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/node.rb
+++ /dev/null
@@ -1,532 +0,0 @@
-require 'strscan'
-
-module HTML #:nodoc:
-
- class Conditions < Hash #:nodoc:
- def initialize(hash)
- super()
- hash = { :content => hash } unless Hash === hash
- hash = keys_to_symbols(hash)
- hash.each do |k,v|
- case k
- when :tag, :content then
- # keys are valid, and require no further processing
- when :attributes then
- hash[k] = keys_to_strings(v)
- when :parent, :child, :ancestor, :descendant, :sibling, :before,
- :after
- hash[k] = Conditions.new(v)
- when :children
- hash[k] = v = keys_to_symbols(v)
- v.each do |key,value|
- case key
- when :count, :greater_than, :less_than
- # keys are valid, and require no further processing
- when :only
- v[key] = Conditions.new(value)
- else
- raise "illegal key #{key.inspect} => #{value.inspect}"
- end
- end
- else
- raise "illegal key #{k.inspect} => #{v.inspect}"
- end
- end
- update hash
- end
-
- private
-
- def keys_to_strings(hash)
- Hash[hash.keys.map {|k| [k.to_s, hash[k]]}]
- end
-
- def keys_to_symbols(hash)
- Hash[hash.keys.map do |k|
- raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
- [k.to_sym, hash[k]]
- end]
- end
- end
-
- # The base class of all nodes, textual and otherwise, in an HTML document.
- class Node #:nodoc:
- # The array of children of this node. Not all nodes have children.
- attr_reader :children
-
- # The parent node of this node. All nodes have a parent, except for the
- # root node.
- attr_reader :parent
-
- # The line number of the input where this node was begun
- attr_reader :line
-
- # The byte position in the input where this node was begun
- attr_reader :position
-
- # Create a new node as a child of the given parent.
- def initialize(parent, line=0, pos=0)
- @parent = parent
- @children = []
- @line, @position = line, pos
- end
-
- # Returns a textual representation of the node.
- def to_s
- @children.join()
- end
-
- # Returns false (subclasses must override this to provide specific matching
- # behavior.) +conditions+ may be of any type.
- def match(conditions)
- false
- end
-
- # Search the children of this node for the first node for which #find
- # returns non +nil+. Returns the result of the #find call that succeeded.
- def find(conditions)
- conditions = validate_conditions(conditions)
- @children.each do |child|
- node = child.find(conditions)
- return node if node
- end
- nil
- end
-
- # Search for all nodes that match the given conditions, and return them
- # as an array.
- def find_all(conditions)
- conditions = validate_conditions(conditions)
-
- matches = []
- matches << self if match(conditions)
- @children.each do |child|
- matches.concat child.find_all(conditions)
- end
- matches
- end
-
- # Returns +false+. Subclasses may override this if they define a kind of
- # tag.
- def tag?
- false
- end
-
- def validate_conditions(conditions)
- Conditions === conditions ? conditions : Conditions.new(conditions)
- end
-
- def ==(node)
- return false unless self.class == node.class && children.size == node.children.size
-
- equivalent = true
-
- children.size.times do |i|
- equivalent &&= children[i] == node.children[i]
- end
-
- equivalent
- end
-
- class <<self
- def parse(parent, line, pos, content, strict=true)
- if content !~ /^<\S/
- Text.new(parent, line, pos, content)
- else
- scanner = StringScanner.new(content)
-
- unless scanner.skip(/</)
- if strict
- raise "expected <"
- else
- return Text.new(parent, line, pos, content)
- end
- end
-
- if scanner.skip(/!\[CDATA\[/)
- unless scanner.skip_until(/\]\]>/)
- if strict
- raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
- else
- scanner.skip_until(/\Z/)
- end
- end
-
- return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
- end
-
- closing = ( scanner.scan(/\//) ? :close : nil )
- return Text.new(parent, line, pos, content) unless name = scanner.scan(/[^\s!>\/]+/)
- name.downcase!
-
- unless closing
- scanner.skip(/\s*/)
- attributes = {}
- while attr = scanner.scan(/[-\w:]+/)
- value = true
- if scanner.scan(/\s*=\s*/)
- if delim = scanner.scan(/['"]/)
- value = ""
- while text = scanner.scan(/[^#{delim}\\]+|./)
- case text
- when "\\" then
- value << text
- break if scanner.eos?
- value << scanner.getch
- when delim
- break
- else value << text
- end
- end
- else
- value = scanner.scan(/[^\s>\/]+/)
- end
- end
- attributes[attr.downcase] = value
- scanner.skip(/\s*/)
- end
-
- closing = ( scanner.scan(/\//) ? :self : nil )
- end
-
- unless scanner.scan(/\s*>/)
- if strict
- raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
- else
- # throw away all text until we find what we're looking for
- scanner.skip_until(/>/) or scanner.terminate
- end
- end
-
- Tag.new(parent, line, pos, name, attributes, closing)
- end
- end
- end
- end
-
- # A node that represents text, rather than markup.
- class Text < Node #:nodoc:
-
- attr_reader :content
-
- # Creates a new text node as a child of the given parent, with the given
- # content.
- def initialize(parent, line, pos, content)
- super(parent, line, pos)
- @content = content
- end
-
- # Returns the content of this node.
- def to_s
- @content
- end
-
- # Returns +self+ if this node meets the given conditions. Text nodes support
- # conditions of the following kinds:
- #
- # * if +conditions+ is a string, it must be a substring of the node's
- # content
- # * if +conditions+ is a regular expression, it must match the node's
- # content
- # * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
- # is either a string or a regexp, and which is interpreted as described
- # above.
- def find(conditions)
- match(conditions) && self
- end
-
- # Returns non-+nil+ if this node meets the given conditions, or +nil+
- # otherwise. See the discussion of #find for the valid conditions.
- def match(conditions)
- case conditions
- when String
- @content == conditions
- when Regexp
- @content =~ conditions
- when Hash
- conditions = validate_conditions(conditions)
-
- # Text nodes only have :content, :parent, :ancestor
- unless (conditions.keys - [:content, :parent, :ancestor]).empty?
- return false
- end
-
- match(conditions[:content])
- else
- nil
- end
- end
-
- def ==(node)
- return false unless super
- content == node.content
- end
- end
-
- # A CDATA node is simply a text node with a specialized way of displaying
- # itself.
- class CDATA < Text #:nodoc:
- def to_s
- "<![CDATA[#{super}]]>"
- end
- end
-
- # A Tag is any node that represents markup. It may be an opening tag, a
- # closing tag, or a self-closing tag. It has a name, and may have a hash of
- # attributes.
- class Tag < Node #:nodoc:
-
- # Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
- attr_reader :closing
-
- # Either +nil+, or a hash of attributes for this node.
- attr_reader :attributes
-
- # The name of this tag.
- attr_reader :name
-
- # Create a new node as a child of the given parent, using the given content
- # to describe the node. It will be parsed and the node name, attributes and
- # closing status extracted.
- def initialize(parent, line, pos, name, attributes, closing)
- super(parent, line, pos)
- @name = name
- @attributes = attributes
- @closing = closing
- end
-
- # A convenience for obtaining an attribute of the node. Returns +nil+ if
- # the node has no attributes.
- def [](attr)
- @attributes ? @attributes[attr] : nil
- end
-
- # Returns non-+nil+ if this tag can contain child nodes.
- def childless?(xml = false)
- return false if xml && @closing.nil?
- !@closing.nil? ||
- @name =~ /^(img|br|hr|link|meta|area|base|basefont|
- col|frame|input|isindex|param)$/ox
- end
-
- # Returns a textual representation of the node
- def to_s
- if @closing == :close
- "</#{@name}>"
- else
- s = "<#{@name}"
- @attributes.each do |k,v|
- s << " #{k}"
- s << "=\"#{v}\"" if String === v
- end
- s << " /" if @closing == :self
- s << ">"
- @children.each { |child| s << child.to_s }
- s << "</#{@name}>" if @closing != :self && !@children.empty?
- s
- end
- end
-
- # If either the node or any of its children meet the given conditions, the
- # matching node is returned. Otherwise, +nil+ is returned. (See the
- # description of the valid conditions in the +match+ method.)
- def find(conditions)
- match(conditions) && self || super
- end
-
- # Returns +true+, indicating that this node represents an HTML tag.
- def tag?
- true
- end
-
- # Returns +true+ if the node meets any of the given conditions. The
- # +conditions+ parameter must be a hash of any of the following keys
- # (all are optional):
- #
- # * <tt>:tag</tt>: the node name must match the corresponding value
- # * <tt>:attributes</tt>: a hash. The node's values must match the
- # corresponding values in the hash.
- # * <tt>:parent</tt>: a hash. The node's parent must match the
- # corresponding hash.
- # * <tt>:child</tt>: a hash. At least one of the node's immediate children
- # must meet the criteria described by the hash.
- # * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
- # meet the criteria described by the hash.
- # * <tt>:descendant</tt>: a hash. At least one of the node's descendants
- # must meet the criteria described by the hash.
- # * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
- # meet the criteria described by the hash.
- # * <tt>:after</tt>: a hash. The node must be after any sibling meeting
- # the criteria described by the hash, and at least one sibling must match.
- # * <tt>:before</tt>: a hash. The node must be before any sibling meeting
- # the criteria described by the hash, and at least one sibling must match.
- # * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
- # keys:
- # ** <tt>:count</tt>: either a number or a range which must equal (or
- # include) the number of children that match.
- # ** <tt>:less_than</tt>: the number of matching children must be less than
- # this number.
- # ** <tt>:greater_than</tt>: the number of matching children must be
- # greater than this number.
- # ** <tt>:only</tt>: another hash consisting of the keys to use
- # to match on the children, and only matching children will be
- # counted.
- #
- # Conditions are matched using the following algorithm:
- #
- # * if the condition is a string, it must be a substring of the value.
- # * if the condition is a regexp, it must match the value.
- # * if the condition is a number, the value must match number.to_s.
- # * if the condition is +true+, the value must not be +nil+.
- # * if the condition is +false+ or +nil+, the value must be +nil+.
- #
- # Usage:
- #
- # # test if the node is a "span" tag
- # node.match tag: "span"
- #
- # # test if the node's parent is a "div"
- # node.match parent: { tag: "div" }
- #
- # # test if any of the node's ancestors are "table" tags
- # node.match ancestor: { tag: "table" }
- #
- # # test if any of the node's immediate children are "em" tags
- # node.match child: { tag: "em" }
- #
- # # test if any of the node's descendants are "strong" tags
- # node.match descendant: { tag: "strong" }
- #
- # # test if the node has between 2 and 4 span tags as immediate children
- # node.match children: { count: 2..4, only: { tag: "span" } }
- #
- # # get funky: test to see if the node is a "div", has a "ul" ancestor
- # # and an "li" parent (with "class" = "enum"), and whether or not it has
- # # a "span" descendant that contains # text matching /hello world/:
- # node.match tag: "div",
- # ancestor: { tag: "ul" },
- # parent: { tag: "li",
- # attributes: { class: "enum" } },
- # descendant: { tag: "span",
- # child: /hello world/ }
- def match(conditions)
- conditions = validate_conditions(conditions)
- # check content of child nodes
- if conditions[:content]
- if children.empty?
- return false unless match_condition("", conditions[:content])
- else
- return false unless children.find { |child| child.match(conditions[:content]) }
- end
- end
-
- # test the name
- return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
-
- # test attributes
- (conditions[:attributes] || {}).each do |key, value|
- return false unless match_condition(self[key], value)
- end
-
- # test parent
- return false unless parent.match(conditions[:parent]) if conditions[:parent]
-
- # test children
- return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
-
- # test ancestors
- if conditions[:ancestor]
- return false unless catch :found do
- p = self
- throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
- end
- end
-
- # test descendants
- if conditions[:descendant]
- return false unless children.find do |child|
- # test the child
- child.match(conditions[:descendant]) ||
- # test the child's descendants
- child.match(:descendant => conditions[:descendant])
- end
- end
-
- # count children
- if opts = conditions[:children]
- matches = children.select do |c|
- (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
- end
-
- matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
- opts.each do |key, value|
- next if key == :only
- case key
- when :count
- if Integer === value
- return false if matches.length != value
- else
- return false unless value.include?(matches.length)
- end
- when :less_than
- return false unless matches.length < value
- when :greater_than
- return false unless matches.length > value
- else raise "unknown count condition #{key}"
- end
- end
- end
-
- # test siblings
- if conditions[:sibling] || conditions[:before] || conditions[:after]
- siblings = parent ? parent.children : []
- self_index = siblings.index(self)
-
- if conditions[:sibling]
- return false unless siblings.detect do |s|
- s != self && s.match(conditions[:sibling])
- end
- end
-
- if conditions[:before]
- return false unless siblings[self_index+1..-1].detect do |s|
- s != self && s.match(conditions[:before])
- end
- end
-
- if conditions[:after]
- return false unless siblings[0,self_index].detect do |s|
- s != self && s.match(conditions[:after])
- end
- end
- end
-
- true
- end
-
- def ==(node)
- return false unless super
- return false unless closing == node.closing && self.name == node.name
- attributes == node.attributes
- end
-
- private
- # Match the given value to the given condition.
- def match_condition(value, condition)
- case condition
- when String
- value && value == condition
- when Regexp
- value && value.match(condition)
- when Numeric
- value == condition.to_s
- when true
- !value.nil?
- when false, nil
- value.nil?
- else
- false
- end
- end
- end
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb
deleted file mode 100644
index ed34eecf55..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb
+++ /dev/null
@@ -1,188 +0,0 @@
-require 'set'
-require 'cgi'
-require 'active_support/core_ext/module/attribute_accessors'
-
-module HTML
- class Sanitizer
- def sanitize(text, options = {})
- validate_options(options)
- return text unless sanitizeable?(text)
- tokenize(text, options).join
- end
-
- def sanitizeable?(text)
- !(text.nil? || text.empty? || !text.index("<"))
- end
-
- protected
- def tokenize(text, options)
- tokenizer = HTML::Tokenizer.new(text)
- result = []
- while token = tokenizer.next
- node = Node.parse(nil, 0, 0, token, false)
- process_node node, result, options
- end
- result
- end
-
- def process_node(node, result, options)
- result << node.to_s
- end
-
- def validate_options(options)
- if options[:tags] && !options[:tags].is_a?(Enumerable)
- raise ArgumentError, "You should pass :tags as an Enumerable"
- end
-
- if options[:attributes] && !options[:attributes].is_a?(Enumerable)
- raise ArgumentError, "You should pass :attributes as an Enumerable"
- end
- end
- end
-
- class FullSanitizer < Sanitizer
- def sanitize(text, options = {})
- result = super
- # strip any comments, and if they have a newline at the end (ie. line with
- # only a comment) strip that too
- result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if (result && result =~ /<!--(.*?)-->[\n]?/m)
- # Recurse - handle all dirty nested tags
- result == text ? result : sanitize(result, options)
- end
-
- def process_node(node, result, options)
- result << node.to_s if node.class == HTML::Text
- end
- end
-
- class LinkSanitizer < FullSanitizer
- cattr_accessor :included_tags, :instance_writer => false
- self.included_tags = Set.new(%w(a href))
-
- def sanitizeable?(text)
- !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
- end
-
- protected
- def process_node(node, result, options)
- result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
- end
- end
-
- class WhiteListSanitizer < Sanitizer
- [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
- :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
- class_attribute attr, :instance_writer => false
- end
-
- # A regular expression of the valid characters used to separate protocols like
- # the ':' in 'http://foo.com'
- self.protocol_separator = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
-
- # Specifies a Set of HTML attributes that can have URIs.
- self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
-
- # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
- # to just escaping harmless tags like &lt;font&gt;
- self.bad_tags = Set.new(%w(script))
-
- # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
- self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
- sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
- acronym a img blockquote del ins))
-
- # Specifies the default Set of html attributes that the #sanitize helper will leave
- # in the allowed tag.
- self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
-
- # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
- self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
- feed svn urn aim rsync tag ssh sftp rtsp afs))
-
- # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
- self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
- border-color border-left-color border-right-color border-top-color clear color cursor direction display
- elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
- overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
- speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
- width))
-
- # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
- self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
- collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
- nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
-
- # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
- self.shorthand_css_properties = Set.new(%w(background border margin padding))
-
- # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
- def sanitize_css(style)
- # disallow urls
- style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
-
- # gauntlet
- if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ ||
- style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/
- return ''
- end
-
- clean = []
- style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
- if allowed_css_properties.include?(prop.downcase)
- clean << prop + ': ' + val + ';'
- elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
- unless val.split().any? do |keyword|
- !allowed_css_keywords.include?(keyword) &&
- keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
- end
- clean << prop + ': ' + val + ';'
- end
- end
- end
- clean.join(' ')
- end
-
- protected
- def tokenize(text, options)
- options[:parent] = []
- options[:attributes] ||= allowed_attributes
- options[:tags] ||= allowed_tags
- super
- end
-
- def process_node(node, result, options)
- result << case node
- when HTML::Tag
- if node.closing == :close
- options[:parent].shift
- else
- options[:parent].unshift node.name
- end
-
- process_attributes_for node, options
-
- options[:tags].include?(node.name) ? node : nil
- else
- bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
- end
- end
-
- def process_attributes_for(node, options)
- return unless node.attributes
- node.attributes.keys.each do |attr_name|
- value = node.attributes[attr_name].to_s
-
- if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
- node.attributes.delete(attr_name)
- else
- node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value))
- end
- end
- end
-
- def contains_bad_protocols?(attr_name, value)
- uri_attributes.include?(attr_name) &&
- (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip))
- end
- end
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb b/actionview/lib/action_view/vendor/html-scanner/html/selector.rb
deleted file mode 100644
index dfdd724b9b..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb
+++ /dev/null
@@ -1,830 +0,0 @@
-#--
-# Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
-# Under MIT and/or CC By license.
-#++
-
-module HTML
-
- # Selects HTML elements using CSS 2 selectors.
- #
- # The +Selector+ class uses CSS selector expressions to match and select
- # HTML elements.
- #
- # For example:
- # selector = HTML::Selector.new "form.login[action=/login]"
- # creates a new selector that matches any +form+ element with the class
- # +login+ and an attribute +action+ with the value <tt>/login</tt>.
- #
- # === Matching Elements
- #
- # Use the #match method to determine if an element matches the selector.
- #
- # For simple selectors, the method returns an array with that element,
- # or +nil+ if the element does not match. For complex selectors (see below)
- # the method returns an array with all matched elements, of +nil+ if no
- # match found.
- #
- # For example:
- # if selector.match(element)
- # puts "Element is a login form"
- # end
- #
- # === Selecting Elements
- #
- # Use the #select method to select all matching elements starting with
- # one element and going through all children in depth-first order.
- #
- # This method returns an array of all matching elements, an empty array
- # if no match is found
- #
- # For example:
- # selector = HTML::Selector.new "input[type=text]"
- # matches = selector.select(element)
- # matches.each do |match|
- # puts "Found text field with name #{match.attributes['name']}"
- # end
- #
- # === Expressions
- #
- # Selectors can match elements using any of the following criteria:
- # * <tt>name</tt> -- Match an element based on its name (tag name).
- # For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
- # to match any element.
- # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
- # <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
- # * <tt>.class</tt> -- Match an element based on its class name, all
- # class names if more than one specified.
- # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
- # * <tt>[attr=value]</tt> -- Match an element that has the specified
- # attribute and value. (More operators are supported see below)
- # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
- # such as <tt>:nth-child</tt> and <tt>:empty</tt>.
- # * <tt>:not(expr)</tt> -- Match an element that does not match the
- # negation expression.
- #
- # When using a combination of the above, the element name comes first
- # followed by identifier, class names, attributes, pseudo classes and
- # negation in any order. Do not separate these parts with spaces!
- # Space separation is used for descendant selectors.
- #
- # For example:
- # selector = HTML::Selector.new "form.login[action=/login]"
- # The matched element must be of type +form+ and have the class +login+.
- # It may have other classes, but the class +login+ is required to match.
- # It must also have an attribute called +action+ with the value
- # <tt>/login</tt>.
- #
- # This selector will match the following element:
- # <form class="login form" method="post" action="/login">
- # but will not match the element:
- # <form method="post" action="/logout">
- #
- # === Attribute Values
- #
- # Several operators are supported for matching attributes:
- # * <tt>name</tt> -- The element must have an attribute with that name.
- # * <tt>name=value</tt> -- The element must have an attribute with that
- # name and value.
- # * <tt>name^=value</tt> -- The attribute value must start with the
- # specified value.
- # * <tt>name$=value</tt> -- The attribute value must end with the
- # specified value.
- # * <tt>name*=value</tt> -- The attribute value must contain the
- # specified value.
- # * <tt>name~=word</tt> -- The attribute value must contain the specified
- # word (space separated).
- # * <tt>name|=word</tt> -- The attribute value must start with specified
- # word.
- #
- # For example, the following two selectors match the same element:
- # #my_id
- # [id=my_id]
- # and so do the following two selectors:
- # .my_class
- # [class~=my_class]
- #
- # === Alternatives, siblings, children
- #
- # Complex selectors use a combination of expressions to match elements:
- # * <tt>expr1 expr2</tt> -- Match any element against the second expression
- # if it has some parent element that matches the first expression.
- # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
- # if it is the child of an element that matches the first expression.
- # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
- # if it immediately follows an element that matches the first expression.
- # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
- # that comes after an element that matches the first expression.
- # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
- # or against the second expression.
- #
- # Since children and sibling selectors may match more than one element given
- # the first element, the #match method may return more than one match.
- #
- # === Pseudo classes
- #
- # Pseudo classes were introduced in CSS 3. They are most often used to select
- # elements in a given position:
- # * <tt>:root</tt> -- Match the element only if it is the root element
- # (no parent element).
- # * <tt>:empty</tt> -- Match the element only if it has no child elements,
- # and no text content.
- # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt>
- # as its text content (ignoring leading and trailing whitespace).
- # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
- # of its parent element.
- # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
- # of its parent element and its type.
- # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
- # of its parent element.
- # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
- # of its parent element of its type.
- # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
- # of its parent element.
- # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
- # of its parent element of its type.
- # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
- # of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
- # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
- # in each group of <tt>a</tt> child elements of its parent element.
- # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
- # in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
- # elements of its parent element.
- # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
- # Same as <tt>:nth-child(2n+1)</tt>.
- # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
- # fourth). Same as <tt>:nth-child(2n+2)</tt>.
- # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
- # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
- # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
- # only elements of its type.
- # * <tt>:not(selector)</tt> -- Match the element only if the element does not
- # match the simple selector.
- #
- # As you can see, <tt>:nth-child</tt> pseudo class and its variant can get quite
- # tricky and the CSS specification doesn't do a much better job explaining it.
- # But after reading the examples and trying a few combinations, it's easy to
- # figure out.
- #
- # For example:
- # table tr:nth-child(odd)
- # Selects every second row in the table starting with the first one.
- #
- # div p:nth-child(4)
- # Selects the fourth paragraph in the +div+, but not if the +div+ contains
- # other elements, since those are also counted.
- #
- # div p:nth-of-type(4)
- # Selects the fourth paragraph in the +div+, counting only paragraphs, and
- # ignoring all other elements.
- #
- # div p:nth-of-type(-n+4)
- # Selects the first four paragraphs, ignoring all others.
- #
- # And you can always select an element that matches one set of rules but
- # not another using <tt>:not</tt>. For example:
- # p:not(.post)
- # Matches all paragraphs that do not have the class <tt>.post</tt>.
- #
- # === Substitution Values
- #
- # You can use substitution with identifiers, class names and element values.
- # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
- # next value in the argument list following the CSS expression.
- #
- # The substitution value may be a string or a regular expression. All other
- # values are converted to strings.
- #
- # For example:
- # selector = HTML::Selector.new "#?", /^\d+$/
- # matches any element whose identifier consists of one or more digits.
- #
- # See http://www.w3.org/TR/css3-selectors/
- class Selector
-
-
- # An invalid selector.
- class InvalidSelectorError < StandardError #:nodoc:
- end
-
-
- class << self
-
- # :call-seq:
- # Selector.for_class(cls) => selector
- #
- # Creates a new selector for the given class name.
- def for_class(cls)
- self.new([".?", cls])
- end
-
-
- # :call-seq:
- # Selector.for_id(id) => selector
- #
- # Creates a new selector for the given id.
- def for_id(id)
- self.new(["#?", id])
- end
-
- end
-
-
- # :call-seq:
- # Selector.new(string, [values ...]) => selector
- #
- # Creates a new selector from a CSS 2 selector expression.
- #
- # The first argument is the selector expression. All other arguments
- # are used for value substitution.
- #
- # Throws InvalidSelectorError is the selector expression is invalid.
- def initialize(selector, *values)
- raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
- @source = ""
- values = values[0] if values.size == 1 && values[0].is_a?(Array)
-
- # We need a copy to determine if we failed to parse, and also
- # preserve the original pass by-ref statement.
- statement = selector.strip.dup
-
- # Create a simple selector, along with negation.
- simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
-
- @alternates = []
- @depends = nil
-
- # Alternative selector.
- if statement.sub!(/^\s*,\s*/, "")
- second = Selector.new(statement, values)
- @alternates << second
- # If there are alternate selectors, we group them in the top selector.
- if alternates = second.instance_variable_get(:@alternates)
- second.instance_variable_set(:@alternates, [])
- @alternates.concat alternates
- end
- @source << " , " << second.to_s
- # Sibling selector: create a dependency into second selector that will
- # match element immediately following this one.
- elsif statement.sub!(/^\s*\+\s*/, "")
- second = next_selector(statement, values)
- @depends = lambda do |element, first|
- if element = next_element(element)
- second.match(element, first)
- end
- end
- @source << " + " << second.to_s
- # Adjacent selector: create a dependency into second selector that will
- # match all elements following this one.
- elsif statement.sub!(/^\s*~\s*/, "")
- second = next_selector(statement, values)
- @depends = lambda do |element, first|
- matches = []
- while element = next_element(element)
- if subset = second.match(element, first)
- if first && !subset.empty?
- matches << subset.first
- break
- else
- matches.concat subset
- end
- end
- end
- matches.empty? ? nil : matches
- end
- @source << " ~ " << second.to_s
- # Child selector: create a dependency into second selector that will
- # match a child element of this one.
- elsif statement.sub!(/^\s*>\s*/, "")
- second = next_selector(statement, values)
- @depends = lambda do |element, first|
- matches = []
- element.children.each do |child|
- if child.tag? && subset = second.match(child, first)
- if first && !subset.empty?
- matches << subset.first
- break
- else
- matches.concat subset
- end
- end
- end
- matches.empty? ? nil : matches
- end
- @source << " > " << second.to_s
- # Descendant selector: create a dependency into second selector that
- # will match all descendant elements of this one. Note,
- elsif statement =~ /^\s+\S+/ && statement != selector
- second = next_selector(statement, values)
- @depends = lambda do |element, first|
- matches = []
- stack = element.children.reverse
- while node = stack.pop
- next unless node.tag?
- if subset = second.match(node, first)
- if first && !subset.empty?
- matches << subset.first
- break
- else
- matches.concat subset
- end
- elsif children = node.children
- stack.concat children.reverse
- end
- end
- matches.empty? ? nil : matches
- end
- @source << " " << second.to_s
- else
- # The last selector is where we check that we parsed
- # all the parts.
- unless statement.empty? || statement.strip.empty?
- raise ArgumentError, "Invalid selector: #{statement}"
- end
- end
- end
-
-
- # :call-seq:
- # match(element, first?) => array or nil
- #
- # Matches an element against the selector.
- #
- # For a simple selector this method returns an array with the
- # element if the element matches, nil otherwise.
- #
- # For a complex selector (sibling and descendant) this method
- # returns an array with all matching elements, nil if no match is
- # found.
- #
- # Use +first_only=true+ if you are only interested in the first element.
- #
- # For example:
- # if selector.match(element)
- # puts "Element is a login form"
- # end
- def match(element, first_only = false)
- # Match element if no element name or element name same as element name
- if matched = (!@tag_name || @tag_name == element.name)
- # No match if one of the attribute matches failed
- for attr in @attributes
- if element.attributes[attr[0]] !~ attr[1]
- matched = false
- break
- end
- end
- end
-
- # Pseudo class matches (nth-child, empty, etc).
- if matched
- for pseudo in @pseudo
- unless pseudo.call(element)
- matched = false
- break
- end
- end
- end
-
- # Negation. Same rules as above, but we fail if a match is made.
- if matched && @negation
- for negation in @negation
- if negation[:tag_name] == element.name
- matched = false
- else
- for attr in negation[:attributes]
- if element.attributes[attr[0]] =~ attr[1]
- matched = false
- break
- end
- end
- end
- if matched
- for pseudo in negation[:pseudo]
- if pseudo.call(element)
- matched = false
- break
- end
- end
- end
- break unless matched
- end
- end
-
- # If element matched but depends on another element (child,
- # sibling, etc), apply the dependent matches instead.
- if matched && @depends
- matches = @depends.call(element, first_only)
- else
- matches = matched ? [element] : nil
- end
-
- # If this selector is part of the group, try all the alternative
- # selectors (unless first_only).
- if !first_only || !matches
- @alternates.each do |alternate|
- break if matches && first_only
- if subset = alternate.match(element, first_only)
- if matches
- matches.concat subset
- else
- matches = subset
- end
- end
- end
- end
-
- matches
- end
-
-
- # :call-seq:
- # select(root) => array
- #
- # Selects and returns an array with all matching elements, beginning
- # with one node and traversing through all children depth-first.
- # Returns an empty array if no match is found.
- #
- # The root node may be any element in the document, or the document
- # itself.
- #
- # For example:
- # selector = HTML::Selector.new "input[type=text]"
- # matches = selector.select(element)
- # matches.each do |match|
- # puts "Found text field with name #{match.attributes['name']}"
- # end
- def select(root)
- matches = []
- stack = [root]
- while node = stack.pop
- if node.tag? && subset = match(node, false)
- subset.each do |match|
- matches << match unless matches.any? { |item| item.equal?(match) }
- end
- elsif children = node.children
- stack.concat children.reverse
- end
- end
- matches
- end
-
-
- # Similar to #select but returns the first matching element. Returns +nil+
- # if no element matches the selector.
- def select_first(root)
- stack = [root]
- while node = stack.pop
- if node.tag? && subset = match(node, true)
- return subset.first if !subset.empty?
- elsif children = node.children
- stack.concat children.reverse
- end
- end
- nil
- end
-
-
- def to_s #:nodoc:
- @source
- end
-
-
- # Returns the next element after this one. Skips sibling text nodes.
- #
- # With the +name+ argument, returns the next element with that name,
- # skipping other sibling elements.
- def next_element(element, name = nil)
- if siblings = element.parent.children
- found = false
- siblings.each do |node|
- if node.equal?(element)
- found = true
- elsif found && node.tag?
- return node if (name.nil? || node.name == name)
- end
- end
- end
- nil
- end
-
-
- protected
-
-
- # Creates a simple selector given the statement and array of
- # substitution values.
- #
- # Returns a hash with the values +tag_name+, +attributes+,
- # +pseudo+ (classes) and +negation+.
- #
- # Called the first time with +can_negate+ true to allow
- # negation. Called a second time with false since negation
- # cannot be negated.
- def simple_selector(statement, values, can_negate = true)
- tag_name = nil
- attributes = []
- pseudo = []
- negation = []
-
- # Element name. (Note that in negation, this can come at
- # any order, but for simplicity we allow if only first).
- statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
- match.strip!
- tag_name = match.downcase unless match == "*"
- @source << match
- "" # Remove
- end
-
- # Get identifier, class, attribute name, pseudo or negation.
- while true
- # Element identifier.
- next if statement.sub!(/^#(\?|[\w\-]+)/) do
- id = $1
- if id == "?"
- id = values.shift
- end
- @source << "##{id}"
- id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
- attributes << ["id", id]
- "" # Remove
- end
-
- # Class name.
- next if statement.sub!(/^\.([\w\-]+)/) do
- class_name = $1
- @source << ".#{class_name}"
- class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp)
- attributes << ["class", class_name]
- "" # Remove
- end
-
- # Attribute value.
- next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do
- name, equality, value = $1, $2, $3
- if value == "?"
- value = values.shift
- else
- # Handle single and double quotes.
- value.strip!
- if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
- value = value[1..-2]
- end
- end
- @source << "[#{name}#{equality}'#{value}']"
- attributes << [name.downcase.strip, attribute_match(equality, value)]
- "" # Remove
- end
-
- # Root element only.
- next if statement.sub!(/^:root/) do
- pseudo << lambda do |element|
- element.parent.nil? || !element.parent.tag?
- end
- @source << ":root"
- "" # Remove
- end
-
- # Nth-child including last and of-type.
- next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
- reverse = $1 == "last-"
- of_type = $2 == "of-type"
- @source << ":nth-#{$1}#{$2}("
- case $3
- when "odd"
- pseudo << nth_child(2, 1, of_type, reverse)
- @source << "odd)"
- when "even"
- pseudo << nth_child(2, 2, of_type, reverse)
- @source << "even)"
- when /^(\d+|\?)$/ # b only
- b = ($1 == "?" ? values.shift : $1).to_i
- pseudo << nth_child(0, b, of_type, reverse)
- @source << "#{b})"
- when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
- a = ($1 == "?" ? values.shift :
- $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
- b = ($2 == "?" ? values.shift : $2).to_i
- pseudo << nth_child(a, b, of_type, reverse)
- @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
- else
- raise ArgumentError, "Invalid nth-child #{match}"
- end
- "" # Remove
- end
- # First/last child (of type).
- next if statement.sub!(/^:(first|last)-(child|of-type)/) do
- reverse = $1 == "last"
- of_type = $2 == "of-type"
- pseudo << nth_child(0, 1, of_type, reverse)
- @source << ":#{$1}-#{$2}"
- "" # Remove
- end
- # Only child (of type).
- next if statement.sub!(/^:only-(child|of-type)/) do
- of_type = $1 == "of-type"
- pseudo << only_child(of_type)
- @source << ":only-#{$1}"
- "" # Remove
- end
-
- # Empty: no child elements or meaningful content (whitespaces
- # are ignored).
- next if statement.sub!(/^:empty/) do
- pseudo << lambda do |element|
- empty = true
- for child in element.children
- if child.tag? || !child.content.strip.empty?
- empty = false
- break
- end
- end
- empty
- end
- @source << ":empty"
- "" # Remove
- end
- # Content: match the text content of the element, stripping
- # leading and trailing spaces.
- next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do
- content = $1
- if content == "?"
- content = values.shift
- elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
- content = content[1..-2]
- end
- @source << ":content('#{content}')"
- content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
- pseudo << lambda do |element|
- text = ""
- for child in element.children
- unless child.tag?
- text << child.content
- end
- end
- text.strip =~ content
- end
- "" # Remove
- end
-
- # Negation. Create another simple selector to handle it.
- if statement.sub!(/^:not\(\s*/, "")
- raise ArgumentError, "Double negatives are not missing feature" unless can_negate
- @source << ":not("
- negation << simple_selector(statement, values, false)
- raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
- @source << ")"
- next
- end
-
- # No match: moving on.
- break
- end
-
- # Return hash. The keys are mapped to instance variables.
- {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
- end
-
-
- # Create a regular expression to match an attribute value based
- # on the equality operator (=, ^=, |=, etc).
- def attribute_match(equality, value)
- regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
- case equality
- when "=" then
- # Match the attribute value in full
- Regexp.new("^#{regexp}$")
- when "~=" then
- # Match a space-separated word within the attribute value
- Regexp.new("(^|\s)#{regexp}($|\s)")
- when "^="
- # Match the beginning of the attribute value
- Regexp.new("^#{regexp}")
- when "$="
- # Match the end of the attribute value
- Regexp.new("#{regexp}$")
- when "*="
- # Match substring of the attribute value
- regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
- when "|=" then
- # Match the first space-separated item of the attribute value
- Regexp.new("^#{regexp}($|\s)")
- else
- raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
- # Match all attributes values (existence check)
- //
- end
- end
-
-
- # Returns a lambda that can match an element against the nth-child
- # pseudo class, given the following arguments:
- # * +a+ -- Value of a part.
- # * +b+ -- Value of b part.
- # * +of_type+ -- True to test only elements of this type (of-type).
- # * +reverse+ -- True to count in reverse order (last-).
- def nth_child(a, b, of_type, reverse)
- # a = 0 means select at index b, if b = 0 nothing selected
- return lambda { |element| false } if a == 0 && b == 0
- # a < 0 and b < 0 will never match against an index
- return lambda { |element| false } if a < 0 && b < 0
- b = a + b + 1 if b < 0 # b < 0 just picks last element from each group
- b -= 1 unless b == 0 # b == 0 is same as b == 1, otherwise zero based
- lambda do |element|
- # Element must be inside parent element.
- return false unless element.parent && element.parent.tag?
- index = 0
- # Get siblings, reverse if counting from last.
- siblings = element.parent.children
- siblings = siblings.reverse if reverse
- # Match element name if of-type, otherwise ignore name.
- name = of_type ? element.name : nil
- found = false
- for child in siblings
- # Skip text nodes/comments.
- if child.tag? && (name == nil || child.name == name)
- if a == 0
- # Shortcut when a == 0 no need to go past count
- if index == b
- found = child.equal?(element)
- break
- end
- elsif a < 0
- # Only look for first b elements
- break if index > b
- if child.equal?(element)
- found = (index % a) == 0
- break
- end
- else
- # Otherwise, break if child found and count == an+b
- if child.equal?(element)
- found = (index % a) == b
- break
- end
- end
- index += 1
- end
- end
- found
- end
- end
-
-
- # Creates a only child lambda. Pass +of-type+ to only look at
- # elements of its type.
- def only_child(of_type)
- lambda do |element|
- # Element must be inside parent element.
- return false unless element.parent && element.parent.tag?
- name = of_type ? element.name : nil
- other = false
- for child in element.parent.children
- # Skip text nodes/comments.
- if child.tag? && (name == nil || child.name == name)
- unless child.equal?(element)
- other = true
- break
- end
- end
- end
- !other
- end
- end
-
-
- # Called to create a dependent selector (sibling, descendant, etc).
- # Passes the remainder of the statement that will be reduced to zero
- # eventually, and array of substitution values.
- #
- # This method is called from four places, so it helps to put it here
- # for reuse. The only logic deals with the need to detect comma
- # separators (alternate) and apply them to the selector group of the
- # top selector.
- def next_selector(statement, values)
- second = Selector.new(statement, values)
- # If there are alternate selectors, we group them in the top selector.
- if alternates = second.instance_variable_get(:@alternates)
- second.instance_variable_set(:@alternates, [])
- @alternates.concat alternates
- end
- second
- end
-
- end
-
-
- # See HTML::Selector.new
- def self.selector(statement, *values)
- Selector.new(statement, *values)
- end
-
-
- class Tag
-
- def select(selector, *values)
- selector = HTML::Selector.new(selector, values)
- selector.select(self)
- end
-
- end
-
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
deleted file mode 100644
index adf4e45930..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
+++ /dev/null
@@ -1,107 +0,0 @@
-require 'strscan'
-
-module HTML #:nodoc:
-
- # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
- # token is a string. Each string represents either "text", or an HTML element.
- #
- # This currently assumes valid XHTML, which means no free < or > characters.
- #
- # Usage:
- #
- # tokenizer = HTML::Tokenizer.new(text)
- # while token = tokenizer.next
- # p token
- # end
- class Tokenizer #:nodoc:
-
- # The current (byte) position in the text
- attr_reader :position
-
- # The current line number
- attr_reader :line
-
- # Create a new Tokenizer for the given text.
- def initialize(text)
- text.encode!
- @scanner = StringScanner.new(text)
- @position = 0
- @line = 0
- @current_line = 1
- end
-
- # Returns the next token in the sequence, or +nil+ if there are no more tokens in
- # the stream.
- def next
- return nil if @scanner.eos?
- @position = @scanner.pos
- @line = @current_line
- if @scanner.check(/<\S/)
- update_current_line(scan_tag)
- else
- update_current_line(scan_text)
- end
- end
-
- private
-
- # Treat the text at the current position as a tag, and scan it. Supports
- # comments, doctype tags, and regular tags, and ignores less-than and
- # greater-than characters within quoted strings.
- def scan_tag
- tag = @scanner.getch
- if @scanner.scan(/!--/) # comment
- tag << @scanner.matched
- tag << (@scanner.scan_until(/--\s*>/) || @scanner.scan_until(/\Z/))
- elsif @scanner.scan(/!\[CDATA\[/)
- tag << @scanner.matched
- tag << (@scanner.scan_until(/\]\]>/) || @scanner.scan_until(/\Z/))
- elsif @scanner.scan(/!/) # doctype
- tag << @scanner.matched
- tag << consume_quoted_regions
- else
- tag << consume_quoted_regions
- end
- tag
- end
-
- # Scan all text up to the next < character and return it.
- def scan_text
- "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
- end
-
- # Counts the number of newlines in the text and updates the current line
- # accordingly.
- def update_current_line(text)
- text.scan(/\r?\n/) { @current_line += 1 }
- end
-
- # Skips over quoted strings, so that less-than and greater-than characters
- # within the strings are ignored.
- def consume_quoted_regions
- text = ""
- loop do
- match = @scanner.scan_until(/['"<>]/) or break
-
- delim = @scanner.matched
- if delim == "<"
- match = match.chop
- @scanner.pos -= 1
- end
-
- text << match
- break if delim == "<" || delim == ">"
-
- # consume the quoted region
- while match = @scanner.scan_until(/[\\#{delim}]/)
- text << match
- break if @scanner.matched == delim
- break if @scanner.eos?
- text << @scanner.getch # skip the escaped character
- end
- end
- text
- end
- end
-
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/version.rb b/actionview/lib/action_view/vendor/html-scanner/html/version.rb
deleted file mode 100644
index 6d645c3e14..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/version.rb
+++ /dev/null
@@ -1,11 +0,0 @@
-module HTML #:nodoc:
- module Version #:nodoc:
-
- MAJOR = 0
- MINOR = 5
- TINY = 3
-
- STRING = [ MAJOR, MINOR, TINY ].join(".")
-
- end
-end