From 33019a321c7b8083068850750a3f4c466ae7c059 Mon Sep 17 00:00:00 2001
From: Timm
Date: Fri, 23 May 2014 23:34:46 +0200
Subject: Remove html-scanner and its tests.
---
actionview/lib/action_view/vendor/html-scanner.rb | 22 -
.../vendor/html-scanner/html/document.rb | 68 --
.../action_view/vendor/html-scanner/html/node.rb | 532 -------------
.../vendor/html-scanner/html/sanitizer.rb | 202 -----
.../vendor/html-scanner/html/selector.rb | 830 ---------------------
.../vendor/html-scanner/html/tokenizer.rb | 107 ---
.../vendor/html-scanner/html/version.rb | 11 -
.../test/template/html-scanner/cdata_node_test.rb | 16 -
.../test/template/html-scanner/document_test.rb | 149 ----
actionview/test/template/html-scanner/node_test.rb | 90 ---
.../test/template/html-scanner/tag_node_test.rb | 244 ------
.../test/template/html-scanner/text_node_test.rb | 51 --
.../test/template/html-scanner/tokenizer_test.rb | 132 ----
13 files changed, 2454 deletions(-)
delete mode 100644 actionview/lib/action_view/vendor/html-scanner.rb
delete mode 100644 actionview/lib/action_view/vendor/html-scanner/html/document.rb
delete mode 100644 actionview/lib/action_view/vendor/html-scanner/html/node.rb
delete mode 100644 actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb
delete mode 100644 actionview/lib/action_view/vendor/html-scanner/html/selector.rb
delete mode 100644 actionview/lib/action_view/vendor/html-scanner/html/tokenizer.rb
delete mode 100644 actionview/lib/action_view/vendor/html-scanner/html/version.rb
delete mode 100644 actionview/test/template/html-scanner/cdata_node_test.rb
delete mode 100644 actionview/test/template/html-scanner/document_test.rb
delete mode 100644 actionview/test/template/html-scanner/node_test.rb
delete mode 100644 actionview/test/template/html-scanner/tag_node_test.rb
delete mode 100644 actionview/test/template/html-scanner/text_node_test.rb
delete mode 100644 actionview/test/template/html-scanner/tokenizer_test.rb
(limited to 'actionview')
diff --git a/actionview/lib/action_view/vendor/html-scanner.rb b/actionview/lib/action_view/vendor/html-scanner.rb
deleted file mode 100644
index ef09b446a5..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner.rb
+++ /dev/null
@@ -1,22 +0,0 @@
-require 'active_support/deprecation'
-$LOAD_PATH.unshift "#{File.dirname(__FILE__)}/html-scanner"
-
-module HTML
- extend ActiveSupport::Autoload
-
- eager_autoload do
- autoload :Scanner, 'html/sanitizer'
- autoload :CDATA, 'html/node'
- autoload :Document, 'html/document'
- autoload :FullSanitizer, 'html/sanitizer'
- autoload :LinkSanitizer, 'html/sanitizer'
- autoload :Node, 'html/node'
- autoload :Sanitizer, 'html/sanitizer'
- autoload :Selector, 'html/selector'
- autoload :Tag, 'html/node'
- autoload :Text, 'html/node'
- autoload :Tokenizer, 'html/tokenizer'
- autoload :Version, 'html/version'
- autoload :WhiteListSanitizer, 'html/sanitizer'
- end
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/document.rb b/actionview/lib/action_view/vendor/html-scanner/html/document.rb
deleted file mode 100644
index 386820300a..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/document.rb
+++ /dev/null
@@ -1,68 +0,0 @@
-require 'html/tokenizer'
-require 'html/node'
-require 'html/selector'
-require 'html/sanitizer'
-
-module HTML #:nodoc:
- # A top-level HTML document. You give it a body of text, and it will parse that
- # text into a tree of nodes.
- class Document #:nodoc:
-
- # The root of the parsed document.
- attr_reader :root
-
- # Create a new Document from the given text.
- def initialize(text, strict=false, xml=false)
- tokenizer = Tokenizer.new(text)
- @root = Node.new(nil)
- node_stack = [ @root ]
- while token = tokenizer.next
- node = Node.parse(node_stack.last, tokenizer.line, tokenizer.position, token, strict)
-
- node_stack.last.children << node unless node.tag? && node.closing == :close
- if node.tag?
- if node_stack.length > 1 && node.closing == :close
- if node_stack.last.name == node.name
- if node_stack.last.children.empty?
- node_stack.last.children << Text.new(node_stack.last, node.line, node.position, "")
- end
- node_stack.pop
- else
- open_start = node_stack.last.position - 20
- open_start = 0 if open_start < 0
- close_start = node.position - 20
- close_start = 0 if close_start < 0
- msg = < hash } unless Hash === hash
- hash = keys_to_symbols(hash)
- hash.each do |k,v|
- case k
- when :tag, :content then
- # keys are valid, and require no further processing
- when :attributes then
- hash[k] = keys_to_strings(v)
- when :parent, :child, :ancestor, :descendant, :sibling, :before,
- :after
- hash[k] = Conditions.new(v)
- when :children
- hash[k] = v = keys_to_symbols(v)
- v.each do |key,value|
- case key
- when :count, :greater_than, :less_than
- # keys are valid, and require no further processing
- when :only
- v[key] = Conditions.new(value)
- else
- raise "illegal key #{key.inspect} => #{value.inspect}"
- end
- end
- else
- raise "illegal key #{k.inspect} => #{v.inspect}"
- end
- end
- update hash
- end
-
- private
-
- def keys_to_strings(hash)
- Hash[hash.keys.map {|k| [k.to_s, hash[k]]}]
- end
-
- def keys_to_symbols(hash)
- Hash[hash.keys.map do |k|
- raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
- [k.to_sym, hash[k]]
- end]
- end
- end
-
- # The base class of all nodes, textual and otherwise, in an HTML document.
- class Node #:nodoc:
- # The array of children of this node. Not all nodes have children.
- attr_reader :children
-
- # The parent node of this node. All nodes have a parent, except for the
- # root node.
- attr_reader :parent
-
- # The line number of the input where this node was begun
- attr_reader :line
-
- # The byte position in the input where this node was begun
- attr_reader :position
-
- # Create a new node as a child of the given parent.
- def initialize(parent, line=0, pos=0)
- @parent = parent
- @children = []
- @line, @position = line, pos
- end
-
- # Returns a textual representation of the node.
- def to_s
- @children.join()
- end
-
- # Returns false (subclasses must override this to provide specific matching
- # behavior.) +conditions+ may be of any type.
- def match(conditions)
- false
- end
-
- # Search the children of this node for the first node for which #find
- # returns non +nil+. Returns the result of the #find call that succeeded.
- def find(conditions)
- conditions = validate_conditions(conditions)
- @children.each do |child|
- node = child.find(conditions)
- return node if node
- end
- nil
- end
-
- # Search for all nodes that match the given conditions, and return them
- # as an array.
- def find_all(conditions)
- conditions = validate_conditions(conditions)
-
- matches = []
- matches << self if match(conditions)
- @children.each do |child|
- matches.concat child.find_all(conditions)
- end
- matches
- end
-
- # Returns +false+. Subclasses may override this if they define a kind of
- # tag.
- def tag?
- false
- end
-
- def validate_conditions(conditions)
- Conditions === conditions ? conditions : Conditions.new(conditions)
- end
-
- def ==(node)
- return false unless self.class == node.class && children.size == node.children.size
-
- equivalent = true
-
- children.size.times do |i|
- equivalent &&= children[i] == node.children[i]
- end
-
- equivalent
- end
-
- class </)
- if strict
- raise "expected ]]> (got #{scanner.rest.inspect} for #{content})"
- else
- scanner.skip_until(/\Z/)
- end
- end
-
- return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/\/]+/)
- name.downcase!
-
- unless closing
- scanner.skip(/\s*/)
- attributes = {}
- while attr = scanner.scan(/[-\w:]+/)
- value = true
- if scanner.scan(/\s*=\s*/)
- if delim = scanner.scan(/['"]/)
- value = ""
- while text = scanner.scan(/[^#{delim}\\]+|./)
- case text
- when "\\" then
- value << text
- break if scanner.eos?
- value << scanner.getch
- when delim
- break
- else value << text
- end
- end
- else
- value = scanner.scan(/[^\s>\/]+/)
- end
- end
- attributes[attr.downcase] = value
- scanner.skip(/\s*/)
- end
-
- closing = ( scanner.scan(/\//) ? :self : nil )
- end
-
- unless scanner.scan(/\s*>/)
- if strict
- raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
- else
- # throw away all text until we find what we're looking for
- scanner.skip_until(/>/) or scanner.terminate
- end
- end
-
- Tag.new(parent, line, pos, name, attributes, closing)
- end
- end
- end
- end
-
- # A node that represents text, rather than markup.
- class Text < Node #:nodoc:
-
- attr_reader :content
-
- # Creates a new text node as a child of the given parent, with the given
- # content.
- def initialize(parent, line, pos, content)
- super(parent, line, pos)
- @content = content
- end
-
- # Returns the content of this node.
- def to_s
- @content
- end
-
- # Returns +self+ if this node meets the given conditions. Text nodes support
- # conditions of the following kinds:
- #
- # * if +conditions+ is a string, it must be a substring of the node's
- # content
- # * if +conditions+ is a regular expression, it must match the node's
- # content
- # * if +conditions+ is a hash, it must contain a :content key that
- # is either a string or a regexp, and which is interpreted as described
- # above.
- def find(conditions)
- match(conditions) && self
- end
-
- # Returns non-+nil+ if this node meets the given conditions, or +nil+
- # otherwise. See the discussion of #find for the valid conditions.
- def match(conditions)
- case conditions
- when String
- @content == conditions
- when Regexp
- @content =~ conditions
- when Hash
- conditions = validate_conditions(conditions)
-
- # Text nodes only have :content, :parent, :ancestor
- unless (conditions.keys - [:content, :parent, :ancestor]).empty?
- return false
- end
-
- match(conditions[:content])
- else
- nil
- end
- end
-
- def ==(node)
- return false unless super
- content == node.content
- end
- end
-
- # A CDATA node is simply a text node with a specialized way of displaying
- # itself.
- class CDATA < Text #:nodoc:
- def to_s
- ""
- end
- end
-
- # A Tag is any node that represents markup. It may be an opening tag, a
- # closing tag, or a self-closing tag. It has a name, and may have a hash of
- # attributes.
- class Tag < Node #:nodoc:
-
- # Either +nil+, :close, or :self
- attr_reader :closing
-
- # Either +nil+, or a hash of attributes for this node.
- attr_reader :attributes
-
- # The name of this tag.
- attr_reader :name
-
- # Create a new node as a child of the given parent, using the given content
- # to describe the node. It will be parsed and the node name, attributes and
- # closing status extracted.
- def initialize(parent, line, pos, name, attributes, closing)
- super(parent, line, pos)
- @name = name
- @attributes = attributes
- @closing = closing
- end
-
- # A convenience for obtaining an attribute of the node. Returns +nil+ if
- # the node has no attributes.
- def [](attr)
- @attributes ? @attributes[attr] : nil
- end
-
- # Returns non-+nil+ if this tag can contain child nodes.
- def childless?(xml = false)
- return false if xml && @closing.nil?
- !@closing.nil? ||
- @name =~ /^(img|br|hr|link|meta|area|base|basefont|
- col|frame|input|isindex|param)$/ox
- end
-
- # Returns a textual representation of the node
- def to_s
- if @closing == :close
- "#{@name}>"
- else
- s = "<#{@name}"
- @attributes.each do |k,v|
- s << " #{k}"
- s << "=\"#{v}\"" if String === v
- end
- s << " /" if @closing == :self
- s << ">"
- @children.each { |child| s << child.to_s }
- s << "#{@name}>" if @closing != :self && !@children.empty?
- s
- end
- end
-
- # If either the node or any of its children meet the given conditions, the
- # matching node is returned. Otherwise, +nil+ is returned. (See the
- # description of the valid conditions in the +match+ method.)
- def find(conditions)
- match(conditions) && self || super
- end
-
- # Returns +true+, indicating that this node represents an HTML tag.
- def tag?
- true
- end
-
- # Returns +true+ if the node meets any of the given conditions. The
- # +conditions+ parameter must be a hash of any of the following keys
- # (all are optional):
- #
- # * :tag: the node name must match the corresponding value
- # * :attributes: a hash. The node's values must match the
- # corresponding values in the hash.
- # * :parent: a hash. The node's parent must match the
- # corresponding hash.
- # * :child: a hash. At least one of the node's immediate children
- # must meet the criteria described by the hash.
- # * :ancestor: a hash. At least one of the node's ancestors must
- # meet the criteria described by the hash.
- # * :descendant: a hash. At least one of the node's descendants
- # must meet the criteria described by the hash.
- # * :sibling: a hash. At least one of the node's siblings must
- # meet the criteria described by the hash.
- # * :after: a hash. The node must be after any sibling meeting
- # the criteria described by the hash, and at least one sibling must match.
- # * :before: a hash. The node must be before any sibling meeting
- # the criteria described by the hash, and at least one sibling must match.
- # * :children: a hash, for counting children of a node. Accepts the
- # keys:
- # ** :count: either a number or a range which must equal (or
- # include) the number of children that match.
- # ** :less_than: the number of matching children must be less than
- # this number.
- # ** :greater_than: the number of matching children must be
- # greater than this number.
- # ** :only: another hash consisting of the keys to use
- # to match on the children, and only matching children will be
- # counted.
- #
- # Conditions are matched using the following algorithm:
- #
- # * if the condition is a string, it must be a substring of the value.
- # * if the condition is a regexp, it must match the value.
- # * if the condition is a number, the value must match number.to_s.
- # * if the condition is +true+, the value must not be +nil+.
- # * if the condition is +false+ or +nil+, the value must be +nil+.
- #
- # Usage:
- #
- # # test if the node is a "span" tag
- # node.match tag: "span"
- #
- # # test if the node's parent is a "div"
- # node.match parent: { tag: "div" }
- #
- # # test if any of the node's ancestors are "table" tags
- # node.match ancestor: { tag: "table" }
- #
- # # test if any of the node's immediate children are "em" tags
- # node.match child: { tag: "em" }
- #
- # # test if any of the node's descendants are "strong" tags
- # node.match descendant: { tag: "strong" }
- #
- # # test if the node has between 2 and 4 span tags as immediate children
- # node.match children: { count: 2..4, only: { tag: "span" } }
- #
- # # get funky: test to see if the node is a "div", has a "ul" ancestor
- # # and an "li" parent (with "class" = "enum"), and whether or not it has
- # # a "span" descendant that contains # text matching /hello world/:
- # node.match tag: "div",
- # ancestor: { tag: "ul" },
- # parent: { tag: "li",
- # attributes: { class: "enum" } },
- # descendant: { tag: "span",
- # child: /hello world/ }
- def match(conditions)
- conditions = validate_conditions(conditions)
- # check content of child nodes
- if conditions[:content]
- if children.empty?
- return false unless match_condition("", conditions[:content])
- else
- return false unless children.find { |child| child.match(conditions[:content]) }
- end
- end
-
- # test the name
- return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
-
- # test attributes
- (conditions[:attributes] || {}).each do |key, value|
- return false unless match_condition(self[key], value)
- end
-
- # test parent
- return false unless parent.match(conditions[:parent]) if conditions[:parent]
-
- # test children
- return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
-
- # test ancestors
- if conditions[:ancestor]
- return false unless catch :found do
- p = self
- throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
- end
- end
-
- # test descendants
- if conditions[:descendant]
- return false unless children.find do |child|
- # test the child
- child.match(conditions[:descendant]) ||
- # test the child's descendants
- child.match(:descendant => conditions[:descendant])
- end
- end
-
- # count children
- if opts = conditions[:children]
- matches = children.select do |c|
- (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
- end
-
- matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
- opts.each do |key, value|
- next if key == :only
- case key
- when :count
- if Integer === value
- return false if matches.length != value
- else
- return false unless value.include?(matches.length)
- end
- when :less_than
- return false unless matches.length < value
- when :greater_than
- return false unless matches.length > value
- else raise "unknown count condition #{key}"
- end
- end
- end
-
- # test siblings
- if conditions[:sibling] || conditions[:before] || conditions[:after]
- siblings = parent ? parent.children : []
- self_index = siblings.index(self)
-
- if conditions[:sibling]
- return false unless siblings.detect do |s|
- s != self && s.match(conditions[:sibling])
- end
- end
-
- if conditions[:before]
- return false unless siblings[self_index+1..-1].detect do |s|
- s != self && s.match(conditions[:before])
- end
- end
-
- if conditions[:after]
- return false unless siblings[0,self_index].detect do |s|
- s != self && s.match(conditions[:after])
- end
- end
- end
-
- true
- end
-
- def ==(node)
- return false unless super
- return false unless closing == node.closing && self.name == node.name
- attributes == node.attributes
- end
-
- private
- # Match the given value to the given condition.
- def match_condition(value, condition)
- case condition
- when String
- value && value == condition
- when Regexp
- value && value.match(condition)
- when Numeric
- value == condition.to_s
- when true
- !value.nil?
- when false, nil
- value.nil?
- else
- false
- end
- end
- end
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb b/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb
deleted file mode 100644
index 36ec3ef6b3..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/sanitizer.rb
+++ /dev/null
@@ -1,202 +0,0 @@
-require 'set'
-require 'cgi'
-require 'active_support/core_ext/module/attribute_accessors'
-
-module HTML
- module Scanner
- def full_sanitizer
- HTML::FullSanitizer
- end
-
- def link_sanitizer
- HTML::LinkSanitizer
- end
-
- def white_list_sanitizer
- HTML::WhiteListSanitizer
- end
- end
-
- class Sanitizer
- def sanitize(text, options = {})
- validate_options(options)
- return text unless sanitizeable?(text)
- tokenize(text, options).join
- end
-
- def sanitizeable?(text)
- !(text.nil? || text.empty? || !text.index("<"))
- end
-
- protected
- def tokenize(text, options)
- tokenizer = HTML::Tokenizer.new(text)
- result = []
- while token = tokenizer.next
- node = Node.parse(nil, 0, 0, token, false)
- process_node node, result, options
- end
- result
- end
-
- def process_node(node, result, options)
- result << node.to_s
- end
-
- def validate_options(options)
- if options[:tags] && !options[:tags].is_a?(Enumerable)
- raise ArgumentError, "You should pass :tags as an Enumerable"
- end
-
- if options[:attributes] && !options[:attributes].is_a?(Enumerable)
- raise ArgumentError, "You should pass :attributes as an Enumerable"
- end
- end
- end
-
- class FullSanitizer < Sanitizer
- def sanitize(text, options = {})
- result = super
- # strip any comments, and if they have a newline at the end (ie. line with
- # only a comment) strip that too
- result = result.gsub(/[\n]?/m, "") if (result && result =~ /[\n]?/m)
- # Recurse - handle all dirty nested tags
- result == text ? result : sanitize(result, options)
- end
-
- def process_node(node, result, options)
- result << node.to_s if node.class == HTML::Text
- end
- end
-
- class LinkSanitizer < FullSanitizer
- cattr_accessor :included_tags, :instance_writer => false
- self.included_tags = Set.new(%w(a href))
-
- def sanitizeable?(text)
- !(text.nil? || text.empty? || !((text.index("")))
- end
-
- protected
- def process_node(node, result, options)
- result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
- end
- end
-
- class WhiteListSanitizer < Sanitizer
- [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
- :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
- class_attribute attr, :instance_writer => false
- end
-
- # A regular expression of the valid characters used to separate protocols like
- # the ':' in 'http://foo.com'
- self.protocol_separator = /:|(*58)|(p)|(*3a)|(%|%)3A/i
-
- # Specifies a Set of HTML attributes that can have URIs.
- self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
-
- # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
- # to just escaping harmless tags like <font>
- self.bad_tags = Set.new(%w(script))
-
- # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
- self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
- sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
- acronym a img blockquote del ins))
-
- # Specifies the default Set of html attributes that the #sanitize helper will leave
- # in the allowed tag.
- self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
-
- # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
- self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
- feed svn urn aim rsync tag ssh sftp rtsp afs))
-
- # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
- self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
- border-color border-left-color border-right-color border-top-color clear color cursor direction display
- elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
- overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
- speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
- width))
-
- # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
- self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
- collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
- nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
-
- # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
- self.shorthand_css_properties = Set.new(%w(background border margin padding))
-
- # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
- def sanitize_css(style)
- # disallow urls
- style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
-
- # gauntlet
- if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ ||
- style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/
- return ''
- end
-
- clean = []
- style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
- if allowed_css_properties.include?(prop.downcase)
- clean << prop + ': ' + val + ';'
- elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
- unless val.split().any? do |keyword|
- !allowed_css_keywords.include?(keyword) &&
- keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
- end
- clean << prop + ': ' + val + ';'
- end
- end
- end
- clean.join(' ')
- end
-
- protected
- def tokenize(text, options)
- options[:parent] = []
- options[:attributes] ||= allowed_attributes
- options[:tags] ||= allowed_tags
- super
- end
-
- def process_node(node, result, options)
- result << case node
- when HTML::Tag
- if node.closing == :close
- options[:parent].shift
- else
- options[:parent].unshift node.name
- end
-
- process_attributes_for node, options
-
- options[:tags].include?(node.name) ? node : nil
- else
- bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/, "<")
- end
- end
-
- def process_attributes_for(node, options)
- return unless node.attributes
- node.attributes.keys.each do |attr_name|
- value = node.attributes[attr_name].to_s
-
- if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
- node.attributes.delete(attr_name)
- else
- node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value))
- end
- end
- end
-
- def contains_bad_protocols?(attr_name, value)
- uri_attributes.include?(attr_name) &&
- (value =~ /(^[^\/:]*):|(*58)|(p)|(*3a)|(%|%)3A/i && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip))
- end
- end
-end
diff --git a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb b/actionview/lib/action_view/vendor/html-scanner/html/selector.rb
deleted file mode 100644
index dfdd724b9b..0000000000
--- a/actionview/lib/action_view/vendor/html-scanner/html/selector.rb
+++ /dev/null
@@ -1,830 +0,0 @@
-#--
-# Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
-# Under MIT and/or CC By license.
-#++
-
-module HTML
-
- # Selects HTML elements using CSS 2 selectors.
- #
- # The +Selector+ class uses CSS selector expressions to match and select
- # HTML elements.
- #
- # For example:
- # selector = HTML::Selector.new "form.login[action=/login]"
- # creates a new selector that matches any +form+ element with the class
- # +login+ and an attribute +action+ with the value /login.
- #
- # === Matching Elements
- #
- # Use the #match method to determine if an element matches the selector.
- #
- # For simple selectors, the method returns an array with that element,
- # or +nil+ if the element does not match. For complex selectors (see below)
- # the method returns an array with all matched elements, of +nil+ if no
- # match found.
- #
- # For example:
- # if selector.match(element)
- # puts "Element is a login form"
- # end
- #
- # === Selecting Elements
- #
- # Use the #select method to select all matching elements starting with
- # one element and going through all children in depth-first order.
- #
- # This method returns an array of all matching elements, an empty array
- # if no match is found
- #
- # For example:
- # selector = HTML::Selector.new "input[type=text]"
- # matches = selector.select(element)
- # matches.each do |match|
- # puts "Found text field with name #{match.attributes['name']}"
- # end
- #
- # === Expressions
- #
- # Selectors can match elements using any of the following criteria:
- # * name -- Match an element based on its name (tag name).
- # For example, p to match a paragraph. You can use *
- # to match any element.
- # * #id -- Match an element based on its identifier (the
- # id attribute). For example, #page.
- # * .class -- Match an element based on its class name, all
- # class names if more than one specified.
- # * [attr] -- Match an element that has the specified attribute.
- # * [attr=value] -- Match an element that has the specified
- # attribute and value. (More operators are supported see below)
- # * :pseudo-class -- Match an element based on a pseudo class,
- # such as :nth-child and :empty.
- # * :not(expr) -- Match an element that does not match the
- # negation expression.
- #
- # When using a combination of the above, the element name comes first
- # followed by identifier, class names, attributes, pseudo classes and
- # negation in any order. Do not separate these parts with spaces!
- # Space separation is used for descendant selectors.
- #
- # For example:
- # selector = HTML::Selector.new "form.login[action=/login]"
- # The matched element must be of type +form+ and have the class +login+.
- # It may have other classes, but the class +login+ is required to match.
- # It must also have an attribute called +action+ with the value
- # /login.
- #
- # This selector will match the following element:
- #
]]>", @node.to_s
- end
-
- def test_content
- assert_equal "howdy
", @node.content
- end
-end
diff --git a/actionview/test/template/html-scanner/document_test.rb b/actionview/test/template/html-scanner/document_test.rb
deleted file mode 100644
index 7b7518e130..0000000000
--- a/actionview/test/template/html-scanner/document_test.rb
+++ /dev/null
@@ -1,149 +0,0 @@
-require 'abstract_unit'
-require 'action_view/vendor/html-scanner'
-
-class DocumentTest < ActiveSupport::TestCase
- def test_handle_doctype
- doc = nil
- assert_nothing_raised do
- doc = HTML::Document.new <<-HTML.strip
-
-
-
- HTML
- end
- assert_equal 3, doc.root.children.length
- assert_equal %{}, doc.root.children[0].content
- assert_match %r{\s+}m, doc.root.children[1].content
- assert_equal "html", doc.root.children[2].name
- end
-
- def test_find_img
- doc = HTML::Document.new <<-HTML.strip
-
-
- ![](hello.gif)
-
-
- HTML
- assert doc.find(:tag=>"img", :attributes=>{"src"=>"hello.gif"})
- end
-
- def test_find_all
- doc = HTML::Document.new <<-HTML.strip
-
-
- ![](hello.gif)
-
-
something
-
here is more
-
-
-
- HTML
- all = doc.find_all :attributes => { :class => "test" }
- assert_equal 3, all.length
- assert_equal [ "p", "p", "em" ], all.map { |n| n.name }
- end
-
- def test_find_with_text
- doc = HTML::Document.new <<-HTML.strip
-
-
- Some text
-
-
- HTML
- assert doc.find(:content => "Some text")
- assert doc.find(:tag => "p", :child => { :content => "Some text" })
- assert doc.find(:tag => "p", :child => "Some text")
- assert doc.find(:tag => "p", :content => "Some text")
- end
-
- def test_parse_xml
- assert_nothing_raised { HTML::Document.new("", true, true) }
- assert_nothing_raised { HTML::Document.new("something", true, true) }
- end
-
- def test_parse_document
- doc = HTML::Document.new(<<-HTML)
-
- HTML
- assert_not_nil doc.find(:tag => "div", :children => { :count => 1, :only => { :tag => "table" } })
- end
-
- def test_tag_nesting_nothing_to_s
- doc = HTML::Document.new("")
- assert_equal "", doc.root.to_s
- end
-
- def test_tag_nesting_space_to_s
- doc = HTML::Document.new(" ")
- assert_equal " ", doc.root.to_s
- end
-
- def test_tag_nesting_text_to_s
- doc = HTML::Document.new("text")
- assert_equal "text", doc.root.to_s
- end
-
- def test_tag_nesting_tag_to_s
- doc = HTML::Document.new("")
- assert_equal "", doc.root.to_s
- end
-
- def test_parse_cdata
- doc = HTML::Document.new(<<-HTML)
-
-
-
- ]]>
-
-
- this document has <br> for a title
-
-
-HTML
-
- assert_nil doc.find(:tag => "title", :descendant => { :tag => "br" })
- assert doc.find(:tag => "title", :child => "
")
- end
-
- def test_find_empty_tag
- doc = HTML::Document.new("")
- assert_nil doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /./)
- assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /\A\Z/)
- assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /^$/)
- assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => "")
- assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => nil)
- end
-
- def test_parse_invalid_document
- assert_nothing_raised do
- HTML::Document.new("
-
- ")
- end
- end
-
- def test_invalid_document_raises_exception_when_strict
- assert_raise RuntimeError do
- HTML::Document.new("
-
- ", true)
- end
- end
-
-end
diff --git a/actionview/test/template/html-scanner/node_test.rb b/actionview/test/template/html-scanner/node_test.rb
deleted file mode 100644
index a2734dfcfe..0000000000
--- a/actionview/test/template/html-scanner/node_test.rb
+++ /dev/null
@@ -1,90 +0,0 @@
-require 'abstract_unit'
-require 'action_view/vendor/html-scanner/html/node'
-
-class NodeTest < ActiveSupport::TestCase
-
- class MockNode
- def initialize(matched, value)
- @matched = matched
- @value = value
- end
-
- def find(conditions)
- @matched && self
- end
-
- def to_s
- @value.to_s
- end
- end
-
- def setup
- @node = HTML::Node.new("parent")
- @node.children.concat [MockNode.new(false,1), MockNode.new(true,"two"), MockNode.new(false,:three)]
- end
-
- def test_match
- assert !@node.match("foo")
- end
-
- def test_tag
- assert !@node.tag?
- end
-
- def test_to_s
- assert_equal "1twothree", @node.to_s
- end
-
- def test_find
- assert_equal "two", @node.find('blah').to_s
- end
-
- def test_parse_strict
- s = ""
- assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) }
- end
-
- def test_parse_relaxed
- s = ""
- node = nil
- assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
- assert node.attributes.has_key?("foo")
- assert !node.attributes.has_key?("bar")
- end
-
- def test_to_s_with_boolean_attrs
- s = ""
- node = HTML::Node.parse(nil,0,0,s)
- assert node.attributes.has_key?("foo")
- assert node.attributes.has_key?("bar")
- assert "", node.to_s
- end
-
- def test_parse_with_unclosed_tag
- s = "contents', node.content
- end
-
- def test_parse_strict_with_unterminated_cdata_section
- s = "")
- assert_equal "tag", node.name
- assert_equal Hash.new, node.attributes
- assert_nil node.closing
- end
-
- def test_open_with_attributes
- node = tag("")
- assert_equal "tag1", node.name
- assert_equal "hey_ho", node["foo"]
- assert_equal "blah blah", node["x:bar"]
- assert_equal "blah blah blah", node["baz"]
- end
-
- def test_self_closing_without_attributes
- node = tag("")
- assert_equal "tag", node.name
- assert_equal Hash.new, node.attributes
- assert_equal :self, node.closing
- end
-
- def test_self_closing_with_attributes
- node = tag("")
- assert_equal "tag", node.name
- assert_equal( { "a" => "b" }, node.attributes )
- assert_equal :self, node.closing
- end
-
- def test_closing_without_attributes
- node = tag("")
- assert_equal "tag", node.name
- assert_nil node.attributes
- assert_equal :close, node.closing
- end
-
- def test_bracket_op_when_no_attributes
- node = tag("")
- assert_nil node["foo"]
- end
-
- def test_bracket_op_when_attributes
- node = tag("")
- assert_equal "b", node["a"]
- end
-
- def test_attributes_with_escaped_quotes
- node = tag("")
- assert_equal "b\\'c", node["a"]
- assert_equal "bob \\\"float\\\"", node["b"]
- end
-
- def test_to_s
- node = tag("")
- node = node.to_s
- assert node.include?('a')
- assert node.include?('b="c"')
- assert node.include?('d="f"')
- assert node.include?('g="h')
- assert node.include?('i')
- end
-
- def test_tag
- assert tag("").tag?
- end
-
- def test_match_tag_as_string
- assert tag("").match(:tag => "tag")
- assert !tag("").match(:tag => "b")
- end
-
- def test_match_tag_as_regexp
- assert tag("").match(:tag => /t.g/)
- assert !tag("").match(:tag => /t[bqs]g/)
- end
-
- def test_match_attributes_as_string
- t = tag("")
- assert t.match(:attributes => {"a" => "something"})
- assert t.match(:attributes => {"b" => "else"})
- end
-
- def test_match_attributes_as_regexp
- t = tag("")
- assert t.match(:attributes => {"a" => /^something$/})
- assert t.match(:attributes => {"b" => /e.*e/})
- assert t.match(:attributes => {"a" => /me..i/, "b" => /.ls.$/})
- end
-
- def test_match_attributes_as_number
- t = tag("")
- assert t.match(:attributes => {"a" => 15})
- assert t.match(:attributes => {"b" => 3.1415})
- assert t.match(:attributes => {"a" => 15, "b" => 3.1415})
- end
-
- def test_match_attributes_exist
- t = tag("")
- assert t.match(:attributes => {"a" => true})
- assert t.match(:attributes => {"b" => true})
- assert t.match(:attributes => {"a" => true, "b" => true})
- end
-
- def test_match_attributes_not_exist
- t = tag("")
- assert t.match(:attributes => {"c" => false})
- assert t.match(:attributes => {"c" => nil})
- assert t.match(:attributes => {"a" => true, "c" => false})
- end
-
- def test_match_parent_success
- t = tag("", tag(""))
- assert t.match(:parent => {:tag => "foo", :attributes => {"k" => /v.l/, "j" => false}})
- end
-
- def test_match_parent_fail
- t = tag("", tag(""))
- assert !t.match(:parent => {:tag => /kafka/})
- end
-
- def test_match_child_success
- t = tag("")
- tag("", t)
- tag("", t)
- assert t.match(:child => { :tag => "sib", :attributes => {"v" => /j/}})
- assert t.match(:child => { :attributes => {"a" => "kelly"}})
- end
-
- def test_match_child_fail
- t = tag("")
- tag("", t)
- tag("", t)
- assert !t.match(:child => { :tag => "sib", :attributes => {"v" => /r/}})
- assert !t.match(:child => { :attributes => {"v" => false}})
- end
-
- def test_match_ancestor_success
- t = tag("", tag("", tag("")))
- assert t.match(:ancestor => {:tag => "parent", :attributes => {"a" => /ll/}})
- assert t.match(:ancestor => {:attributes => {"m" => "vaughn"}})
- end
-
- def test_match_ancestor_fail
- t = tag("", tag("", tag("")))
- assert !t.match(:ancestor => {:tag => /^parent/, :attributes => {"v" => /m/}})
- assert !t.match(:ancestor => {:attributes => {"v" => false}})
- end
-
- def test_match_descendant_success
- tag("", tag("", t = tag("")))
- assert t.match(:descendant => {:tag => "child", :attributes => {"a" => /ll/}})
- assert t.match(:descendant => {:attributes => {"m" => "vaughn"}})
- end
-
- def test_match_descendant_fail
- tag("", tag("", t = tag("")))
- assert !t.match(:descendant => {:tag => /^child/, :attributes => {"v" => /m/}})
- assert !t.match(:descendant => {:attributes => {"v" => false}})
- end
-
- def test_match_child_count
- t = tag("")
- tag("hello", t)
- tag("", t)
- tag("", t)
- assert t.match(:children => { :count => 2 })
- assert t.match(:children => { :count => 2..4 })
- assert t.match(:children => { :less_than => 4 })
- assert t.match(:children => { :greater_than => 1 })
- assert !t.match(:children => { :count => 3 })
- end
-
- def test_conditions_as_strings
- t = tag("")
- assert t.match("tag" => "tag")
- assert t.match("attributes" => { "x:k" => "something" })
- assert !t.match("tag" => "gat")
- assert !t.match("attributes" => { "x:j" => "something" })
- end
-
- def test_attributes_as_symbols
- t = tag("")
- assert t.match(:attributes => { :v => /oh/ })
- assert t.match(:attributes => { :a => /ll/ })
- end
-
- def test_match_sibling
- t = tag("")
- tag("hello", t)
- tag("", t)
- tag("world", t)
- m = tag("", t)
- tag("", t)
-
- assert m.match(:sibling => {:tag => "span", :attributes => {:a => true}})
- assert m.match(:sibling => {:tag => "span", :attributes => {:m => true}})
- assert !m.match(:sibling => {:tag => "span", :attributes => {:k => true}})
- end
-
- def test_match_sibling_before
- t = tag("")
- tag("hello", t)
- tag("", t)
- tag("world", t)
- m = tag("", t)
- tag("", t)
-
- assert m.match(:before => {:tag => "span", :attributes => {:m => true}})
- assert !m.match(:before => {:tag => "span", :attributes => {:a => true}})
- assert !m.match(:before => {:tag => "span", :attributes => {:k => true}})
- end
-
- def test_match_sibling_after
- t = tag("")
- tag("hello", t)
- tag("", t)
- tag("world", t)
- m = tag("", t)
- tag("", t)
-
- assert m.match(:after => {:tag => "span", :attributes => {:a => true}})
- assert !m.match(:after => {:tag => "span", :attributes => {:m => true}})
- assert !m.match(:after => {:tag => "span", :attributes => {:k => true}})
- end
-
- def test_tag_to_s
- t = tag("")
- tag("hello", t)
- tag("
", t)
- assert_equal %(hello
), t.to_s
- end
-
- private
-
- def tag(content, parent=nil)
- node = HTML::Node.parse(parent,0,0,content)
- parent.children << node if parent
- node
- end
-end
diff --git a/actionview/test/template/html-scanner/text_node_test.rb b/actionview/test/template/html-scanner/text_node_test.rb
deleted file mode 100644
index d8ab667adf..0000000000
--- a/actionview/test/template/html-scanner/text_node_test.rb
+++ /dev/null
@@ -1,51 +0,0 @@
-require 'abstract_unit'
-require 'action_view/vendor/html-scanner/html/node'
-
-class TextNodeTest < ActiveSupport::TestCase
- def setup
- @node = HTML::Text.new(nil, 0, 0, "hello, howdy, aloha, annyeong")
- end
-
- def test_to_s
- assert_equal "hello, howdy, aloha, annyeong", @node.to_s
- end
-
- def test_find_string
- assert_equal @node, @node.find("hello, howdy, aloha, annyeong")
- assert_equal false, @node.find("bogus")
- end
-
- def test_find_regexp
- assert_equal @node, @node.find(/an+y/)
- assert_nil @node.find(/b/)
- end
-
- def test_find_hash
- assert_equal @node, @node.find(:content => /howdy/)
- assert_nil @node.find(:content => /^howdy$/)
- assert_equal false, @node.find(:content => "howdy")
- end
-
- def test_find_other
- assert_nil @node.find(:hello)
- end
-
- def test_match_string
- assert @node.match("hello, howdy, aloha, annyeong")
- assert_equal false, @node.match("bogus")
- end
-
- def test_match_regexp
- assert_not_nil @node, @node.match(/an+y/)
- assert_nil @node.match(/b/)
- end
-
- def test_match_hash
- assert_not_nil @node, @node.match(:content => "howdy")
- assert_nil @node.match(:content => /^howdy$/)
- end
-
- def test_match_other
- assert_nil @node.match(:hello)
- end
-end
diff --git a/actionview/test/template/html-scanner/tokenizer_test.rb b/actionview/test/template/html-scanner/tokenizer_test.rb
deleted file mode 100644
index d1cdd53211..0000000000
--- a/actionview/test/template/html-scanner/tokenizer_test.rb
+++ /dev/null
@@ -1,132 +0,0 @@
-require 'abstract_unit'
-require 'action_view/vendor/html-scanner/html/tokenizer'
-
-class TokenizerTest < ActiveSupport::TestCase
-
- def test_blank
- tokenize ""
- assert_end
- end
-
- def test_space
- tokenize " "
- assert_next " "
- assert_end
- end
-
- def test_tag_simple_open
- tokenize ""
- assert_next ""
- assert_end
- end
-
- def test_tag_simple_self_closing
- tokenize ""
- assert_next ""
- assert_end
- end
-
- def test_tag_simple_closing
- tokenize ""
- assert_next ""
- end
-
- def test_tag_with_single_quoted_attribute
- tokenize %{x}
- assert_next %{}
- end
-
- def test_tag_with_single_quoted_attribute_with_escape
- tokenize %{x}
- assert_next %{}
- end
-
- def test_tag_with_double_quoted_attribute
- tokenize %{x}
- assert_next %{}
- end
-
- def test_tag_with_double_quoted_attribute_with_escape
- tokenize %{x}
- assert_next %{}
- end
-
- def test_tag_with_unquoted_attribute
- tokenize %{x}
- assert_next %{}
- end
-
- def test_tag_with_lt_char_in_attribute
- tokenize %{x}
- assert_next %{}
- end
-
- def test_tag_with_gt_char_in_attribute
- tokenize %{x}
- assert_next %{}
- end
-
- def test_doctype_tag
- tokenize %{\n }
- assert_next %{}
- assert_next %{\n }
- assert_next %{}
- end
-
- def test_cdata_tag
- tokenize %{]]>}
- assert_next %{]]>}
- assert_end
- end
-
- def test_unterminated_cdata_tag
- tokenize %{}
- assert_next %{ world}
- assert_next %{original }
- assert_next %{< hello > world}
- end
-
- def test_less_than_without_matching_greater_than
- tokenize %{hello foo\nbar}
- assert_next %{hello }
- assert_next %{}
- assert_next %{foo}
- assert_next %{}
- assert_next %{\nbar}
- assert_next %{}
- assert_end
- end
-
- def test_unterminated_comment
- tokenize %{hello