about summary refs log tree commit diff stats
path: root/actionpack/lib/action_controller/vendor/html-scanner
diff options
context:
space:
mode:
author Santiago Pastorino <santiago@wyeworks.com> 2010-08-14 02:13:00 -0300
committer Santiago Pastorino <santiago@wyeworks.com> 2010-08-14 04:12:33 -0300
commit b451de0d6de4df6bc66b274cec73b919f823d5ae (patch)
tree f252c4143a0adb3be7d36d543282539cca0fb971 /actionpack/lib/action_controller/vendor/html-scanner
parent 1590377886820e00b1a786616518a32f3b61ec0f (diff)
download rails-b451de0d6de4df6bc66b274cec73b919f823d5ae.tar.gz
rails-b451de0d6de4df6bc66b274cec73b919f823d5ae.tar.bz2
rails-b451de0d6de4df6bc66b274cec73b919f823d5ae.zip
Deletes trailing whitespaces (over text files only find * -type f -exec sed 's/[ \t]*$//' -i {} \;)
Diffstat (limited to 'actionpack/lib/action_controller/vendor/html-scanner')
-rw-r--r-- actionpack/lib/action_controller/vendor/html-scanner/html/document.rb 4
-rw-r--r-- actionpack/lib/action_controller/vendor/html-scanner/html/node.rb 58
-rw-r--r-- actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb 50
-rw-r--r-- actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb 2
-rw-r--r-- actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb 16
5 files changed, 65 insertions, 65 deletions
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
index b8d73c350d..7fa3aead82 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb
@@ -48,7 +48,7 @@ EOF
end
end
end
-
+
# Search the tree for (and return) the first node that matches the given
# conditions. The conditions are interpreted differently for different node
# types, see HTML::Text#find and HTML::Tag#find.
@@ -62,7 +62,7 @@ EOF
def find_all(conditions)
@root.find_all(conditions)
end
-
+
end
end
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
index a874519978..d581399514 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb
@@ -1,7 +1,7 @@
require 'strscan'
module HTML #:nodoc:
-
+
class Conditions < Hash #:nodoc:
def initialize(hash)
super()
@@ -57,17 +57,17 @@ module HTML #:nodoc:
class Node #:nodoc:
# The array of children of this node. Not all nodes have children.
attr_reader :children
-
+
# The parent node of this node. All nodes have a parent, except for the
# root node.
attr_reader :parent
-
+
# The line number of the input where this node was begun
attr_reader :line
-
+
# The byte position in the input where this node was begun
attr_reader :position
-
+
# Create a new node as a child of the given parent.
def initialize(parent, line=0, pos=0)
@parent = parent
@@ -92,7 +92,7 @@ module HTML #:nodoc:
# returns non +nil+. Returns the result of the #find call that succeeded.
def find(conditions)
conditions = validate_conditions(conditions)
- @children.each do |child|
+ @children.each do |child|
node = child.find(conditions)
return node if node
end
@@ -133,7 +133,7 @@ module HTML #:nodoc:
equivalent
end
-
+
class <<self
def parse(parent, line, pos, content, strict=true)
if content !~ /^<\S/
@@ -160,11 +160,11 @@ module HTML #:nodoc:
return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
end
-
+
closing = ( scanner.scan(/\//) ? :close : nil )
return Text.new(parent, line, pos, content) unless name = scanner.scan(/[\w:-]+/)
name.downcase!
-
+
unless closing
scanner.skip(/\s*/)
attributes = {}
@@ -191,13 +191,13 @@ module HTML #:nodoc:
attributes[attr.downcase] = value
scanner.skip(/\s*/)
end
-
+
closing = ( scanner.scan(/\//) ? :self : nil )
end
-
+
unless scanner.scan(/\s*>/)
if strict
- raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
+ raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
else
# throw away all text until we find what we're looking for
scanner.skip_until(/>/) or scanner.terminate
@@ -212,9 +212,9 @@ module HTML #:nodoc:
# A node that represents text, rather than markup.
class Text < Node #:nodoc:
-
+
attr_reader :content
-
+
# Creates a new text node as a child of the given parent, with the given
# content.
def initialize(parent, line, pos, content)
@@ -240,7 +240,7 @@ module HTML #:nodoc:
def find(conditions)
match(conditions) && self
end
-
+
# Returns non-+nil+ if this node meets the given conditions, or +nil+
# otherwise. See the discussion of #find for the valid conditions.
def match(conditions)
@@ -268,7 +268,7 @@ module HTML #:nodoc:
content == node.content
end
end
-
+
# A CDATA node is simply a text node with a specialized way of displaying
# itself.
class CDATA < Text #:nodoc:
@@ -281,16 +281,16 @@ module HTML #:nodoc:
# closing tag, or a self-closing tag. It has a name, and may have a hash of
# attributes.
class Tag < Node #:nodoc:
-
+
# Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
attr_reader :closing
-
+
# Either +nil+, or a hash of attributes for this node.
attr_reader :attributes
# The name of this tag.
attr_reader :name
-
+
# Create a new node as a child of the given parent, using the given content
# to describe the node. It will be parsed and the node name, attributes and
# closing status extracted.
@@ -344,7 +344,7 @@ module HTML #:nodoc:
def tag?
true
end
-
+
# Returns +true+ if the node meets any of the given conditions. The
# +conditions+ parameter must be a hash of any of the following keys
# (all are optional):
@@ -404,7 +404,7 @@ module HTML #:nodoc:
# node.match :descendant => { :tag => "strong" }
#
# # test if the node has between 2 and 4 span tags as immediate children
- # node.match :children => { :count => 2..4, :only => { :tag => "span" } }
+ # node.match :children => { :count => 2..4, :only => { :tag => "span" } }
#
# # get funky: test to see if the node is a "div", has a "ul" ancestor
# # and an "li" parent (with "class" = "enum"), and whether or not it has
@@ -439,7 +439,7 @@ module HTML #:nodoc:
# test children
return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
-
+
# test ancestors
if conditions[:ancestor]
return false unless catch :found do
@@ -457,13 +457,13 @@ module HTML #:nodoc:
child.match(:descendant => conditions[:descendant])
end
end
-
+
# count children
if opts = conditions[:children]
matches = children.select do |c|
(c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
end
-
+
matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
opts.each do |key, value|
next if key == :only
@@ -489,24 +489,24 @@ module HTML #:nodoc:
self_index = siblings.index(self)
if conditions[:sibling]
- return false unless siblings.detect do |s|
+ return false unless siblings.detect do |s|
s != self && s.match(conditions[:sibling])
end
end
if conditions[:before]
- return false unless siblings[self_index+1..-1].detect do |s|
+ return false unless siblings[self_index+1..-1].detect do |s|
s != self && s.match(conditions[:before])
end
end
if conditions[:after]
- return false unless siblings[0,self_index].detect do |s|
+ return false unless siblings[0,self_index].detect do |s|
s != self && s.match(conditions[:after])
end
end
end
-
+
true
end
@@ -515,7 +515,7 @@ module HTML #:nodoc:
return false unless closing == node.closing && self.name == node.name
attributes == node.attributes
end
-
+
private
# Match the given value to the given condition.
def match_condition(value, condition)
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb
index 51e0868995..dceddb9b80 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb
@@ -7,11 +7,11 @@ module HTML
return text unless sanitizeable?(text)
tokenize(text, options).join
end
-
+
def sanitizeable?(text)
!(text.nil? || text.empty? || !text.index("<"))
end
-
+
protected
def tokenize(text, options)
tokenizer = HTML::Tokenizer.new(text)
@@ -22,12 +22,12 @@ module HTML
end
result
end
-
+
def process_node(node, result, options)
result << node.to_s
end
end
-
+
class FullSanitizer < Sanitizer
def sanitize(text, options = {})
result = super
@@ -37,12 +37,12 @@ module HTML
# Recurse - handle all dirty nested tags
result == text ? result : sanitize(result, options)
end
-
+
def process_node(node, result, options)
result << node.to_s if node.class == HTML::Text
end
end
-
+
class LinkSanitizer < FullSanitizer
cattr_accessor :included_tags, :instance_writer => false
self.included_tags = Set.new(%w(a href))
@@ -50,13 +50,13 @@ module HTML
def sanitizeable?(text)
!(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
end
-
+
protected
def process_node(node, result, options)
- result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
+ result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name)
end
end
-
+
class WhiteListSanitizer < Sanitizer
[:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
:allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
@@ -66,35 +66,35 @@ module HTML
# A regular expression of the valid characters used to separate protocols like
# the ':' in 'http://foo.com'
self.protocol_separator = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/
-
+
# Specifies a Set of HTML attributes that can have URIs.
self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
# Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
# to just escaping harmless tags like &lt;font&gt;
self.bad_tags = Set.new(%w(script))
-
+
# Specifies the default Set of tags that the #sanitize helper will allow unscathed.
- self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
- sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
+ self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
acronym a img blockquote del ins))
- # Specifies the default Set of html attributes that the #sanitize helper will leave
+ # Specifies the default Set of html attributes that the #sanitize helper will leave
# in the allowed tag.
self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
-
+
# Specifies the default Set of acceptable protocols that #sanitize will allow in URI attributes.
- self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
+ self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
feed svn urn aim rsync tag ssh sftp rtsp afs))
-
+
# Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
- self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
- border-color border-left-color border-right-color border-top-color clear color cursor direction display
+ self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
+ border-color border-left-color border-right-color border-top-color clear color cursor direction display
elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
width))
-
+
# Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center
collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
@@ -118,9 +118,9 @@ module HTML
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
if allowed_css_properties.include?(prop.downcase)
clean << prop + ': ' + val + ';'
- elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
+ elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
unless val.split().any? do |keyword|
- !allowed_css_keywords.include?(keyword) &&
+ !allowed_css_keywords.include?(keyword) &&
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
end
clean << prop + ': ' + val + ';'
@@ -146,7 +146,7 @@ module HTML
else
options[:parent].unshift node.name
end
-
+
process_attributes_for node, options
options[:tags].include?(node.name) ? node : nil
@@ -154,7 +154,7 @@ module HTML
bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
end
end
-
+
def process_attributes_for(node, options)
return unless node.attributes
node.attributes.keys.each do |attr_name|
@@ -169,7 +169,7 @@ module HTML
end
def contains_bad_protocols?(attr_name, value)
- uri_attributes.include?(attr_name) &&
+ uri_attributes.include?(attr_name) &&
(value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(%|&#37;)3A/ && !allowed_protocols.include?(value.split(protocol_separator).first))
end
end
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb
index e2c49c284f..0fe2e6d1a6 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb
@@ -182,7 +182,7 @@ module HTML
# not another using <tt>:not</tt>. For example:
# p:not(.post)
# Matches all paragraphs that do not have the class <tt>.post</tt>.
- #
+ #
# === Substitution Values
#
# You can use substitution with identifiers, class names and element values.
diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
index 240dc1890f..c252e01cf5 100644
--- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
+++ b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb
@@ -1,7 +1,7 @@
require 'strscan'
module HTML #:nodoc:
-
+
# A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each
# token is a string. Each string represents either "text", or an HTML element.
#
@@ -14,13 +14,13 @@ module HTML #:nodoc:
# p token
# end
class Tokenizer #:nodoc:
-
+
# The current (byte) position in the text
attr_reader :position
-
+
# The current line number
attr_reader :line
-
+
# Create a new Tokenizer for the given text.
def initialize(text)
text.encode! if text.encoding_aware?
@@ -42,7 +42,7 @@ module HTML #:nodoc:
update_current_line(scan_text)
end
end
-
+
private
# Treat the text at the current position as a tag, and scan it. Supports
@@ -69,13 +69,13 @@ module HTML #:nodoc:
def scan_text
"#{@scanner.getch}#{@scanner.scan(/[^<]*/)}"
end
-
+
# Counts the number of newlines in the text and updates the current line
# accordingly.
def update_current_line(text)
text.scan(/\r?\n/) { @current_line += 1 }
end
-
+
# Skips over quoted strings, so that less-than and greater-than characters
# within the strings are ignored.
def consume_quoted_regions
@@ -103,5 +103,5 @@ module HTML #:nodoc:
text
end
end
-
+
end