From b451de0d6de4df6bc66b274cec73b919f823d5ae Mon Sep 17 00:00:00 2001 From: Santiago Pastorino Date: Sat, 14 Aug 2010 02:13:00 -0300 Subject: Deletes trailing whitespaces (over text files only find * -type f -exec sed 's/[ \t]*$//' -i {} \;) --- .../vendor/html-scanner/html/document.rb | 4 +- .../vendor/html-scanner/html/node.rb | 58 +++++++++++----------- .../vendor/html-scanner/html/sanitizer.rb | 50 +++++++++---------- .../vendor/html-scanner/html/selector.rb | 2 +- .../vendor/html-scanner/html/tokenizer.rb | 16 +++--- 5 files changed, 65 insertions(+), 65 deletions(-) (limited to 'actionpack/lib/action_controller/vendor/html-scanner/html') diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb index b8d73c350d..7fa3aead82 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/document.rb @@ -48,7 +48,7 @@ EOF end end end - + # Search the tree for (and return) the first node that matches the given # conditions. The conditions are interpreted differently for different node # types, see HTML::Text#find and HTML::Tag#find. @@ -62,7 +62,7 @@ EOF def find_all(conditions) @root.find_all(conditions) end - + end end diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb index a874519978..d581399514 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/node.rb @@ -1,7 +1,7 @@ require 'strscan' module HTML #:nodoc: - + class Conditions < Hash #:nodoc: def initialize(hash) super() @@ -57,17 +57,17 @@ module HTML #:nodoc: class Node #:nodoc: # The array of children of this node. Not all nodes have children. attr_reader :children - + # The parent node of this node. All nodes have a parent, except for the # root node. attr_reader :parent - + # The line number of the input where this node was begun attr_reader :line - + # The byte position in the input where this node was begun attr_reader :position - + # Create a new node as a child of the given parent. def initialize(parent, line=0, pos=0) @parent = parent @@ -92,7 +92,7 @@ module HTML #:nodoc: # returns non +nil+. Returns the result of the #find call that succeeded. def find(conditions) conditions = validate_conditions(conditions) - @children.each do |child| + @children.each do |child| node = child.find(conditions) return node if node end @@ -133,7 +133,7 @@ module HTML #:nodoc: equivalent end - + class </) if strict - raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" + raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})" else # throw away all text until we find what we're looking for scanner.skip_until(/>/) or scanner.terminate @@ -212,9 +212,9 @@ module HTML #:nodoc: # A node that represents text, rather than markup. class Text < Node #:nodoc: - + attr_reader :content - + # Creates a new text node as a child of the given parent, with the given # content. def initialize(parent, line, pos, content) @@ -240,7 +240,7 @@ module HTML #:nodoc: def find(conditions) match(conditions) && self end - + # Returns non-+nil+ if this node meets the given conditions, or +nil+ # otherwise. See the discussion of #find for the valid conditions. def match(conditions) @@ -268,7 +268,7 @@ module HTML #:nodoc: content == node.content end end - + # A CDATA node is simply a text node with a specialized way of displaying # itself. class CDATA < Text #:nodoc: @@ -281,16 +281,16 @@ module HTML #:nodoc: # closing tag, or a self-closing tag. It has a name, and may have a hash of # attributes. class Tag < Node #:nodoc: - + # Either +nil+, :close, or :self attr_reader :closing - + # Either +nil+, or a hash of attributes for this node. attr_reader :attributes # The name of this tag. attr_reader :name - + # Create a new node as a child of the given parent, using the given content # to describe the node. It will be parsed and the node name, attributes and # closing status extracted. @@ -344,7 +344,7 @@ module HTML #:nodoc: def tag? true end - + # Returns +true+ if the node meets any of the given conditions. The # +conditions+ parameter must be a hash of any of the following keys # (all are optional): @@ -404,7 +404,7 @@ module HTML #:nodoc: # node.match :descendant => { :tag => "strong" } # # # test if the node has between 2 and 4 span tags as immediate children - # node.match :children => { :count => 2..4, :only => { :tag => "span" } } + # node.match :children => { :count => 2..4, :only => { :tag => "span" } } # # # get funky: test to see if the node is a "div", has a "ul" ancestor # # and an "li" parent (with "class" = "enum"), and whether or not it has @@ -439,7 +439,7 @@ module HTML #:nodoc: # test children return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child] - + # test ancestors if conditions[:ancestor] return false unless catch :found do @@ -457,13 +457,13 @@ module HTML #:nodoc: child.match(:descendant => conditions[:descendant]) end end - + # count children if opts = conditions[:children] matches = children.select do |c| (c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?)) end - + matches = matches.select { |c| c.match(opts[:only]) } if opts[:only] opts.each do |key, value| next if key == :only @@ -489,24 +489,24 @@ module HTML #:nodoc: self_index = siblings.index(self) if conditions[:sibling] - return false unless siblings.detect do |s| + return false unless siblings.detect do |s| s != self && s.match(conditions[:sibling]) end end if conditions[:before] - return false unless siblings[self_index+1..-1].detect do |s| + return false unless siblings[self_index+1..-1].detect do |s| s != self && s.match(conditions[:before]) end end if conditions[:after] - return false unless siblings[0,self_index].detect do |s| + return false unless siblings[0,self_index].detect do |s| s != self && s.match(conditions[:after]) end end end - + true end @@ -515,7 +515,7 @@ module HTML #:nodoc: return false unless closing == node.closing && self.name == node.name attributes == node.attributes end - + private # Match the given value to the given condition. def match_condition(value, condition) diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb index 51e0868995..dceddb9b80 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb @@ -7,11 +7,11 @@ module HTML return text unless sanitizeable?(text) tokenize(text, options).join end - + def sanitizeable?(text) !(text.nil? || text.empty? || !text.index("<")) end - + protected def tokenize(text, options) tokenizer = HTML::Tokenizer.new(text) @@ -22,12 +22,12 @@ module HTML end result end - + def process_node(node, result, options) result << node.to_s end end - + class FullSanitizer < Sanitizer def sanitize(text, options = {}) result = super @@ -37,12 +37,12 @@ module HTML # Recurse - handle all dirty nested tags result == text ? result : sanitize(result, options) end - + def process_node(node, result, options) result << node.to_s if node.class == HTML::Text end end - + class LinkSanitizer < FullSanitizer cattr_accessor :included_tags, :instance_writer => false self.included_tags = Set.new(%w(a href)) @@ -50,13 +50,13 @@ module HTML def sanitizeable?(text) !(text.nil? || text.empty? || !((text.index(""))) end - + protected def process_node(node, result, options) - result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) + result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) end end - + class WhiteListSanitizer < Sanitizer [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| @@ -66,35 +66,35 @@ module HTML # A regular expression of the valid characters used to separate protocols like # the ':' in 'http://foo.com' self.protocol_separator = /:|(�*58)|(p)|(%|%)3A/ - + # Specifies a Set of HTML attributes that can have URIs. self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed # to just escaping harmless tags like <font> self.bad_tags = Set.new(%w(script)) - + # Specifies the default Set of tags that the #sanitize helper will allow unscathed. - self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub - sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr + self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr acronym a img blockquote del ins)) - # Specifies the default Set of html attributes that the #sanitize helper will leave + # Specifies the default Set of html attributes that the #sanitize helper will leave # in the allowed tag. self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) - + # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. - self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto + self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed svn urn aim rsync tag ssh sftp rtsp afs)) - + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. - self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse - border-color border-left-color border-right-color border-top-color clear color cursor direction display + self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse + border-color border-left-color border-right-color border-top-color clear color cursor direction display elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space width)) - + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal @@ -118,9 +118,9 @@ module HTML style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| if allowed_css_properties.include?(prop.downcase) clean << prop + ': ' + val + ';' - elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) + elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) unless val.split().any? do |keyword| - !allowed_css_keywords.include?(keyword) && + !allowed_css_keywords.include?(keyword) && keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ end clean << prop + ': ' + val + ';' @@ -146,7 +146,7 @@ module HTML else options[:parent].unshift node.name end - + process_attributes_for node, options options[:tags].include?(node.name) ? node : nil @@ -154,7 +154,7 @@ module HTML bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/:not. For example: # p:not(.post) # Matches all paragraphs that do not have the class .post. - # + # # === Substitution Values # # You can use substitution with identifiers, class names and element values. diff --git a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb index 240dc1890f..c252e01cf5 100644 --- a/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb +++ b/actionpack/lib/action_controller/vendor/html-scanner/html/tokenizer.rb @@ -1,7 +1,7 @@ require 'strscan' module HTML #:nodoc: - + # A simple HTML tokenizer. It simply breaks a stream of text into tokens, where each # token is a string. Each string represents either "text", or an HTML element. # @@ -14,13 +14,13 @@ module HTML #:nodoc: # p token # end class Tokenizer #:nodoc: - + # The current (byte) position in the text attr_reader :position - + # The current line number attr_reader :line - + # Create a new Tokenizer for the given text. def initialize(text) text.encode! if text.encoding_aware? @@ -42,7 +42,7 @@ module HTML #:nodoc: update_current_line(scan_text) end end - + private # Treat the text at the current position as a tag, and scan it. Supports @@ -69,13 +69,13 @@ module HTML #:nodoc: def scan_text "#{@scanner.getch}#{@scanner.scan(/[^<]*/)}" end - + # Counts the number of newlines in the text and updates the current line # accordingly. def update_current_line(text) text.scan(/\r?\n/) { @current_line += 1 } end - + # Skips over quoted strings, so that less-than and greater-than characters # within the strings are ignored. def consume_quoted_regions @@ -103,5 +103,5 @@ module HTML #:nodoc: text end end - + end -- cgit v1.2.3