about | summary | refs | log | tree | commit | diff | stats
path: root/actionpack/test/template/html-scanner
diff options
context:
space:
mode:
author: Joshua Peek <josh@joshpeek.com> 2009-10-03 21:05:51 -0500
committer: Joshua Peek <josh@joshpeek.com> 2009-10-03 21:05:51 -0500
commit: 018b79dd36d054d87fdc408d38dc9ac7f1b1500d (patch)
tree: a954ecef58682b2d259432a04ce503f8bb865840 /actionpack/test/template/html-scanner
parent: 84e94551f62d3bcbc71f1c6f3fda738342d984e2 (diff)
download: rails-018b79dd36d054d87fdc408d38dc9ac7f1b1500d.tar.gz
rails-018b79dd36d054d87fdc408d38dc9ac7f1b1500d.tar.bz2
rails-018b79dd36d054d87fdc408d38dc9ac7f1b1500d.zip
File extra test folders into controller, dispatch, or template
Diffstat (limited to 'actionpack/test/template/html-scanner')
-rw-r--r-- actionpack/test/template/html-scanner/cdata_node_test.rb 15
-rw-r--r-- actionpack/test/template/html-scanner/document_test.rb 148
-rw-r--r-- actionpack/test/template/html-scanner/node_test.rb 89
-rw-r--r-- actionpack/test/template/html-scanner/sanitizer_test.rb 273
-rw-r--r-- actionpack/test/template/html-scanner/tag_node_test.rb 238
-rw-r--r-- actionpack/test/template/html-scanner/text_node_test.rb 50
-rw-r--r-- actionpack/test/template/html-scanner/tokenizer_test.rb 131
7 files changed, 944 insertions, 0 deletions
diff --git a/actionpack/test/template/html-scanner/cdata_node_test.rb b/actionpack/test/template/html-scanner/cdata_node_test.rb
new file mode 100644
index 0000000000..1822cc565a
--- /dev/null
+++ b/actionpack/test/template/html-scanner/cdata_node_test.rb
@@ -0,0 +1,15 @@
+require 'abstract_unit'
+
+class CDATANodeTest < Test::Unit::TestCase
+ def setup
+ @node = HTML::CDATA.new(nil, 0, 0, "<p>howdy</p>")
+ end
+
+ def test_to_s
+ assert_equal "<![CDATA[<p>howdy</p>]]>", @node.to_s
+ end
+
+ def test_content
+ assert_equal "<p>howdy</p>", @node.content
+ end
+end
diff --git a/actionpack/test/template/html-scanner/document_test.rb b/actionpack/test/template/html-scanner/document_test.rb
new file mode 100644
index 0000000000..c68f04fa75
--- /dev/null
+++ b/actionpack/test/template/html-scanner/document_test.rb
@@ -0,0 +1,148 @@
+require 'abstract_unit'
+
+class DocumentTest < Test::Unit::TestCase
+ def test_handle_doctype
+ doc = nil
+ assert_nothing_raised do
+ doc = HTML::Document.new <<-HTML.strip
+ <!DOCTYPE "blah" "blah" "blah">
+ <html>
+ </html>
+ HTML
+ end
+ assert_equal 3, doc.root.children.length
+ assert_equal %{<!DOCTYPE "blah" "blah" "blah">}, doc.root.children[0].content
+ assert_match %r{\s+}m, doc.root.children[1].content
+ assert_equal "html", doc.root.children[2].name
+ end
+
+ def test_find_img
+ doc = HTML::Document.new <<-HTML.strip
+ <html>
+ <body>
+ <p><img src="hello.gif"></p>
+ </body>
+ </html>
+ HTML
+ assert doc.find(:tag=>"img", :attributes=>{"src"=>"hello.gif"})
+ end
+
+ def test_find_all
+ doc = HTML::Document.new <<-HTML.strip
+ <html>
+ <body>
+ <p class="test"><img src="hello.gif"></p>
+ <div class="foo">
+ <p class="test">something</p>
+ <p>here is <em class="test">more</em></p>
+ </div>
+ </body>
+ </html>
+ HTML
+ all = doc.find_all :attributes => { :class => "test" }
+ assert_equal 3, all.length
+ assert_equal [ "p", "p", "em" ], all.map { |n| n.name }
+ end
+
+ def test_find_with_text
+ doc = HTML::Document.new <<-HTML.strip
+ <html>
+ <body>
+ <p>Some text</p>
+ </body>
+ </html>
+ HTML
+ assert doc.find(:content => "Some text")
+ assert doc.find(:tag => "p", :child => { :content => "Some text" })
+ assert doc.find(:tag => "p", :child => "Some text")
+ assert doc.find(:tag => "p", :content => "Some text")
+ end
+
+ def test_parse_xml
+ assert_nothing_raised { HTML::Document.new("<tags><tag/></tags>", true, true) }
+ assert_nothing_raised { HTML::Document.new("<outer><link>something</link></outer>", true, true) }
+ end
+
+ def test_parse_document
+ doc = HTML::Document.new(<<-HTML)
+ <div>
+ <h2>blah</h2>
+ <table>
+ </table>
+ </div>
+ HTML
+ assert_not_nil doc.find(:tag => "div", :children => { :count => 1, :only => { :tag => "table" } })
+ end
+
+ def test_tag_nesting_nothing_to_s
+ doc = HTML::Document.new("<tag></tag>")
+ assert_equal "<tag></tag>", doc.root.to_s
+ end
+
+ def test_tag_nesting_space_to_s
+ doc = HTML::Document.new("<tag> </tag>")
+ assert_equal "<tag> </tag>", doc.root.to_s
+ end
+
+ def test_tag_nesting_text_to_s
+ doc = HTML::Document.new("<tag>text</tag>")
+ assert_equal "<tag>text</tag>", doc.root.to_s
+ end
+
+ def test_tag_nesting_tag_to_s
+ doc = HTML::Document.new("<tag><nested /></tag>")
+ assert_equal "<tag><nested /></tag>", doc.root.to_s
+ end
+
+ def test_parse_cdata
+ doc = HTML::Document.new(<<-HTML)
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+ <head>
+ <title><![CDATA[<br>]]></title>
+ </head>
+ <body>
+ <p>this document has &lt;br&gt; for a title</p>
+ </body>
+</html>
+HTML
+
+ assert_nil doc.find(:tag => "title", :descendant => { :tag => "br" })
+ assert doc.find(:tag => "title", :child => "<br>")
+ end
+
+ def test_find_empty_tag
+ doc = HTML::Document.new("<div id='map'></div>")
+ assert_nil doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /./)
+ assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /\A\Z/)
+ assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /^$/)
+ assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => "")
+ assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => nil)
+ end
+
+ def test_parse_invalid_document
+ assert_nothing_raised do
+ doc = HTML::Document.new("<html>
+ <table>
+ <tr>
+ <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About Us</td>
+ </tr>
+ </table>
+ </html>")
+ end
+ end
+
+ def test_invalid_document_raises_exception_when_strict
+ assert_raise RuntimeError do
+ doc = HTML::Document.new("<html>
+ <table>
+ <tr>
+ <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About Us</td>
+ </tr>
+ </table>
+ </html>", true)
+ end
+ end
+
+end
diff --git a/actionpack/test/template/html-scanner/node_test.rb b/actionpack/test/template/html-scanner/node_test.rb
new file mode 100644
index 0000000000..b0df36877e
--- /dev/null
+++ b/actionpack/test/template/html-scanner/node_test.rb
@@ -0,0 +1,89 @@
+require 'abstract_unit'
+
+class NodeTest < Test::Unit::TestCase
+
+ class MockNode
+ def initialize(matched, value)
+ @matched = matched
+ @value = value
+ end
+
+ def find(conditions)
+ @matched && self
+ end
+
+ def to_s
+ @value.to_s
+ end
+ end
+
+ def setup
+ @node = HTML::Node.new("parent")
+ @node.children.concat [MockNode.new(false,1), MockNode.new(true,"two"), MockNode.new(false,:three)]
+ end
+
+ def test_match
+ assert !@node.match("foo")
+ end
+
+ def test_tag
+ assert !@node.tag?
+ end
+
+ def test_to_s
+ assert_equal "1twothree", @node.to_s
+ end
+
+ def test_find
+ assert_equal "two", @node.find('blah').to_s
+ end
+
+ def test_parse_strict
+ s = "<b foo='hello'' bar='baz'>"
+ assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) }
+ end
+
+ def test_parse_relaxed
+ s = "<b foo='hello'' bar='baz'>"
+ node = nil
+ assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
+ assert node.attributes.has_key?("foo")
+ assert !node.attributes.has_key?("bar")
+ end
+
+ def test_to_s_with_boolean_attrs
+ s = "<b foo bar>"
+ node = HTML::Node.parse(nil,0,0,s)
+ assert node.attributes.has_key?("foo")
+ assert node.attributes.has_key?("bar")
+ assert "<b foo bar>", node.to_s
+ end
+
+ def test_parse_with_unclosed_tag
+ s = "<span onmouseover='bang'"
+ node = nil
+ assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
+ assert node.attributes.has_key?("onmouseover")
+ end
+
+ def test_parse_with_valid_cdata_section
+ s = "<![CDATA[<span>contents</span>]]>"
+ node = nil
+ assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
+ assert_kind_of HTML::CDATA, node
+ assert_equal '<span>contents</span>', node.content
+ end
+
+ def test_parse_strict_with_unterminated_cdata_section
+ s = "<![CDATA[neverending..."
+ assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) }
+ end
+
+ def test_parse_relaxed_with_unterminated_cdata_section
+ s = "<![CDATA[neverending..."
+ node = nil
+ assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) }
+ assert_kind_of HTML::CDATA, node
+ assert_equal 'neverending...', node.content
+ end
+end
diff --git a/actionpack/test/template/html-scanner/sanitizer_test.rb b/actionpack/test/template/html-scanner/sanitizer_test.rb
new file mode 100644
index 0000000000..e85a5c7abf
--- /dev/null
+++ b/actionpack/test/template/html-scanner/sanitizer_test.rb
@@ -0,0 +1,273 @@
+require 'abstract_unit'
+
+class SanitizerTest < ActionController::TestCase
+ def setup
+ @sanitizer = nil # used by assert_sanitizer
+ end
+
+ def test_strip_tags
+ sanitizer = HTML::FullSanitizer.new
+ assert_equal("<<<bad html", sanitizer.sanitize("<<<bad html"))
+ assert_equal("<<", sanitizer.sanitize("<<<bad html>"))
+ assert_equal("Dont touch me", sanitizer.sanitize("Dont touch me"))
+ assert_equal("This is a test.", sanitizer.sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>"))
+ assert_equal("Weirdos", sanitizer.sanitize("Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"))
+ assert_equal("This is a test.", sanitizer.sanitize("This is a test."))
+ assert_equal(
+ %{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize(
+ %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}))
+ assert_equal "This has a here.", sanitizer.sanitize("This has a <!-- comment --> here.")
+ assert_equal "This has a here.", sanitizer.sanitize("This has a <![CDATA[<section>]]> here.")
+ assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed <![CDATA[<section>]] here...")
+ [nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) }
+ end
+
+ def test_strip_links
+ sanitizer = HTML::LinkSanitizer.new
+ assert_equal "Dont touch me", sanitizer.sanitize("Dont touch me")
+ assert_equal "on my mind\nall day long", sanitizer.sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
+ assert_equal "0wn3d", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
+ assert_equal "Magic", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
+ assert_equal "FrrFox", sanitizer.sanitize("<href onlclick='steal()'>FrrFox</a></href>")
+ assert_equal "My mind\nall <b>day</b> long", sanitizer.sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
+ assert_equal "all <b>day</b> long", sanitizer.sanitize("<<a>a href='hello'>all <b>day</b> long<</A>/a>")
+
+ assert_equal "<a<a", sanitizer.sanitize("<a<a")
+ end
+
+ def test_sanitize_form
+ assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
+ end
+
+ def test_sanitize_plaintext
+ raw = "<plaintext><span>foo</span></plaintext>"
+ assert_sanitized raw, "<span>foo</span>"
+ end
+
+ def test_sanitize_script
+ assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cd e f"
+ end
+
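+ # NOTE(review): flagged by the original author as problematic, without details -- confirm the expectation below is still trusted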
+ def test_sanitize_js_handlers
+ raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
+ assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>}
+ end
+
+ def test_sanitize_javascript_href
+ raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
+ assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
+ end
+
+ def test_sanitize_image_src
+ raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
+ assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
+ end
+
+ HTML::WhiteListSanitizer.allowed_tags.each do |tag_name|
+ define_method "test_should_allow_#{tag_name}_tag" do
+ assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
+ end
+ end
+
+ def test_should_allow_anchors
+ assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>)
+ end
+
+ # RFC 3986, sec 4.2
+ def test_allow_colons_in_path_component
+ assert_sanitized("<a href=\"./this:that\">foo</a>")
+ end
+
+ %w(src width height alt).each do |img_attr|
+ define_method "test_should_allow_image_#{img_attr}_attribute" do
+ assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
+ end
+ end
+
+ def test_should_handle_non_html
+ assert_sanitized 'abc'
+ end
+
+ def test_should_handle_blank_text
+ assert_sanitized nil
+ assert_sanitized ''
+ end
+
+ def test_should_allow_custom_tags
+ text = "<u>foo</u>"
+ sanitizer = HTML::WhiteListSanitizer.new
+ assert_equal(text, sanitizer.sanitize(text, :tags => %w(u)))
+ end
+
+ def test_should_allow_only_custom_tags
+ text = "<u>foo</u> with <i>bar</i>"
+ sanitizer = HTML::WhiteListSanitizer.new
+ assert_equal("<u>foo</u> with bar", sanitizer.sanitize(text, :tags => %w(u)))
+ end
+
+ def test_should_allow_custom_tags_with_attributes
+ text = %(<blockquote cite="http://example.com/">foo</blockquote>)
+ sanitizer = HTML::WhiteListSanitizer.new
+ assert_equal(text, sanitizer.sanitize(text))
+ end
+
+ def test_should_allow_custom_tags_with_custom_attributes
+ text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
+ sanitizer = HTML::WhiteListSanitizer.new
+ assert_equal(text, sanitizer.sanitize(text, :attributes => ['foo']))
+ end
+
+ [%w(img src), %w(a href)].each do |(tag, attr)|
+ define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
+ assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
+ end
+ end
+
+ def test_should_flag_bad_protocols
+ sanitizer = HTML::WhiteListSanitizer.new
+ %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto|
+ assert sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://bad")
+ end
+ end
+
+ def test_should_accept_good_protocols
+ sanitizer = HTML::WhiteListSanitizer.new
+ HTML::WhiteListSanitizer.allowed_protocols.each do |proto|
+ assert !sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://good")
+ end
+ end
+
+ def test_should_reject_hex_codes_in_protocol
+ assert_sanitized %(<a href="&#37;6A&#37;61&#37;76&#37;61&#37;73&#37;63&#37;72&#37;69&#37;70&#37;74&#37;3A&#37;61&#37;6C&#37;65&#37;72&#37;74&#37;28&#37;22&#37;58&#37;53&#37;53&#37;22&#37;29">1</a>), "<a>1</a>"
+ assert @sanitizer.send(:contains_bad_protocols?, 'src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29")
+ end
+
+ def test_should_block_script_tag
+ assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
+ end
+
+ [%(<IMG SRC="javascript:alert('XSS');">),
+ %(<IMG SRC=javascript:alert('XSS')>),
+ %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
+ %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">),
+ %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
+ %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
+ %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
+ %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
+ %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
+ %(<IMG SRC="jav\tascript:alert('XSS');">),
+ %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
+ %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
+ %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
+ %(<IMG SRC=" &#14; javascript:alert('XSS');">),
+ %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i|
+ define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do
+ assert_sanitized img_hack, "<img>"
+ end
+ end
+
+ def test_should_sanitize_tag_broken_up_by_null
+ assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")"
+ end
+
+ def test_should_sanitize_invalid_script_tag
+ assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
+ end
+
+ def test_should_sanitize_script_tag_with_multiple_open_brackets
+ assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;"
+ assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(&lt;a)
+ end
+
+ def test_should_sanitize_unclosed_script
+ assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>"
+ end
+
+ def test_should_sanitize_half_open_scripts
+ assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
+ end
+
+ def test_should_not_fall_for_ridiculous_hack
+ img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
+ assert_sanitized img_hack, "<img>"
+ end
+
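+ # NOTE(review): flagged by the original author as problematic, without details -- confirm the expectation below is still trusted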
+ def test_should_sanitize_attributes
+ assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>)
+ end
+
+ def test_should_sanitize_illegal_style_properties
+ raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
+ expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;)
+ assert_equal expected, sanitize_css(raw)
+ end
+
+ def test_should_sanitize_with_trailing_space
+ raw = "display:block; "
+ expected = "display: block;"
+ assert_equal expected, sanitize_css(raw)
+ end
+
+ def test_should_sanitize_xul_style_attributes
+ raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
+ assert_equal '', sanitize_css(raw)
+ end
+
+ def test_should_sanitize_invalid_tag_names
+ assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
+ end
+
+ def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
+ assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
+ end
+
+ def test_should_sanitize_invalid_tag_names_in_single_tags
+ assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
+ end
+
+ def test_should_sanitize_img_dynsrc_lowsrc
+ assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
+ end
+
+ def test_should_sanitize_div_background_image_unicode_encoded
+ raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029)
+ assert_equal '', sanitize_css(raw)
+ end
+
+ def test_should_sanitize_div_style_expression
+ raw = %(width: expression(alert('XSS'));)
+ assert_equal '', sanitize_css(raw)
+ end
+
+ def test_should_sanitize_img_vbscript
+ assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
+ end
+
+ def test_should_sanitize_cdata_section
+ assert_sanitized "<![CDATA[<span>section</span>]]>", "&lt;![CDATA[&lt;span>section&lt;/span>]]>"
+ end
+
+ def test_should_sanitize_unterminated_cdata_section
+ assert_sanitized "<![CDATA[<span>neverending...", "&lt;![CDATA[&lt;span>neverending...]]>"
+ end
+
+ def test_should_not_mangle_urls_with_ampersand
+ assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
+ end
+
+protected
+ def assert_sanitized(input, expected = nil)
+ @sanitizer ||= HTML::WhiteListSanitizer.new
+ if input
+ assert_dom_equal expected || input, @sanitizer.sanitize(input)
+ else
+ assert_nil @sanitizer.sanitize(input)
+ end
+ end
+
+ def sanitize_css(input)
+ (@sanitizer ||= HTML::WhiteListSanitizer.new).sanitize_css(input)
+ end
+end
diff --git a/actionpack/test/template/html-scanner/tag_node_test.rb b/actionpack/test/template/html-scanner/tag_node_test.rb
new file mode 100644
index 0000000000..d1d4667378
--- /dev/null
+++ b/actionpack/test/template/html-scanner/tag_node_test.rb
@@ -0,0 +1,238 @@
+require 'abstract_unit'
+
+class TagNodeTest < Test::Unit::TestCase
+ def test_open_without_attributes
+ node = tag("<tag>")
+ assert_equal "tag", node.name
+ assert_equal Hash.new, node.attributes
+ assert_nil node.closing
+ end
+
+ def test_open_with_attributes
+ node = tag("<TAG1 foo=hey_ho x:bar=\"blah blah\" BAZ='blah blah blah' >")
+ assert_equal "tag1", node.name
+ assert_equal "hey_ho", node["foo"]
+ assert_equal "blah blah", node["x:bar"]
+ assert_equal "blah blah blah", node["baz"]
+ end
+
+ def test_self_closing_without_attributes
+ node = tag("<tag/>")
+ assert_equal "tag", node.name
+ assert_equal Hash.new, node.attributes
+ assert_equal :self, node.closing
+ end
+
+ def test_self_closing_with_attributes
+ node = tag("<tag a=b/>")
+ assert_equal "tag", node.name
+ assert_equal( { "a" => "b" }, node.attributes )
+ assert_equal :self, node.closing
+ end
+
+ def test_closing_without_attributes
+ node = tag("</tag>")
+ assert_equal "tag", node.name
+ assert_nil node.attributes
+ assert_equal :close, node.closing
+ end
+
+ def test_bracket_op_when_no_attributes
+ node = tag("</tag>")
+ assert_nil node["foo"]
+ end
+
+ def test_bracket_op_when_attributes
+ node = tag("<tag a=b/>")
+ assert_equal "b", node["a"]
+ end
+
+ def test_attributes_with_escaped_quotes
+ node = tag("<tag a='b\\'c' b=\"bob \\\"float\\\"\">")
+ assert_equal "b\\'c", node["a"]
+ assert_equal "bob \\\"float\\\"", node["b"]
+ end
+
+ def test_to_s # NOTE(review): test_to_s is defined again further down in this class; the later definition replaces this one, so this version never runs
+ node = tag("<a b=c d='f' g=\"h 'i'\" />")
+ assert_equal %(<a b='c' d='f' g='h \\'i\\'' />), node.to_s # NOTE(review): single-quoted expectation disagrees with the double-quoted output asserted by the later test_to_s -- confirm before renaming/reviving this test
+ end
+
+ def test_tag
+ assert tag("<tag>").tag?
+ end
+
+ def test_match_tag_as_string
+ assert tag("<tag>").match(:tag => "tag")
+ assert !tag("<tag>").match(:tag => "b")
+ end
+
+ def test_match_tag_as_regexp
+ assert tag("<tag>").match(:tag => /t.g/)
+ assert !tag("<tag>").match(:tag => /t[bqs]g/)
+ end
+
+ def test_match_attributes_as_string
+ t = tag("<tag a=something b=else />")
+ assert t.match(:attributes => {"a" => "something"})
+ assert t.match(:attributes => {"b" => "else"})
+ end
+
+ def test_match_attributes_as_regexp
+ t = tag("<tag a=something b=else />")
+ assert t.match(:attributes => {"a" => /^something$/})
+ assert t.match(:attributes => {"b" => /e.*e/})
+ assert t.match(:attributes => {"a" => /me..i/, "b" => /.ls.$/})
+ end
+
+ def test_match_attributes_as_number
+ t = tag("<tag a=15 b=3.1415 />")
+ assert t.match(:attributes => {"a" => 15})
+ assert t.match(:attributes => {"b" => 3.1415})
+ assert t.match(:attributes => {"a" => 15, "b" => 3.1415})
+ end
+
+ def test_match_attributes_exist
+ t = tag("<tag a=15 b=3.1415 />")
+ assert t.match(:attributes => {"a" => true})
+ assert t.match(:attributes => {"b" => true})
+ assert t.match(:attributes => {"a" => true, "b" => true})
+ end
+
+ def test_match_attributes_not_exist
+ t = tag("<tag a=15 b=3.1415 />")
+ assert t.match(:attributes => {"c" => false})
+ assert t.match(:attributes => {"c" => nil})
+ assert t.match(:attributes => {"a" => true, "c" => false})
+ end
+
+ def test_match_parent_success
+ t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>"))
+ assert t.match(:parent => {:tag => "foo", :attributes => {"k" => /v.l/, "j" => false}})
+ end
+
+ def test_match_parent_fail
+ t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>"))
+ assert !t.match(:parent => {:tag => /kafka/})
+ end
+
+ def test_match_child_success
+ t = tag("<tag x:k='something'>")
+ tag("<child v=john a=kelly>", t)
+ tag("<sib m=vaughn v=james>", t)
+ assert t.match(:child => { :tag => "sib", :attributes => {"v" => /j/}})
+ assert t.match(:child => { :attributes => {"a" => "kelly"}})
+ end
+
+ def test_match_child_fail
+ t = tag("<tag x:k='something'>")
+ tag("<child v=john a=kelly>", t)
+ tag("<sib m=vaughn v=james>", t)
+ assert !t.match(:child => { :tag => "sib", :attributes => {"v" => /r/}})
+ assert !t.match(:child => { :attributes => {"v" => false}})
+ end
+
+ def test_match_ancestor_success
+ t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>")))
+ assert t.match(:ancestor => {:tag => "parent", :attributes => {"a" => /ll/}})
+ assert t.match(:ancestor => {:attributes => {"m" => "vaughn"}})
+ end
+
+ def test_match_ancestor_fail
+ t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>")))
+ assert !t.match(:ancestor => {:tag => /^parent/, :attributes => {"v" => /m/}})
+ assert !t.match(:ancestor => {:attributes => {"v" => false}})
+ end
+
+ def test_match_descendant_success
+ tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>")))
+ assert t.match(:descendant => {:tag => "child", :attributes => {"a" => /ll/}})
+ assert t.match(:descendant => {:attributes => {"m" => "vaughn"}})
+ end
+
+ def test_match_descendant_fail
+ tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>")))
+ assert !t.match(:descendant => {:tag => /^child/, :attributes => {"v" => /m/}})
+ assert !t.match(:descendant => {:attributes => {"v" => false}})
+ end
+
+ def test_match_child_count
+ t = tag("<tag x:k='something'>")
+ tag("hello", t)
+ tag("<child v=john a=kelly>", t)
+ tag("<sib m=vaughn v=james>", t)
+ assert t.match(:children => { :count => 2 })
+ assert t.match(:children => { :count => 2..4 })
+ assert t.match(:children => { :less_than => 4 })
+ assert t.match(:children => { :greater_than => 1 })
+ assert !t.match(:children => { :count => 3 })
+ end
+
+ def test_conditions_as_strings
+ t = tag("<tag x:k='something'>")
+ assert t.match("tag" => "tag")
+ assert t.match("attributes" => { "x:k" => "something" })
+ assert !t.match("tag" => "gat")
+ assert !t.match("attributes" => { "x:j" => "something" })
+ end
+
+ def test_attributes_as_symbols
+ t = tag("<child v=john a=kelly>")
+ assert t.match(:attributes => { :v => /oh/ })
+ assert t.match(:attributes => { :a => /ll/ })
+ end
+
+ def test_match_sibling
+ t = tag("<tag x:k='something'>")
+ tag("hello", t)
+ tag("<span a=b>", t)
+ tag("world", t)
+ m = tag("<span k=r>", t)
+ tag("<span m=l>", t)
+
+ assert m.match(:sibling => {:tag => "span", :attributes => {:a => true}})
+ assert m.match(:sibling => {:tag => "span", :attributes => {:m => true}})
+ assert !m.match(:sibling => {:tag => "span", :attributes => {:k => true}})
+ end
+
+ def test_match_sibling_before
+ t = tag("<tag x:k='something'>")
+ tag("hello", t)
+ tag("<span a=b>", t)
+ tag("world", t)
+ m = tag("<span k=r>", t)
+ tag("<span m=l>", t)
+
+ assert m.match(:before => {:tag => "span", :attributes => {:m => true}})
+ assert !m.match(:before => {:tag => "span", :attributes => {:a => true}})
+ assert !m.match(:before => {:tag => "span", :attributes => {:k => true}})
+ end
+
+ def test_match_sibling_after
+ t = tag("<tag x:k='something'>")
+ tag("hello", t)
+ tag("<span a=b>", t)
+ tag("world", t)
+ m = tag("<span k=r>", t)
+ tag("<span m=l>", t)
+
+ assert m.match(:after => {:tag => "span", :attributes => {:a => true}})
+ assert !m.match(:after => {:tag => "span", :attributes => {:m => true}})
+ assert !m.match(:after => {:tag => "span", :attributes => {:k => true}})
+ end
+
+ def test_to_s
+ t = tag("<b x='foo'>")
+ tag("hello", t)
+ tag("<hr />", t)
+ assert_equal %(<b x="foo">hello<hr /></b>), t.to_s
+ end
+
+ private
+
+ def tag(content, parent=nil)
+ node = HTML::Node.parse(parent,0,0,content)
+ parent.children << node if parent
+ node
+ end
+end
diff --git a/actionpack/test/template/html-scanner/text_node_test.rb b/actionpack/test/template/html-scanner/text_node_test.rb
new file mode 100644
index 0000000000..1ab3f4454e
--- /dev/null
+++ b/actionpack/test/template/html-scanner/text_node_test.rb
@@ -0,0 +1,50 @@
+require 'abstract_unit'
+
+class TextNodeTest < Test::Unit::TestCase
+ def setup
+ @node = HTML::Text.new(nil, 0, 0, "hello, howdy, aloha, annyeong")
+ end
+
+ def test_to_s
+ assert_equal "hello, howdy, aloha, annyeong", @node.to_s
+ end
+
+ def test_find_string
+ assert_equal @node, @node.find("hello, howdy, aloha, annyeong")
+ assert_equal false, @node.find("bogus")
+ end
+
+ def test_find_regexp
+ assert_equal @node, @node.find(/an+y/)
+ assert_nil @node.find(/b/)
+ end
+
+ def test_find_hash
+ assert_equal @node, @node.find(:content => /howdy/)
+ assert_nil @node.find(:content => /^howdy$/)
+ assert_equal false, @node.find(:content => "howdy")
+ end
+
+ def test_find_other
+ assert_nil @node.find(:hello)
+ end
+
+ def test_match_string
+ assert @node.match("hello, howdy, aloha, annyeong")
+ assert_equal false, @node.match("bogus")
+ end
+
+ def test_match_regexp
+ assert_not_nil @node, @node.match(/an+y/)
+ assert_nil @node.match(/b/)
+ end
+
+ def test_match_hash
+ assert_not_nil @node, @node.match(:content => "howdy")
+ assert_nil @node.match(:content => /^howdy$/)
+ end
+
+ def test_match_other
+ assert_nil @node.match(:hello)
+ end
+end
diff --git a/actionpack/test/template/html-scanner/tokenizer_test.rb b/actionpack/test/template/html-scanner/tokenizer_test.rb
new file mode 100644
index 0000000000..a001bcbbad
--- /dev/null
+++ b/actionpack/test/template/html-scanner/tokenizer_test.rb
@@ -0,0 +1,131 @@
+require 'abstract_unit'
+
+class TokenizerTest < Test::Unit::TestCase
+
+ def test_blank
+ tokenize ""
+ assert_end
+ end
+
+ def test_space
+ tokenize " "
+ assert_next " "
+ assert_end
+ end
+
+ def test_tag_simple_open
+ tokenize "<tag>"
+ assert_next "<tag>"
+ assert_end
+ end
+
+ def test_tag_simple_self_closing
+ tokenize "<tag />"
+ assert_next "<tag />"
+ assert_end
+ end
+
+ def test_tag_simple_closing
+ tokenize "</tag>"
+ assert_next "</tag>"
+ end
+
+ def test_tag_with_single_quoted_attribute
+ tokenize %{<tag a='hello'>x}
+ assert_next %{<tag a='hello'>}
+ end
+
+ def test_tag_with_single_quoted_attribute_with_escape
+ tokenize %{<tag a='hello\\''>x}
+ assert_next %{<tag a='hello\\''>}
+ end
+
+ def test_tag_with_double_quoted_attribute
+ tokenize %{<tag a="hello">x}
+ assert_next %{<tag a="hello">}
+ end
+
+ def test_tag_with_double_quoted_attribute_with_escape
+ tokenize %{<tag a="hello\\"">x}
+ assert_next %{<tag a="hello\\"">}
+ end
+
+ def test_tag_with_unquoted_attribute
+ tokenize %{<tag a=hello>x}
+ assert_next %{<tag a=hello>}
+ end
+
+ def test_tag_with_lt_char_in_attribute
+ tokenize %{<tag a="x < y">x}
+ assert_next %{<tag a="x < y">}
+ end
+
+ def test_tag_with_gt_char_in_attribute
+ tokenize %{<tag a="x > y">x}
+ assert_next %{<tag a="x > y">}
+ end
+
+ def test_doctype_tag
+ tokenize %{<!DOCTYPE "blah" "blah" "blah">\n <html>}
+ assert_next %{<!DOCTYPE "blah" "blah" "blah">}
+ assert_next %{\n }
+ assert_next %{<html>}
+ end
+
+ def test_cdata_tag
+ tokenize %{<![CDATA[<br>]]>}
+ assert_next %{<![CDATA[<br>]]>}
+ assert_end
+ end
+
+ def test_unterminated_cdata_tag
+ tokenize %{<content:encoded><![CDATA[ neverending...}
+ assert_next %{<content:encoded>}
+ assert_next %{<![CDATA[ neverending...}
+ assert_end
+ end
+
+ def test_less_than_with_space
+ tokenize %{original < hello > world}
+ assert_next %{original }
+ assert_next %{< hello > world}
+ end
+
+ def test_less_than_without_matching_greater_than
+ tokenize %{hello <span onmouseover="gotcha"\n<b>foo</b>\nbar</span>}
+ assert_next %{hello }
+ assert_next %{<span onmouseover="gotcha"\n}
+ assert_next %{<b>}
+ assert_next %{foo}
+ assert_next %{</b>}
+ assert_next %{\nbar}
+ assert_next %{</span>}
+ assert_end
+ end
+
+ def test_unterminated_comment
+ tokenize %{hello <!-- neverending...}
+ assert_next %{hello }
+ assert_next %{<!-- neverending...}
+ assert_end
+ end
+
+ private
+
+ def tokenize(text) # (re)creates the tokenizer under test; assert_next / assert_end pull tokens from it
+ @tokenizer = HTML::Tokenizer.new(text)
+ end
+
+ def assert_next(expected, message=nil)
+ token = @tokenizer.next
+ assert_equal expected, token, message
+ end
+
+ def assert_sequence(*expected)
+ assert_next expected.shift until expected.empty?
+ end
+
+ def assert_end(message=nil)
+ assert_nil @tokenizer.next, message
+ end
+end