From 595107f22e6a26e20860d0023478437a51daa5c6 Mon Sep 17 00:00:00 2001 From: Pratik Naik Date: Sat, 23 May 2009 02:20:28 +0200 Subject: Move html-scanner tests one dir up --- .../controller/html-scanner/cdata_node_test.rb | 15 -- .../test/controller/html-scanner/document_test.rb | 148 ----------- .../test/controller/html-scanner/node_test.rb | 89 ------- .../test/controller/html-scanner/sanitizer_test.rb | 273 --------------------- .../test/controller/html-scanner/tag_node_test.rb | 238 ------------------ .../test/controller/html-scanner/text_node_test.rb | 50 ---- .../test/controller/html-scanner/tokenizer_test.rb | 131 ---------- actionpack/test/html-scanner/cdata_node_test.rb | 15 ++ actionpack/test/html-scanner/document_test.rb | 148 +++++++++++ actionpack/test/html-scanner/node_test.rb | 89 +++++++ actionpack/test/html-scanner/sanitizer_test.rb | 273 +++++++++++++++++++++ actionpack/test/html-scanner/tag_node_test.rb | 238 ++++++++++++++++++ actionpack/test/html-scanner/text_node_test.rb | 50 ++++ actionpack/test/html-scanner/tokenizer_test.rb | 131 ++++++++++ 14 files changed, 944 insertions(+), 944 deletions(-) delete mode 100644 actionpack/test/controller/html-scanner/cdata_node_test.rb delete mode 100644 actionpack/test/controller/html-scanner/document_test.rb delete mode 100644 actionpack/test/controller/html-scanner/node_test.rb delete mode 100644 actionpack/test/controller/html-scanner/sanitizer_test.rb delete mode 100644 actionpack/test/controller/html-scanner/tag_node_test.rb delete mode 100644 actionpack/test/controller/html-scanner/text_node_test.rb delete mode 100644 actionpack/test/controller/html-scanner/tokenizer_test.rb create mode 100644 actionpack/test/html-scanner/cdata_node_test.rb create mode 100644 actionpack/test/html-scanner/document_test.rb create mode 100644 actionpack/test/html-scanner/node_test.rb create mode 100644 actionpack/test/html-scanner/sanitizer_test.rb create mode 100644 actionpack/test/html-scanner/tag_node_test.rb create mode 100644 actionpack/test/html-scanner/text_node_test.rb create mode 100644 actionpack/test/html-scanner/tokenizer_test.rb (limited to 'actionpack/test') diff --git a/actionpack/test/controller/html-scanner/cdata_node_test.rb b/actionpack/test/controller/html-scanner/cdata_node_test.rb deleted file mode 100644 index 1822cc565a..0000000000 --- a/actionpack/test/controller/html-scanner/cdata_node_test.rb +++ /dev/null @@ -1,15 +0,0 @@ -require 'abstract_unit' - -class CDATANodeTest < Test::Unit::TestCase - def setup - @node = HTML::CDATA.new(nil, 0, 0, "

howdy

") - end - - def test_to_s - assert_equal "howdy

]]>", @node.to_s - end - - def test_content - assert_equal "

howdy

", @node.content - end -end diff --git a/actionpack/test/controller/html-scanner/document_test.rb b/actionpack/test/controller/html-scanner/document_test.rb deleted file mode 100644 index c68f04fa75..0000000000 --- a/actionpack/test/controller/html-scanner/document_test.rb +++ /dev/null @@ -1,148 +0,0 @@ -require 'abstract_unit' - -class DocumentTest < Test::Unit::TestCase - def test_handle_doctype - doc = nil - assert_nothing_raised do - doc = HTML::Document.new <<-HTML.strip - - - - HTML - end - assert_equal 3, doc.root.children.length - assert_equal %{}, doc.root.children[0].content - assert_match %r{\s+}m, doc.root.children[1].content - assert_equal "html", doc.root.children[2].name - end - - def test_find_img - doc = HTML::Document.new <<-HTML.strip - - -

- - - HTML - assert doc.find(:tag=>"img", :attributes=>{"src"=>"hello.gif"}) - end - - def test_find_all - doc = HTML::Document.new <<-HTML.strip - - -

-
-

something

-

here is more

-
- - - HTML - all = doc.find_all :attributes => { :class => "test" } - assert_equal 3, all.length - assert_equal [ "p", "p", "em" ], all.map { |n| n.name } - end - - def test_find_with_text - doc = HTML::Document.new <<-HTML.strip - - -

Some text

- - - HTML - assert doc.find(:content => "Some text") - assert doc.find(:tag => "p", :child => { :content => "Some text" }) - assert doc.find(:tag => "p", :child => "Some text") - assert doc.find(:tag => "p", :content => "Some text") - end - - def test_parse_xml - assert_nothing_raised { HTML::Document.new("", true, true) } - assert_nothing_raised { HTML::Document.new("something", true, true) } - end - - def test_parse_document - doc = HTML::Document.new(<<-HTML) -
-

blah

- -
-
- HTML - assert_not_nil doc.find(:tag => "div", :children => { :count => 1, :only => { :tag => "table" } }) - end - - def test_tag_nesting_nothing_to_s - doc = HTML::Document.new("") - assert_equal "", doc.root.to_s - end - - def test_tag_nesting_space_to_s - doc = HTML::Document.new(" ") - assert_equal " ", doc.root.to_s - end - - def test_tag_nesting_text_to_s - doc = HTML::Document.new("text") - assert_equal "text", doc.root.to_s - end - - def test_tag_nesting_tag_to_s - doc = HTML::Document.new("") - assert_equal "", doc.root.to_s - end - - def test_parse_cdata - doc = HTML::Document.new(<<-HTML) - - - - <![CDATA[<br>]]> - - -

this document has <br> for a title

- - -HTML - - assert_nil doc.find(:tag => "title", :descendant => { :tag => "br" }) - assert doc.find(:tag => "title", :child => "
") - end - - def test_find_empty_tag - doc = HTML::Document.new("
") - assert_nil doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /./) - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /\A\Z/) - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /^$/) - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => "") - assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => nil) - end - - def test_parse_invalid_document - assert_nothing_raised do - doc = HTML::Document.new(" - - - - -
About Us
- ") - end - end - - def test_invalid_document_raises_exception_when_strict - assert_raise RuntimeError do - doc = HTML::Document.new(" - - - - -
About Us
- ", true) - end - end - -end diff --git a/actionpack/test/controller/html-scanner/node_test.rb b/actionpack/test/controller/html-scanner/node_test.rb deleted file mode 100644 index b0df36877e..0000000000 --- a/actionpack/test/controller/html-scanner/node_test.rb +++ /dev/null @@ -1,89 +0,0 @@ -require 'abstract_unit' - -class NodeTest < Test::Unit::TestCase - - class MockNode - def initialize(matched, value) - @matched = matched - @value = value - end - - def find(conditions) - @matched && self - end - - def to_s - @value.to_s - end - end - - def setup - @node = HTML::Node.new("parent") - @node.children.concat [MockNode.new(false,1), MockNode.new(true,"two"), MockNode.new(false,:three)] - end - - def test_match - assert !@node.match("foo") - end - - def test_tag - assert !@node.tag? - end - - def test_to_s - assert_equal "1twothree", @node.to_s - end - - def test_find - assert_equal "two", @node.find('blah').to_s - end - - def test_parse_strict - s = "" - assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) } - end - - def test_parse_relaxed - s = "" - node = nil - assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } - assert node.attributes.has_key?("foo") - assert !node.attributes.has_key?("bar") - end - - def test_to_s_with_boolean_attrs - s = "" - node = HTML::Node.parse(nil,0,0,s) - assert node.attributes.has_key?("foo") - assert node.attributes.has_key?("bar") - assert "", node.to_s - end - - def test_parse_with_unclosed_tag - s = "contents', node.content - end - - def test_parse_strict_with_unterminated_cdata_section - s = "")) - assert_equal("Dont touch me", sanitizer.sanitize("Dont touch me")) - assert_equal("This is a test.", sanitizer.sanitize("

This is a test.

")) - assert_equal("Weirdos", sanitizer.sanitize("Wei<a onclick='alert(document.cookie);'/>rdos")) - assert_equal("This is a test.", sanitizer.sanitize("This is a test.")) - assert_equal( - %{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize( - %{This is <b>a <a href="" target="_blank">test</a></b>.\n\n\n\n

It no longer contains any HTML.

\n})) - assert_equal "This has a here.", sanitizer.sanitize("This has a here.") - assert_equal "This has a here.", sanitizer.sanitize("This has a ]]> here.") - assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed ]] here...") - [nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) } - end - - def test_strip_links - sanitizer = HTML::LinkSanitizer.new - assert_equal "Dont touch me", sanitizer.sanitize("Dont touch me") - assert_equal "on my mind\nall day long", sanitizer.sanitize("on my mind\nall day long") - assert_equal "0wn3d", sanitizer.sanitize("0wn3d") - assert_equal "Magic", sanitizer.sanitize("Magic") - assert_equal "FrrFox", sanitizer.sanitize("FrrFox") - assert_equal "My mind\nall day long", sanitizer.sanitize("My mind\nall day long") - assert_equal "all day long", sanitizer.sanitize("<a href='hello'>all day long</a>") - - assert_equal "", '' - end - - def test_sanitize_plaintext - raw = "<span>foo</span></plaintext>" - assert_sanitized raw, "<span>foo</span>" - end - - def test_sanitize_script - assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cd e f" - end - - # fucked - def test_sanitize_js_handlers - raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>} - assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>} - end - - def test_sanitize_javascript_href - raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>} - assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>} - end - - def test_sanitize_image_src - raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>} - assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>} - end - - HTML::WhiteListSanitizer.allowed_tags.each do |tag_name| - define_method "test_should_allow_#{tag_name}_tag" do - assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end) - end - end - - def test_should_allow_anchors - assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>) - end - - # RFC 3986, sec 4.2 - def test_allow_colons_in_path_component - assert_sanitized("<a href=\"./this:that\">foo</a>") - end - - %w(src width height alt).each do |img_attr| - define_method "test_should_allow_image_#{img_attr}_attribute" do - assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />) - end - end - - def test_should_handle_non_html - assert_sanitized 'abc' - end - - def test_should_handle_blank_text - assert_sanitized nil - assert_sanitized '' - end - - def test_should_allow_custom_tags - text = "<u>foo</u>" - sanitizer = HTML::WhiteListSanitizer.new - assert_equal(text, sanitizer.sanitize(text, :tags => %w(u))) - end - - def test_should_allow_only_custom_tags - text = "<u>foo</u> with <i>bar</i>" - sanitizer = HTML::WhiteListSanitizer.new - assert_equal("<u>foo</u> with bar", sanitizer.sanitize(text, :tags => %w(u))) - end - - def test_should_allow_custom_tags_with_attributes - text = %(<blockquote cite="http://example.com/">foo</blockquote>) - sanitizer = HTML::WhiteListSanitizer.new - assert_equal(text, sanitizer.sanitize(text)) - end - - def test_should_allow_custom_tags_with_custom_attributes - text = %(<blockquote foo="bar">Lorem ipsum</blockquote>) - sanitizer = HTML::WhiteListSanitizer.new - assert_equal(text, sanitizer.sanitize(text, :attributes => ['foo'])) - end - - [%w(img src), %w(a href)].each do |(tag, attr)| - define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do - assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>) - end - end - - def test_should_flag_bad_protocols - sanitizer = HTML::WhiteListSanitizer.new - %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto| - assert sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://bad") - end - end - - def test_should_accept_good_protocols - sanitizer = HTML::WhiteListSanitizer.new - HTML::WhiteListSanitizer.allowed_protocols.each do |proto| - assert !sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://good") - end - end - - def test_should_reject_hex_codes_in_protocol - assert_sanitized %(<a href="&#37;6A&#37;61&#37;76&#37;61&#37;73&#37;63&#37;72&#37;69&#37;70&#37;74&#37;3A&#37;61&#37;6C&#37;65&#37;72&#37;74&#37;28&#37;22&#37;58&#37;53&#37;53&#37;22&#37;29">1</a>), "<a>1</a>" - assert @sanitizer.send(:contains_bad_protocols?, 'src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29") - end - - def test_should_block_script_tag - assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), "" - end - - [%(<IMG SRC="javascript:alert('XSS');">), - %(<IMG SRC=javascript:alert('XSS')>), - %(<IMG SRC=JaVaScRiPt:alert('XSS')>), - %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), - %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>), - %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>), - %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>), - %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>), - %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>), - %(<IMG SRC="jav\tascript:alert('XSS');">), - %(<IMG SRC="jav&#x09;ascript:alert('XSS');">), - %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">), - %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">), - %(<IMG SRC=" &#14; javascript:alert('XSS');">), - %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i| - define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do - assert_sanitized img_hack, "<img>" - end - end - - def test_should_sanitize_tag_broken_up_by_null - assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")" - end - - def test_should_sanitize_invalid_script_tag - assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), "" - end - - def test_should_sanitize_script_tag_with_multiple_open_brackets - assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;" - assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(&lt;a) - end - - def test_should_sanitize_unclosed_script - assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>" - end - - def test_should_sanitize_half_open_scripts - assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>" - end - - def test_should_not_fall_for_ridiculous_hack - img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>) - assert_sanitized img_hack, "<img>" - end - - # fucked - def test_should_sanitize_attributes - assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>) - end - - def test_should_sanitize_illegal_style_properties - raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;) - expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;) - assert_equal expected, sanitize_css(raw) - end - - def test_should_sanitize_with_trailing_space - raw = "display:block; " - expected = "display: block;" - assert_equal expected, sanitize_css(raw) - end - - def test_should_sanitize_xul_style_attributes - raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')) - assert_equal '', sanitize_css(raw) - end - - def test_should_sanitize_invalid_tag_names - assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f") - end - - def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags - assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>") - end - - def test_should_sanitize_invalid_tag_names_in_single_tags - assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />") - end - - def test_should_sanitize_img_dynsrc_lowsrc - assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />") - end - - def test_should_sanitize_div_background_image_unicode_encoded - raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029) - assert_equal '', sanitize_css(raw) - end - - def test_should_sanitize_div_style_expression - raw = %(width: expression(alert('XSS'));) - assert_equal '', sanitize_css(raw) - end - - def test_should_sanitize_img_vbscript - assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />' - end - - def test_should_sanitize_cdata_section - assert_sanitized "<![CDATA[<span>section</span>]]>", "&lt;![CDATA[&lt;span>section&lt;/span>]]>" - end - - def test_should_sanitize_unterminated_cdata_section - assert_sanitized "<![CDATA[<span>neverending...", "&lt;![CDATA[&lt;span>neverending...]]>" - end - - def test_should_not_mangle_urls_with_ampersand - assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>} - end - -protected - def assert_sanitized(input, expected = nil) - @sanitizer ||= HTML::WhiteListSanitizer.new - if input - assert_dom_equal expected || input, @sanitizer.sanitize(input) - else - assert_nil @sanitizer.sanitize(input) - end - end - - def sanitize_css(input) - (@sanitizer ||= HTML::WhiteListSanitizer.new).sanitize_css(input) - end -end diff --git a/actionpack/test/controller/html-scanner/tag_node_test.rb b/actionpack/test/controller/html-scanner/tag_node_test.rb deleted file mode 100644 index d1d4667378..0000000000 --- a/actionpack/test/controller/html-scanner/tag_node_test.rb +++ /dev/null @@ -1,238 +0,0 @@ -require 'abstract_unit' - -class TagNodeTest < Test::Unit::TestCase - def test_open_without_attributes - node = tag("<tag>") - assert_equal "tag", node.name - assert_equal Hash.new, node.attributes - assert_nil node.closing - end - - def test_open_with_attributes - node = tag("<TAG1 foo=hey_ho x:bar=\"blah blah\" BAZ='blah blah blah' >") - assert_equal "tag1", node.name - assert_equal "hey_ho", node["foo"] - assert_equal "blah blah", node["x:bar"] - assert_equal "blah blah blah", node["baz"] - end - - def test_self_closing_without_attributes - node = tag("<tag/>") - assert_equal "tag", node.name - assert_equal Hash.new, node.attributes - assert_equal :self, node.closing - end - - def test_self_closing_with_attributes - node = tag("<tag a=b/>") - assert_equal "tag", node.name - assert_equal( { "a" => "b" }, node.attributes ) - assert_equal :self, node.closing - end - - def test_closing_without_attributes - node = tag("</tag>") - assert_equal "tag", node.name - assert_nil node.attributes - assert_equal :close, node.closing - end - - def test_bracket_op_when_no_attributes - node = tag("</tag>") - assert_nil node["foo"] - end - - def test_bracket_op_when_attributes - node = tag("<tag a=b/>") - assert_equal "b", node["a"] - end - - def test_attributes_with_escaped_quotes - node = tag("<tag a='b\\'c' b=\"bob \\\"float\\\"\">") - assert_equal "b\\'c", node["a"] - assert_equal "bob \\\"float\\\"", node["b"] - end - - def test_to_s - node = tag("<a b=c d='f' g=\"h 'i'\" />") - assert_equal %(<a b='c' d='f' g='h \\'i\\'' />), node.to_s - end - - def test_tag - assert tag("<tag>").tag? - end - - def test_match_tag_as_string - assert tag("<tag>").match(:tag => "tag") - assert !tag("<tag>").match(:tag => "b") - end - - def test_match_tag_as_regexp - assert tag("<tag>").match(:tag => /t.g/) - assert !tag("<tag>").match(:tag => /t[bqs]g/) - end - - def test_match_attributes_as_string - t = tag("<tag a=something b=else />") - assert t.match(:attributes => {"a" => "something"}) - assert t.match(:attributes => {"b" => "else"}) - end - - def test_match_attributes_as_regexp - t = tag("<tag a=something b=else />") - assert t.match(:attributes => {"a" => /^something$/}) - assert t.match(:attributes => {"b" => /e.*e/}) - assert t.match(:attributes => {"a" => /me..i/, "b" => /.ls.$/}) - end - - def test_match_attributes_as_number - t = tag("<tag a=15 b=3.1415 />") - assert t.match(:attributes => {"a" => 15}) - assert t.match(:attributes => {"b" => 3.1415}) - assert t.match(:attributes => {"a" => 15, "b" => 3.1415}) - end - - def test_match_attributes_exist - t = tag("<tag a=15 b=3.1415 />") - assert t.match(:attributes => {"a" => true}) - assert t.match(:attributes => {"b" => true}) - assert t.match(:attributes => {"a" => true, "b" => true}) - end - - def test_match_attributes_not_exist - t = tag("<tag a=15 b=3.1415 />") - assert t.match(:attributes => {"c" => false}) - assert t.match(:attributes => {"c" => nil}) - assert t.match(:attributes => {"a" => true, "c" => false}) - end - - def test_match_parent_success - t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>")) - assert t.match(:parent => {:tag => "foo", :attributes => {"k" => /v.l/, "j" => false}}) - end - - def test_match_parent_fail - t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>")) - assert !t.match(:parent => {:tag => /kafka/}) - end - - def test_match_child_success - t = tag("<tag x:k='something'>") - tag("<child v=john a=kelly>", t) - tag("<sib m=vaughn v=james>", t) - assert t.match(:child => { :tag => "sib", :attributes => {"v" => /j/}}) - assert t.match(:child => { :attributes => {"a" => "kelly"}}) - end - - def test_match_child_fail - t = tag("<tag x:k='something'>") - tag("<child v=john a=kelly>", t) - tag("<sib m=vaughn v=james>", t) - assert !t.match(:child => { :tag => "sib", :attributes => {"v" => /r/}}) - assert !t.match(:child => { :attributes => {"v" => false}}) - end - - def test_match_ancestor_success - t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>"))) - assert t.match(:ancestor => {:tag => "parent", :attributes => {"a" => /ll/}}) - assert t.match(:ancestor => {:attributes => {"m" => "vaughn"}}) - end - - def test_match_ancestor_fail - t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>"))) - assert !t.match(:ancestor => {:tag => /^parent/, :attributes => {"v" => /m/}}) - assert !t.match(:ancestor => {:attributes => {"v" => false}}) - end - - def test_match_descendant_success - tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>"))) - assert t.match(:descendant => {:tag => "child", :attributes => {"a" => /ll/}}) - assert t.match(:descendant => {:attributes => {"m" => "vaughn"}}) - end - - def test_match_descendant_fail - tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>"))) - assert !t.match(:descendant => {:tag => /^child/, :attributes => {"v" => /m/}}) - assert !t.match(:descendant => {:attributes => {"v" => false}}) - end - - def test_match_child_count - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<child v=john a=kelly>", t) - tag("<sib m=vaughn v=james>", t) - assert t.match(:children => { :count => 2 }) - assert t.match(:children => { :count => 2..4 }) - assert t.match(:children => { :less_than => 4 }) - assert t.match(:children => { :greater_than => 1 }) - assert !t.match(:children => { :count => 3 }) - end - - def test_conditions_as_strings - t = tag("<tag x:k='something'>") - assert t.match("tag" => "tag") - assert t.match("attributes" => { "x:k" => "something" }) - assert !t.match("tag" => "gat") - assert !t.match("attributes" => { "x:j" => "something" }) - end - - def test_attributes_as_symbols - t = tag("<child v=john a=kelly>") - assert t.match(:attributes => { :v => /oh/ }) - assert t.match(:attributes => { :a => /ll/ }) - end - - def test_match_sibling - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<span a=b>", t) - tag("world", t) - m = tag("<span k=r>", t) - tag("<span m=l>", t) - - assert m.match(:sibling => {:tag => "span", :attributes => {:a => true}}) - assert m.match(:sibling => {:tag => "span", :attributes => {:m => true}}) - assert !m.match(:sibling => {:tag => "span", :attributes => {:k => true}}) - end - - def test_match_sibling_before - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<span a=b>", t) - tag("world", t) - m = tag("<span k=r>", t) - tag("<span m=l>", t) - - assert m.match(:before => {:tag => "span", :attributes => {:m => true}}) - assert !m.match(:before => {:tag => "span", :attributes => {:a => true}}) - assert !m.match(:before => {:tag => "span", :attributes => {:k => true}}) - end - - def test_match_sibling_after - t = tag("<tag x:k='something'>") - tag("hello", t) - tag("<span a=b>", t) - tag("world", t) - m = tag("<span k=r>", t) - tag("<span m=l>", t) - - assert m.match(:after => {:tag => "span", :attributes => {:a => true}}) - assert !m.match(:after => {:tag => "span", :attributes => {:m => true}}) - assert !m.match(:after => {:tag => "span", :attributes => {:k => true}}) - end - - def test_to_s - t = tag("<b x='foo'>") - tag("hello", t) - tag("<hr />", t) - assert_equal %(<b x="foo">hello<hr /></b>), t.to_s - end - - private - - def tag(content, parent=nil) - node = HTML::Node.parse(parent,0,0,content) - parent.children << node if parent - node - end -end diff --git a/actionpack/test/controller/html-scanner/text_node_test.rb b/actionpack/test/controller/html-scanner/text_node_test.rb deleted file mode 100644 index 1ab3f4454e..0000000000 --- a/actionpack/test/controller/html-scanner/text_node_test.rb +++ /dev/null @@ -1,50 +0,0 @@ -require 'abstract_unit' - -class TextNodeTest < Test::Unit::TestCase - def setup - @node = HTML::Text.new(nil, 0, 0, "hello, howdy, aloha, annyeong") - end - - def test_to_s - assert_equal "hello, howdy, aloha, annyeong", @node.to_s - end - - def test_find_string - assert_equal @node, @node.find("hello, howdy, aloha, annyeong") - assert_equal false, @node.find("bogus") - end - - def test_find_regexp - assert_equal @node, @node.find(/an+y/) - assert_nil @node.find(/b/) - end - - def test_find_hash - assert_equal @node, @node.find(:content => /howdy/) - assert_nil @node.find(:content => /^howdy$/) - assert_equal false, @node.find(:content => "howdy") - end - - def test_find_other - assert_nil @node.find(:hello) - end - - def test_match_string - assert @node.match("hello, howdy, aloha, annyeong") - assert_equal false, @node.match("bogus") - end - - def test_match_regexp - assert_not_nil @node, @node.match(/an+y/) - assert_nil @node.match(/b/) - end - - def test_match_hash - assert_not_nil @node, @node.match(:content => "howdy") - assert_nil @node.match(:content => /^howdy$/) - end - - def test_match_other - assert_nil @node.match(:hello) - end -end diff --git a/actionpack/test/controller/html-scanner/tokenizer_test.rb b/actionpack/test/controller/html-scanner/tokenizer_test.rb deleted file mode 100644 index a001bcbbad..0000000000 --- a/actionpack/test/controller/html-scanner/tokenizer_test.rb +++ /dev/null @@ -1,131 +0,0 @@ -require 'abstract_unit' - -class TokenizerTest < Test::Unit::TestCase - - def test_blank - tokenize "" - assert_end - end - - def test_space - tokenize " " - assert_next " " - assert_end - end - - def test_tag_simple_open - tokenize "<tag>" - assert_next "<tag>" - assert_end - end - - def test_tag_simple_self_closing - tokenize "<tag />" - assert_next "<tag />" - assert_end - end - - def test_tag_simple_closing - tokenize "</tag>" - assert_next "</tag>" - end - - def test_tag_with_single_quoted_attribute - tokenize %{<tag a='hello'>x} - assert_next %{<tag a='hello'>} - end - - def test_tag_with_single_quoted_attribute_with_escape - tokenize %{<tag a='hello\\''>x} - assert_next %{<tag a='hello\\''>} - end - - def test_tag_with_double_quoted_attribute - tokenize %{<tag a="hello">x} - assert_next %{<tag a="hello">} - end - - def test_tag_with_double_quoted_attribute_with_escape - tokenize %{<tag a="hello\\"">x} - assert_next %{<tag a="hello\\"">} - end - - def test_tag_with_unquoted_attribute - tokenize %{<tag a=hello>x} - assert_next %{<tag a=hello>} - end - - def test_tag_with_lt_char_in_attribute - tokenize %{<tag a="x < y">x} - assert_next %{<tag a="x < y">} - end - - def test_tag_with_gt_char_in_attribute - tokenize %{<tag a="x > y">x} - assert_next %{<tag a="x > y">} - end - - def test_doctype_tag - tokenize %{<!DOCTYPE "blah" "blah" "blah">\n <html>} - assert_next %{<!DOCTYPE "blah" "blah" "blah">} - assert_next %{\n } - assert_next %{<html>} - end - - def test_cdata_tag - tokenize %{<![CDATA[<br>]]>} - assert_next %{<![CDATA[<br>]]>} - assert_end - end - - def test_unterminated_cdata_tag - tokenize %{<content:encoded><![CDATA[ neverending...} - assert_next %{<content:encoded>} - assert_next %{<![CDATA[ neverending...} - assert_end - end - - def test_less_than_with_space - tokenize %{original < hello > world} - assert_next %{original } - assert_next %{< hello > world} - end - - def test_less_than_without_matching_greater_than - tokenize %{hello <span onmouseover="gotcha"\n<b>foo</b>\nbar</span>} - assert_next %{hello } - assert_next %{<span onmouseover="gotcha"\n} - assert_next %{<b>} - assert_next %{foo} - assert_next %{</b>} - assert_next %{\nbar} - assert_next %{</span>} - assert_end - end - - def test_unterminated_comment - tokenize %{hello <!-- neverending...} - assert_next %{hello } - assert_next %{<!-- neverending...} - assert_end - end - - private - - def tokenize(text) - @tokenizer = HTML::Tokenizer.new(text) - end - - def assert_next(expected, message=nil) - token = @tokenizer.next - assert_equal expected, token, message - end - - def assert_sequence(*expected) - assert_next expected.shift until expected.empty? - end - - def assert_end(message=nil) - assert_nil @tokenizer.next, message - end -end diff --git a/actionpack/test/html-scanner/cdata_node_test.rb b/actionpack/test/html-scanner/cdata_node_test.rb new file mode 100644 index 0000000000..1822cc565a --- /dev/null +++ b/actionpack/test/html-scanner/cdata_node_test.rb @@ -0,0 +1,15 @@ +require 'abstract_unit' + +class CDATANodeTest < Test::Unit::TestCase + def setup + @node = HTML::CDATA.new(nil, 0, 0, "<p>howdy</p>") + end + + def test_to_s + assert_equal "<![CDATA[<p>howdy</p>]]>", @node.to_s + end + + def test_content + assert_equal "<p>howdy</p>", @node.content + end +end diff --git a/actionpack/test/html-scanner/document_test.rb b/actionpack/test/html-scanner/document_test.rb new file mode 100644 index 0000000000..c68f04fa75 --- /dev/null +++ b/actionpack/test/html-scanner/document_test.rb @@ -0,0 +1,148 @@ +require 'abstract_unit' + +class DocumentTest < Test::Unit::TestCase + def test_handle_doctype + doc = nil + assert_nothing_raised do + doc = HTML::Document.new <<-HTML.strip + <!DOCTYPE "blah" "blah" "blah"> + <html> + </html> + HTML + end + assert_equal 3, doc.root.children.length + assert_equal %{<!DOCTYPE "blah" "blah" "blah">}, doc.root.children[0].content + assert_match %r{\s+}m, doc.root.children[1].content + assert_equal "html", doc.root.children[2].name + end + + def test_find_img + doc = HTML::Document.new <<-HTML.strip + <html> + <body> + <p><img src="hello.gif"></p> + </body> + </html> + HTML + assert doc.find(:tag=>"img", :attributes=>{"src"=>"hello.gif"}) + end + + def test_find_all + doc = HTML::Document.new <<-HTML.strip + <html> + <body> + <p class="test"><img src="hello.gif"></p> + <div class="foo"> + <p class="test">something</p> + <p>here is <em class="test">more</em></p> + </div> + </body> + </html> + HTML + all = doc.find_all :attributes => { :class => "test" } + assert_equal 3, all.length + assert_equal [ "p", "p", "em" ], all.map { |n| n.name } + end + + def test_find_with_text + doc = HTML::Document.new <<-HTML.strip + <html> + <body> + <p>Some text</p> + </body> + </html> + HTML + assert doc.find(:content => "Some text") + assert doc.find(:tag => "p", :child => { :content => "Some text" }) + assert doc.find(:tag => "p", :child => "Some text") + assert doc.find(:tag => "p", :content => "Some text") + end + + def test_parse_xml + assert_nothing_raised { HTML::Document.new("<tags><tag/></tags>", true, true) } + assert_nothing_raised { HTML::Document.new("<outer><link>something</link></outer>", true, true) } + end + + def test_parse_document + doc = HTML::Document.new(<<-HTML) + <div> + <h2>blah</h2> + <table> + </table> + </div> + HTML + assert_not_nil doc.find(:tag => "div", :children => { :count => 1, :only => { :tag => "table" } }) + end + + def test_tag_nesting_nothing_to_s + doc = HTML::Document.new("<tag></tag>") + assert_equal "<tag></tag>", doc.root.to_s + end + + def test_tag_nesting_space_to_s + doc = HTML::Document.new("<tag> </tag>") + assert_equal "<tag> </tag>", doc.root.to_s + end + + def test_tag_nesting_text_to_s + doc = HTML::Document.new("<tag>text</tag>") + assert_equal "<tag>text</tag>", doc.root.to_s + end + + def test_tag_nesting_tag_to_s + doc = HTML::Document.new("<tag><nested /></tag>") + assert_equal "<tag><nested /></tag>", doc.root.to_s + end + + def test_parse_cdata + doc = HTML::Document.new(<<-HTML) +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> + <head> + <title><![CDATA[<br>]]></title> + </head> + <body> + <p>this document has &lt;br&gt; for a title</p> + </body> +</html> +HTML + + assert_nil doc.find(:tag => "title", :descendant => { :tag => "br" }) + assert doc.find(:tag => "title", :child => "<br>") + end + + def test_find_empty_tag + doc = HTML::Document.new("<div id='map'></div>") + assert_nil doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /./) + assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /\A\Z/) + assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => /^$/) + assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => "") + assert doc.find(:tag => "div", :attributes => { :id => "map" }, :content => nil) + end + + def test_parse_invalid_document + assert_nothing_raised do + doc = HTML::Document.new("<html> + <table> + <tr> + <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About Us</td> + </tr> + </table> + </html>") + end + end + + def test_invalid_document_raises_exception_when_strict + assert_raise RuntimeError do + doc = HTML::Document.new("<html> + <table> + <tr> + <td style=\"color: #FFFFFF; height: 17px; onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" style=\"cursor:pointer; height: 17px;\"; nowrap onclick=\"window.location.href='http://www.rmeinc.com/about_rme.aspx'\" onmouseout=\"this.bgColor='#0066cc'; this.style.color='#FFFFFF'\" onmouseover=\"this.bgColor='#ffffff'; this.style.color='#0033cc'\">About Us</td> + </tr> + </table> + </html>", true) + end + end + +end diff --git a/actionpack/test/html-scanner/node_test.rb b/actionpack/test/html-scanner/node_test.rb new file mode 100644 index 0000000000..b0df36877e --- /dev/null +++ b/actionpack/test/html-scanner/node_test.rb @@ -0,0 +1,89 @@ +require 'abstract_unit' + +class NodeTest < Test::Unit::TestCase + + class MockNode + def initialize(matched, value) + @matched = matched + @value = value + end + + def find(conditions) + @matched && self + end + + def to_s + @value.to_s + end + end + + def setup + @node = HTML::Node.new("parent") + @node.children.concat [MockNode.new(false,1), MockNode.new(true,"two"), MockNode.new(false,:three)] + end + + def test_match + assert !@node.match("foo") + end + + def test_tag + assert !@node.tag? + end + + def test_to_s + assert_equal "1twothree", @node.to_s + end + + def test_find + assert_equal "two", @node.find('blah').to_s + end + + def test_parse_strict + s = "<b foo='hello'' bar='baz'>" + assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) } + end + + def test_parse_relaxed + s = "<b foo='hello'' bar='baz'>" + node = nil + assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } + assert node.attributes.has_key?("foo") + assert !node.attributes.has_key?("bar") + end + + def test_to_s_with_boolean_attrs + s = "<b foo bar>" + node = HTML::Node.parse(nil,0,0,s) + assert node.attributes.has_key?("foo") + assert node.attributes.has_key?("bar") + assert "<b foo bar>", node.to_s + end + + def test_parse_with_unclosed_tag + s = "<span onmouseover='bang'" + node = nil + assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } + assert node.attributes.has_key?("onmouseover") + end + + def test_parse_with_valid_cdata_section + s = "<![CDATA[<span>contents</span>]]>" + node = nil + assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } + assert_kind_of HTML::CDATA, node + assert_equal '<span>contents</span>', node.content + end + + def test_parse_strict_with_unterminated_cdata_section + s = "<![CDATA[neverending..." + assert_raise(RuntimeError) { HTML::Node.parse(nil,0,0,s) } + end + + def test_parse_relaxed_with_unterminated_cdata_section + s = "<![CDATA[neverending..." + node = nil + assert_nothing_raised { node = HTML::Node.parse(nil,0,0,s,false) } + assert_kind_of HTML::CDATA, node + assert_equal 'neverending...', node.content + end +end diff --git a/actionpack/test/html-scanner/sanitizer_test.rb b/actionpack/test/html-scanner/sanitizer_test.rb new file mode 100644 index 0000000000..e85a5c7abf --- /dev/null +++ b/actionpack/test/html-scanner/sanitizer_test.rb @@ -0,0 +1,273 @@ +require 'abstract_unit' + +class SanitizerTest < ActionController::TestCase + def setup + @sanitizer = nil # used by assert_sanitizer + end + + def test_strip_tags + sanitizer = HTML::FullSanitizer.new + assert_equal("<<<bad html", sanitizer.sanitize("<<<bad html")) + assert_equal("<<", sanitizer.sanitize("<<<bad html>")) + assert_equal("Dont touch me", sanitizer.sanitize("Dont touch me")) + assert_equal("This is a test.", sanitizer.sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")) + assert_equal("Weirdos", sanitizer.sanitize("Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos")) + assert_equal("This is a test.", sanitizer.sanitize("This is a test.")) + assert_equal( + %{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize( + %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n})) + assert_equal "This has a here.", sanitizer.sanitize("This has a <!-- comment --> here.") + assert_equal "This has a here.", sanitizer.sanitize("This has a <![CDATA[<section>]]> here.") + assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed <![CDATA[<section>]] here...") + [nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) } + end + + def test_strip_links + sanitizer = HTML::LinkSanitizer.new + assert_equal "Dont touch me", sanitizer.sanitize("Dont touch me") + assert_equal "on my mind\nall day long", sanitizer.sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>") + assert_equal "0wn3d", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>") + assert_equal "Magic", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic") + assert_equal "FrrFox", sanitizer.sanitize("<href onlclick='steal()'>FrrFox</a></href>") + assert_equal "My mind\nall <b>day</b> long", sanitizer.sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>") + assert_equal "all <b>day</b> long", sanitizer.sanitize("<<a>a href='hello'>all <b>day</b> long<</A>/a>") + + assert_equal "<a<a", sanitizer.sanitize("<a<a") + end + + def test_sanitize_form + assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", '' + end + + def test_sanitize_plaintext + raw = "<plaintext><span>foo</span></plaintext>" + assert_sanitized raw, "<span>foo</span>" + end + + def test_sanitize_script + assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cd e f" + end + + # fucked + def test_sanitize_js_handlers + raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>} + assert_sanitized raw, %{onthis="do that" <a name="foo" href="#">hello</a>} + end + + def test_sanitize_javascript_href + raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>} + assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>} + end + + def test_sanitize_image_src + raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>} + assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>} + end + + HTML::WhiteListSanitizer.allowed_tags.each do |tag_name| + define_method "test_should_allow_#{tag_name}_tag" do + assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end) + end + end + + def test_should_allow_anchors + assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href="foo"></a>) + end + + # RFC 3986, sec 4.2 + def test_allow_colons_in_path_component + assert_sanitized("<a href=\"./this:that\">foo</a>") + end + + %w(src width height alt).each do |img_attr| + define_method "test_should_allow_image_#{img_attr}_attribute" do + assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />) + end + end + + def test_should_handle_non_html + assert_sanitized 'abc' + end + + def test_should_handle_blank_text + assert_sanitized nil + assert_sanitized '' + end + + def test_should_allow_custom_tags + text = "<u>foo</u>" + sanitizer = HTML::WhiteListSanitizer.new + assert_equal(text, sanitizer.sanitize(text, :tags => %w(u))) + end + + def test_should_allow_only_custom_tags + text = "<u>foo</u> with <i>bar</i>" + sanitizer = HTML::WhiteListSanitizer.new + assert_equal("<u>foo</u> with bar", sanitizer.sanitize(text, :tags => %w(u))) + end + + def test_should_allow_custom_tags_with_attributes + text = %(<blockquote cite="http://example.com/">foo</blockquote>) + sanitizer = HTML::WhiteListSanitizer.new + assert_equal(text, sanitizer.sanitize(text)) + end + + def test_should_allow_custom_tags_with_custom_attributes + text = %(<blockquote foo="bar">Lorem ipsum</blockquote>) + sanitizer = HTML::WhiteListSanitizer.new + assert_equal(text, sanitizer.sanitize(text, :attributes => ['foo'])) + end + + [%w(img src), %w(a href)].each do |(tag, attr)| + define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do + assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>) + end + end + + def test_should_flag_bad_protocols + sanitizer = HTML::WhiteListSanitizer.new + %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg).each do |proto| + assert sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://bad") + end + end + + def test_should_accept_good_protocols + sanitizer = HTML::WhiteListSanitizer.new + HTML::WhiteListSanitizer.allowed_protocols.each do |proto| + assert !sanitizer.send(:contains_bad_protocols?, 'src', "#{proto}://good") + end + end + + def test_should_reject_hex_codes_in_protocol + assert_sanitized %(<a href="&#37;6A&#37;61&#37;76&#37;61&#37;73&#37;63&#37;72&#37;69&#37;70&#37;74&#37;3A&#37;61&#37;6C&#37;65&#37;72&#37;74&#37;28&#37;22&#37;58&#37;53&#37;53&#37;22&#37;29">1</a>), "<a>1</a>" + assert @sanitizer.send(:contains_bad_protocols?, 'src', "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29") + end + + def test_should_block_script_tag + assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), "" + end + + [%(<IMG SRC="javascript:alert('XSS');">), + %(<IMG SRC=javascript:alert('XSS')>), + %(<IMG SRC=JaVaScRiPt:alert('XSS')>), + %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), + %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>), + %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>), + %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>), + %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>), + %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>), + %(<IMG SRC="jav\tascript:alert('XSS');">), + %(<IMG SRC="jav&#x09;ascript:alert('XSS');">), + %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">), + %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">), + %(<IMG SRC=" &#14; javascript:alert('XSS');">), + %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each_with_index do |img_hack, i| + define_method "test_should_not_fall_for_xss_image_hack_#{i+1}" do + assert_sanitized img_hack, "<img>" + end + end + + def test_should_sanitize_tag_broken_up_by_null + assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), "alert(\"XSS\")" + end + + def test_should_sanitize_invalid_script_tag + assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), "" + end + + def test_should_sanitize_script_tag_with_multiple_open_brackets + assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;" + assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(&lt;a) + end + + def test_should_sanitize_unclosed_script + assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), "<b>" + end + + def test_should_sanitize_half_open_scripts + assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>" + end + + def test_should_not_fall_for_ridiculous_hack + img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>) + assert_sanitized img_hack, "<img>" + end + + # fucked + def test_should_sanitize_attributes + assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>) + end + + def test_should_sanitize_illegal_style_properties + raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;) + expected = %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;) + assert_equal expected, sanitize_css(raw) + end + + def test_should_sanitize_with_trailing_space + raw = "display:block; " + expected = "display: block;" + assert_equal expected, sanitize_css(raw) + end + + def test_should_sanitize_xul_style_attributes + raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')) + assert_equal '', sanitize_css(raw) + end + + def test_should_sanitize_invalid_tag_names + assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f") + end + + def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags + assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>") + end + + def test_should_sanitize_invalid_tag_names_in_single_tags + assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />") + end + + def test_should_sanitize_img_dynsrc_lowsrc + assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />") + end + + def test_should_sanitize_div_background_image_unicode_encoded + raw = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029) + assert_equal '', sanitize_css(raw) + end + + def test_should_sanitize_div_style_expression + raw = %(width: expression(alert('XSS'));) + assert_equal '', sanitize_css(raw) + end + + def test_should_sanitize_img_vbscript + assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />' + end + + def test_should_sanitize_cdata_section + assert_sanitized "<![CDATA[<span>section</span>]]>", "&lt;![CDATA[&lt;span>section&lt;/span>]]>" + end + + def test_should_sanitize_unterminated_cdata_section + assert_sanitized "<![CDATA[<span>neverending...", "&lt;![CDATA[&lt;span>neverending...]]>" + end + + def test_should_not_mangle_urls_with_ampersand + assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>} + end + +protected + def assert_sanitized(input, expected = nil) + @sanitizer ||= HTML::WhiteListSanitizer.new + if input + assert_dom_equal expected || input, @sanitizer.sanitize(input) + else + assert_nil @sanitizer.sanitize(input) + end + end + + def sanitize_css(input) + (@sanitizer ||= HTML::WhiteListSanitizer.new).sanitize_css(input) + end +end diff --git a/actionpack/test/html-scanner/tag_node_test.rb b/actionpack/test/html-scanner/tag_node_test.rb new file mode 100644 index 0000000000..d1d4667378 --- /dev/null +++ b/actionpack/test/html-scanner/tag_node_test.rb @@ -0,0 +1,238 @@ +require 'abstract_unit' + +class TagNodeTest < Test::Unit::TestCase + def test_open_without_attributes + node = tag("<tag>") + assert_equal "tag", node.name + assert_equal Hash.new, node.attributes + assert_nil node.closing + end + + def test_open_with_attributes + node = tag("<TAG1 foo=hey_ho x:bar=\"blah blah\" BAZ='blah blah blah' >") + assert_equal "tag1", node.name + assert_equal "hey_ho", node["foo"] + assert_equal "blah blah", node["x:bar"] + assert_equal "blah blah blah", node["baz"] + end + + def test_self_closing_without_attributes + node = tag("<tag/>") + assert_equal "tag", node.name + assert_equal Hash.new, node.attributes + assert_equal :self, node.closing + end + + def test_self_closing_with_attributes + node = tag("<tag a=b/>") + assert_equal "tag", node.name + assert_equal( { "a" => "b" }, node.attributes ) + assert_equal :self, node.closing + end + + def test_closing_without_attributes + node = tag("</tag>") + assert_equal "tag", node.name + assert_nil node.attributes + assert_equal :close, node.closing + end + + def test_bracket_op_when_no_attributes + node = tag("</tag>") + assert_nil node["foo"] + end + + def test_bracket_op_when_attributes + node = tag("<tag a=b/>") + assert_equal "b", node["a"] + end + + def test_attributes_with_escaped_quotes + node = tag("<tag a='b\\'c' b=\"bob \\\"float\\\"\">") + assert_equal "b\\'c", node["a"] + assert_equal "bob \\\"float\\\"", node["b"] + end + + def test_to_s + node = tag("<a b=c d='f' g=\"h 'i'\" />") + assert_equal %(<a b='c' d='f' g='h \\'i\\'' />), node.to_s + end + + def test_tag + assert tag("<tag>").tag? + end + + def test_match_tag_as_string + assert tag("<tag>").match(:tag => "tag") + assert !tag("<tag>").match(:tag => "b") + end + + def test_match_tag_as_regexp + assert tag("<tag>").match(:tag => /t.g/) + assert !tag("<tag>").match(:tag => /t[bqs]g/) + end + + def test_match_attributes_as_string + t = tag("<tag a=something b=else />") + assert t.match(:attributes => {"a" => "something"}) + assert t.match(:attributes => {"b" => "else"}) + end + + def test_match_attributes_as_regexp + t = tag("<tag a=something b=else />") + assert t.match(:attributes => {"a" => /^something$/}) + assert t.match(:attributes => {"b" => /e.*e/}) + assert t.match(:attributes => {"a" => /me..i/, "b" => /.ls.$/}) + end + + def test_match_attributes_as_number + t = tag("<tag a=15 b=3.1415 />") + assert t.match(:attributes => {"a" => 15}) + assert t.match(:attributes => {"b" => 3.1415}) + assert t.match(:attributes => {"a" => 15, "b" => 3.1415}) + end + + def test_match_attributes_exist + t = tag("<tag a=15 b=3.1415 />") + assert t.match(:attributes => {"a" => true}) + assert t.match(:attributes => {"b" => true}) + assert t.match(:attributes => {"a" => true, "b" => true}) + end + + def test_match_attributes_not_exist + t = tag("<tag a=15 b=3.1415 />") + assert t.match(:attributes => {"c" => false}) + assert t.match(:attributes => {"c" => nil}) + assert t.match(:attributes => {"a" => true, "c" => false}) + end + + def test_match_parent_success + t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>")) + assert t.match(:parent => {:tag => "foo", :attributes => {"k" => /v.l/, "j" => false}}) + end + + def test_match_parent_fail + t = tag("<tag a=15 b='hello'>", tag("<foo k='value'>")) + assert !t.match(:parent => {:tag => /kafka/}) + end + + def test_match_child_success + t = tag("<tag x:k='something'>") + tag("<child v=john a=kelly>", t) + tag("<sib m=vaughn v=james>", t) + assert t.match(:child => { :tag => "sib", :attributes => {"v" => /j/}}) + assert t.match(:child => { :attributes => {"a" => "kelly"}}) + end + + def test_match_child_fail + t = tag("<tag x:k='something'>") + tag("<child v=john a=kelly>", t) + tag("<sib m=vaughn v=james>", t) + assert !t.match(:child => { :tag => "sib", :attributes => {"v" => /r/}}) + assert !t.match(:child => { :attributes => {"v" => false}}) + end + + def test_match_ancestor_success + t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>"))) + assert t.match(:ancestor => {:tag => "parent", :attributes => {"a" => /ll/}}) + assert t.match(:ancestor => {:attributes => {"m" => "vaughn"}}) + end + + def test_match_ancestor_fail + t = tag("<tag x:k='something'>", tag("<parent v=john a=kelly>", tag("<grandparent m=vaughn v=james>"))) + assert !t.match(:ancestor => {:tag => /^parent/, :attributes => {"v" => /m/}}) + assert !t.match(:ancestor => {:attributes => {"v" => false}}) + end + + def test_match_descendant_success + tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>"))) + assert t.match(:descendant => {:tag => "child", :attributes => {"a" => /ll/}}) + assert t.match(:descendant => {:attributes => {"m" => "vaughn"}}) + end + + def test_match_descendant_fail + tag("<grandchild m=vaughn v=james>", tag("<child v=john a=kelly>", t = tag("<tag x:k='something'>"))) + assert !t.match(:descendant => {:tag => /^child/, :attributes => {"v" => /m/}}) + assert !t.match(:descendant => {:attributes => {"v" => false}}) + end + + def test_match_child_count + t = tag("<tag x:k='something'>") + tag("hello", t) + tag("<child v=john a=kelly>", t) + tag("<sib m=vaughn v=james>", t) + assert t.match(:children => { :count => 2 }) + assert t.match(:children => { :count => 2..4 }) + assert t.match(:children => { :less_than => 4 }) + assert t.match(:children => { :greater_than => 1 }) + assert !t.match(:children => { :count => 3 }) + end + + def test_conditions_as_strings + t = tag("<tag x:k='something'>") + assert t.match("tag" => "tag") + assert t.match("attributes" => { "x:k" => "something" }) + assert !t.match("tag" => "gat") + assert !t.match("attributes" => { "x:j" => "something" }) + end + + def test_attributes_as_symbols + t = tag("<child v=john a=kelly>") + assert t.match(:attributes => { :v => /oh/ }) + assert t.match(:attributes => { :a => /ll/ }) + end + + def test_match_sibling + t = tag("<tag x:k='something'>") + tag("hello", t) + tag("<span a=b>", t) + tag("world", t) + m = tag("<span k=r>", t) + tag("<span m=l>", t) + + assert m.match(:sibling => {:tag => "span", :attributes => {:a => true}}) + assert m.match(:sibling => {:tag => "span", :attributes => {:m => true}}) + assert !m.match(:sibling => {:tag => "span", :attributes => {:k => true}}) + end + + def test_match_sibling_before + t = tag("<tag x:k='something'>") + tag("hello", t) + tag("<span a=b>", t) + tag("world", t) + m = tag("<span k=r>", t) + tag("<span m=l>", t) + + assert m.match(:before => {:tag => "span", :attributes => {:m => true}}) + assert !m.match(:before => {:tag => "span", :attributes => {:a => true}}) + assert !m.match(:before => {:tag => "span", :attributes => {:k => true}}) + end + + def test_match_sibling_after + t = tag("<tag x:k='something'>") + tag("hello", t) + tag("<span a=b>", t) + tag("world", t) + m = tag("<span k=r>", t) + tag("<span m=l>", t) + + assert m.match(:after => {:tag => "span", :attributes => {:a => true}}) + assert !m.match(:after => {:tag => "span", :attributes => {:m => true}}) + assert !m.match(:after => {:tag => "span", :attributes => {:k => true}}) + end + + def test_to_s + t = tag("<b x='foo'>") + tag("hello", t) + tag("<hr />", t) + assert_equal %(<b x="foo">hello<hr /></b>), t.to_s + end + + private + + def tag(content, parent=nil) + node = HTML::Node.parse(parent,0,0,content) + parent.children << node if parent + node + end +end diff --git a/actionpack/test/html-scanner/text_node_test.rb b/actionpack/test/html-scanner/text_node_test.rb new file mode 100644 index 0000000000..1ab3f4454e --- /dev/null +++ b/actionpack/test/html-scanner/text_node_test.rb @@ -0,0 +1,50 @@ +require 'abstract_unit' + +class TextNodeTest < Test::Unit::TestCase + def setup + @node = HTML::Text.new(nil, 0, 0, "hello, howdy, aloha, annyeong") + end + + def test_to_s + assert_equal "hello, howdy, aloha, annyeong", @node.to_s + end + + def test_find_string + assert_equal @node, @node.find("hello, howdy, aloha, annyeong") + assert_equal false, @node.find("bogus") + end + + def test_find_regexp + assert_equal @node, @node.find(/an+y/) + assert_nil @node.find(/b/) + end + + def test_find_hash + assert_equal @node, @node.find(:content => /howdy/) + assert_nil @node.find(:content => /^howdy$/) + assert_equal false, @node.find(:content => "howdy") + end + + def test_find_other + assert_nil @node.find(:hello) + end + + def test_match_string + assert @node.match("hello, howdy, aloha, annyeong") + assert_equal false, @node.match("bogus") + end + + def test_match_regexp + assert_not_nil @node, @node.match(/an+y/) + assert_nil @node.match(/b/) + end + + def test_match_hash + assert_not_nil @node, @node.match(:content => "howdy") + assert_nil @node.match(:content => /^howdy$/) + end + + def test_match_other + assert_nil @node.match(:hello) + end +end diff --git a/actionpack/test/html-scanner/tokenizer_test.rb b/actionpack/test/html-scanner/tokenizer_test.rb new file mode 100644 index 0000000000..a001bcbbad --- /dev/null +++ b/actionpack/test/html-scanner/tokenizer_test.rb @@ -0,0 +1,131 @@ +require 'abstract_unit' + +class TokenizerTest < Test::Unit::TestCase + + def test_blank + tokenize "" + assert_end + end + + def test_space + tokenize " " + assert_next " " + assert_end + end + + def test_tag_simple_open + tokenize "<tag>" + assert_next "<tag>" + assert_end + end + + def test_tag_simple_self_closing + tokenize "<tag />" + assert_next "<tag />" + assert_end + end + + def test_tag_simple_closing + tokenize "</tag>" + assert_next "</tag>" + end + + def test_tag_with_single_quoted_attribute + tokenize %{<tag a='hello'>x} + assert_next %{<tag a='hello'>} + end + + def test_tag_with_single_quoted_attribute_with_escape + tokenize %{<tag a='hello\\''>x} + assert_next %{<tag a='hello\\''>} + end + + def test_tag_with_double_quoted_attribute + tokenize %{<tag a="hello">x} + assert_next %{<tag a="hello">} + end + + def test_tag_with_double_quoted_attribute_with_escape + tokenize %{<tag a="hello\\"">x} + assert_next %{<tag a="hello\\"">} + end + + def test_tag_with_unquoted_attribute + tokenize %{<tag a=hello>x} + assert_next %{<tag a=hello>} + end + + def test_tag_with_lt_char_in_attribute + tokenize %{<tag a="x < y">x} + assert_next %{<tag a="x < y">} + end + + def test_tag_with_gt_char_in_attribute + tokenize %{<tag a="x > y">x} + assert_next %{<tag a="x > y">} + end + + def test_doctype_tag + tokenize %{<!DOCTYPE "blah" "blah" "blah">\n <html>} + assert_next %{<!DOCTYPE "blah" "blah" "blah">} + assert_next %{\n } + assert_next %{<html>} + end + + def test_cdata_tag + tokenize %{<![CDATA[<br>]]>} + assert_next %{<![CDATA[<br>]]>} + assert_end + end + + def test_unterminated_cdata_tag + tokenize %{<content:encoded><![CDATA[ neverending...} + assert_next %{<content:encoded>} + assert_next %{<![CDATA[ neverending...} + assert_end + end + + def test_less_than_with_space + tokenize %{original < hello > world} + assert_next %{original } + assert_next %{< hello > world} + end + + def test_less_than_without_matching_greater_than + tokenize %{hello <span onmouseover="gotcha"\n<b>foo</b>\nbar</span>} + assert_next %{hello } + assert_next %{<span onmouseover="gotcha"\n} + assert_next %{<b>} + assert_next %{foo} + assert_next %{</b>} + assert_next %{\nbar} + assert_next %{</span>} + assert_end + end + + def test_unterminated_comment + tokenize %{hello <!-- neverending...} + assert_next %{hello } + assert_next %{<!-- neverending...} + assert_end + end + + private + + def tokenize(text) + @tokenizer = HTML::Tokenizer.new(text) + end + + def assert_next(expected, message=nil) + token = @tokenizer.next + assert_equal expected, token, message + end + + def assert_sequence(*expected) + assert_next expected.shift until expected.empty? + end + + def assert_end(message=nil) + assert_nil @tokenizer.next, message + end +end -- cgit v1.2.3