diff options
author | Willem van Bergen <willem@vanbergen.org> | 2010-01-01 12:08:48 +0100 |
---|---|---|
committer | Jeremy Kemper <jeremy@bitsweat.net> | 2010-01-01 13:17:45 -0800 |
commit | d7f9b9fd244228d3503d7d37ac2f07365d54df3c (patch) | |
tree | 1c4a59e7ff70b4beb0af199ea79febf0d056a21b /activesupport | |
parent | 34b03cebf9c9f2ce2a53511a4b2160485e270f12 (diff) | |
download | rails-d7f9b9fd244228d3503d7d37ac2f07365d54df3c.tar.gz rails-d7f9b9fd244228d3503d7d37ac2f07365d54df3c.tar.bz2 rails-d7f9b9fd244228d3503d7d37ac2f07365d54df3c.zip |
Added SAX-based parser for XmlMini, using Nokogiri.
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
Diffstat (limited to 'activesupport')
-rw-r--r-- | activesupport/lib/active_support/xml_mini/nokogirisax.rb | 82 | ||||
-rw-r--r-- | activesupport/test/xml_mini/nokogirisax_engine_test.rb | 216 |
2 files changed, 298 insertions, 0 deletions
diff --git a/activesupport/lib/active_support/xml_mini/nokogirisax.rb b/activesupport/lib/active_support/xml_mini/nokogirisax.rb new file mode 100644 index 0000000000..42a44897ba --- /dev/null +++ b/activesupport/lib/active_support/xml_mini/nokogirisax.rb @@ -0,0 +1,82 @@ +require 'nokogiri' + +# = XmlMini Nokogiri implementation using a SAX-based parser +module ActiveSupport + module XmlMini_NokogiriSAX + extend self + + # Class that will build the hash while the XML document + # is being parsed using SAX events. + class HashBuilder < Nokogiri::XML::SAX::Document + + CONTENT_KEY = '__content__'.freeze + HASH_SIZE_KEY = '__hash_size__'.freeze + + attr_reader :hash + + def current_hash + @hash_stack.last + end + + def start_document + @hash = {} + @hash_stack = [@hash] + end + + def end_document + raise "Parse stack not empty!" if @hash_stack.size > 1 + end + + def error(error_message) + raise Nokogiri::XML::SyntaxError, error_message + end + + def start_element(name, attrs = []) + new_hash = { CONTENT_KEY => '' } + new_hash[attrs.shift] = attrs.shift while attrs.length > 0 + new_hash[HASH_SIZE_KEY] = new_hash.size + 1 + + case current_hash[name] + when Array then current_hash[name] << new_hash + when Hash then current_hash[name] = [current_hash[name], new_hash] + when nil then current_hash[name] = new_hash + end + + @hash_stack.push(new_hash) + end + + def end_element(name) + if current_hash.length > current_hash.delete(HASH_SIZE_KEY) && current_hash[CONTENT_KEY].blank? || current_hash[CONTENT_KEY] == '' + current_hash.delete(CONTENT_KEY) + end + @hash_stack.pop + end + + def characters(string) + current_hash[CONTENT_KEY] << string + end + + alias_method :cdata_block, :characters + end + + attr_accessor :document_class + self.document_class = HashBuilder + + def parse(data) + if !data.respond_to?(:read) + data = StringIO.new(data || '') + end + + char = data.getc + if char.nil? + {} + else + data.ungetc(char) + document = self.document_class.new + parser = Nokogiri::XML::SAX::Parser.new(document) + parser.parse(data) + document.hash + end + end + end +end
\ No newline at end of file diff --git a/activesupport/test/xml_mini/nokogirisax_engine_test.rb b/activesupport/test/xml_mini/nokogirisax_engine_test.rb new file mode 100644 index 0000000000..43f1cda0e0 --- /dev/null +++ b/activesupport/test/xml_mini/nokogirisax_engine_test.rb @@ -0,0 +1,216 @@ +require 'abstract_unit' +require 'active_support/xml_mini' +require 'active_support/core_ext/hash/conversions' + +begin + require 'nokogiri' +rescue LoadError + # Skip nokogiri tests +else + +class NokogiriEngineTest < Test::Unit::TestCase + include ActiveSupport + + def setup + @default_backend = XmlMini.backend + XmlMini.backend = 'NokogiriSAX' + end + + def teardown + XmlMini.backend = @default_backend + end + + def test_file_from_xml + hash = Hash.from_xml(<<-eoxml) + <blog> + <logo type="file" name="logo.png" content_type="image/png"> + </logo> + </blog> + eoxml + assert hash.has_key?('blog') + assert hash['blog'].has_key?('logo') + + file = hash['blog']['logo'] + assert_equal 'logo.png', file.original_filename + assert_equal 'image/png', file.content_type + end + + def test_exception_thrown_on_expansion_attack + assert_raise Nokogiri::XML::SyntaxError do + attack_xml = <<-EOT + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE member [ + <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;"> + <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;"> + <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;"> + <!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;"> + <!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;"> + <!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"> + ]> + <member> + &a; + </member> + EOT + Hash.from_xml(attack_xml) + end + end + + def test_setting_nokogiri_as_backend + XmlMini.backend = 'Nokogiri' + assert_equal XmlMini_Nokogiri, XmlMini.backend + end + + def test_blank_returns_empty_hash + assert_equal({}, XmlMini.parse(nil)) + assert_equal({}, XmlMini.parse('')) + end + + def test_array_type_makes_an_array + assert_equal_rexml(<<-eoxml) + <blog> + <posts type="array"> + <post>a post</post> + <post>another post</post> + </posts> + </blog> + eoxml + end + + def test_one_node_document_as_hash + assert_equal_rexml(<<-eoxml) + <products/> + eoxml + end + + def test_one_node_with_attributes_document_as_hash + assert_equal_rexml(<<-eoxml) + <products foo="bar"/> + eoxml + end + + def test_products_node_with_book_node_as_hash + assert_equal_rexml(<<-eoxml) + <products> + <book name="awesome" id="12345" /> + </products> + eoxml + end + + def test_products_node_with_two_book_nodes_as_hash + assert_equal_rexml(<<-eoxml) + <products> + <book name="awesome" id="12345" /> + <book name="america" id="67890" /> + </products> + eoxml + end + + def test_single_node_with_content_as_hash + assert_equal_rexml(<<-eoxml) + <products> + hello world + </products> + eoxml + end + + def test_children_with_children + assert_equal_rexml(<<-eoxml) + <root> + <products> + <book name="america" id="67890" /> + </products> + </root> + eoxml + end + + def test_children_with_text + assert_equal_rexml(<<-eoxml) + <root> + <products> + hello everyone + </products> + </root> + eoxml + end + + def test_children_with_non_adjacent_text + assert_equal_rexml(<<-eoxml) + <root> + good + <products> + hello everyone + </products> + morning + </root> + eoxml + end + + def test_parse_from_io + io = StringIO.new(<<-eoxml) + <root> + good + <products> + hello everyone + </products> + morning + </root> + eoxml + XmlMini.parse(io) + end + + def test_children_with_simple_cdata + assert_equal_rexml(<<-eoxml) + <root> + <products> + <![CDATA[cdatablock]]> + </products> + </root> + eoxml + end + + def test_children_with_multiple_cdata + assert_equal_rexml(<<-eoxml) + <root> + <products> + <![CDATA[cdatablock1]]><![CDATA[cdatablock2]]> + </products> + </root> + eoxml + end + + def test_children_with_text_and_cdata + assert_equal_rexml(<<-eoxml) + <root> + <products> + hello <![CDATA[cdatablock]]> + morning + </products> + </root> + eoxml + end + + def test_children_with_blank_text + assert_equal_rexml(<<-eoxml) + <root> + <products> </products> + </root> + eoxml + end + + def test_children_with_blank_text_and_attribute + assert_equal_rexml(<<-eoxml) + <root> + <products type="file"> </products> + </root> + eoxml + end + + private + def assert_equal_rexml(xml) + hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) } + assert_equal(hash, XmlMini.parse(xml)) + end +end + +end |