aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWillem van Bergen <willem@vanbergen.org>2010-01-01 12:57:36 +0100
committerJeremy Kemper <jeremy@bitsweat.net>2010-01-01 13:17:46 -0800
commit96a2b3905ce14df8f25b1646d3b110505bf8820b (patch)
tree4ff32801cc260b2298382686e0a725e741b8a4b9
parentd7f9b9fd244228d3503d7d37ac2f07365d54df3c (diff)
downloadrails-96a2b3905ce14df8f25b1646d3b110505bf8820b.tar.gz
rails-96a2b3905ce14df8f25b1646d3b110505bf8820b.tar.bz2
rails-96a2b3905ce14df8f25b1646d3b110505bf8820b.zip
Added SAX-based parser for XmlMini, using LibXML
Signed-off-by: Jeremy Kemper <jeremy@bitsweat.net>
-rw-r--r--activesupport/lib/active_support/xml_mini/libxmlsax.rb84
-rw-r--r--activesupport/test/xml_mini/libxmlsax_engine_test.rb194
2 files changed, 278 insertions, 0 deletions
diff --git a/activesupport/lib/active_support/xml_mini/libxmlsax.rb b/activesupport/lib/active_support/xml_mini/libxmlsax.rb
new file mode 100644
index 0000000000..dba1f8755a
--- /dev/null
+++ b/activesupport/lib/active_support/xml_mini/libxmlsax.rb
@@ -0,0 +1,84 @@
+require 'libxml'
+
+# = XmlMini LibXML implementation using a SAX-based parser
+module ActiveSupport
+ module XmlMini_LibXMLSAX
+ extend self
+
+ # Class that will build the hash while the XML document
+ # is being parsed using SAX events.
+ class HashBuilder
+
+ include LibXML::XML::SaxParser::Callbacks
+
+ CONTENT_KEY = '__content__'.freeze
+ HASH_SIZE_KEY = '__hash_size__'.freeze
+
+ attr_reader :hash
+
+ def current_hash
+ @hash_stack.last
+ end
+
+ def on_start_document
+ @hash = {}
+ @hash_stack = [@hash]
+ end
+
+ def on_end_document
+ raise "Parse stack not empty!" if @hash_stack.size > 1
+ end
+
+ def on_error(error_message)
+ raise LibXML::XML::Error, error_message
+ end
+
+ def on_start_element(name, attrs = {})
+ new_hash = { CONTENT_KEY => '' }.merge(attrs)
+ new_hash[HASH_SIZE_KEY] = new_hash.size + 1
+
+ case current_hash[name]
+ when Array then current_hash[name] << new_hash
+ when Hash then current_hash[name] = [current_hash[name], new_hash]
+ when nil then current_hash[name] = new_hash
+ end
+
+ @hash_stack.push(new_hash)
+ end
+
+ def on_end_element(name)
+ if current_hash.length > current_hash.delete(HASH_SIZE_KEY) && current_hash[CONTENT_KEY].blank? || current_hash[CONTENT_KEY] == ''
+ current_hash.delete(CONTENT_KEY)
+ end
+ @hash_stack.pop
+ end
+
+ def on_characters(string)
+ current_hash[CONTENT_KEY] << string
+ end
+
+ alias_method :on_cdata_block, :on_characters
+ end
+
+ attr_accessor :document_class
+ self.document_class = HashBuilder
+
+ def parse(data)
+ if !data.respond_to?(:read)
+ data = StringIO.new(data || '')
+ end
+
+ char = data.getc
+ if char.nil?
+ {}
+ else
+ data.ungetc(char)
+ document = self.document_class.new
+ parser = LibXML::XML::SaxParser.io(data)
+ parser.callbacks = document
+ parser.parse
+ document.hash
+ end
+ end
+ end
+end \ No newline at end of file
diff --git a/activesupport/test/xml_mini/libxmlsax_engine_test.rb b/activesupport/test/xml_mini/libxmlsax_engine_test.rb
new file mode 100644
index 0000000000..6d5b3673fa
--- /dev/null
+++ b/activesupport/test/xml_mini/libxmlsax_engine_test.rb
@@ -0,0 +1,194 @@
+require 'abstract_unit'
+require 'active_support/xml_mini'
+require 'active_support/core_ext/hash/conversions'
+
+begin
+ require 'libxml'
+rescue LoadError
+ # Skip libxml tests
+else
+
+class LibXMLSAXEngineTest < Test::Unit::TestCase
+ include ActiveSupport
+
+ def setup
+ @default_backend = XmlMini.backend
+ XmlMini.backend = 'LibXMLSAX'
+
+ LibXML::XML::Error.set_handler(&lambda { |error| }) #silence libxml, exceptions will do
+ end
+
+ def teardown
+ XmlMini.backend = @default_backend
+ end
+
+ def test_exception_thrown_on_expansion_attack
+ assert_raise LibXML::XML::Error do
+ attack_xml = %{<?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE member [
+ <!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
+ <!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
+ <!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
+ <!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
+ <!ENTITY e "&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;">
+ <!ENTITY f "&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;">
+ <!ENTITY g "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
+ ]>
+ <member>
+ &a;
+ </member>
+ }
+ Hash.from_xml(attack_xml)
+ end
+ end
+
+ def test_setting_libxml_as_backend
+ XmlMini.backend = 'LibXMLSAX'
+ assert_equal XmlMini_LibXMLSAX, XmlMini.backend
+ end
+
+ def test_blank_returns_empty_hash
+ assert_equal({}, XmlMini.parse(nil))
+ assert_equal({}, XmlMini.parse(''))
+ end
+
+ def test_array_type_makes_an_array
+ assert_equal_rexml(<<-eoxml)
+ <blog>
+ <posts type="array">
+ <post>a post</post>
+ <post>another post</post>
+ </posts>
+ </blog>
+ eoxml
+ end
+
+ def test_one_node_document_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products/>
+ eoxml
+ end
+
+ def test_one_node_with_attributes_document_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products foo="bar"/>
+ eoxml
+ end
+
+ def test_products_node_with_book_node_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ <book name="awesome" id="12345" />
+ </products>
+ eoxml
+ end
+
+ def test_products_node_with_two_book_nodes_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ <book name="awesome" id="12345" />
+ <book name="america" id="67890" />
+ </products>
+ eoxml
+ end
+
+ def test_single_node_with_content_as_hash
+ assert_equal_rexml(<<-eoxml)
+ <products>
+ hello world
+ </products>
+ eoxml
+ end
+
+ def test_children_with_children
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <book name="america" id="67890" />
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello everyone
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_non_adjacent_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ good
+ <products>
+ hello everyone
+ </products>
+ morning
+ </root>
+ eoxml
+ end
+
+ def test_parse_from_io
+ io = StringIO.new(<<-eoxml)
+ <root>
+ good
+ <products>
+ hello everyone
+ </products>
+ morning
+ </root>
+ eoxml
+ XmlMini.parse(io)
+ end
+
+ def test_children_with_simple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_multiple_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ <![CDATA[cdatablock1]]><![CDATA[cdatablock2]]>
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_text_and_cdata
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products>
+ hello <![CDATA[cdatablock]]>
+ morning
+ </products>
+ </root>
+ eoxml
+ end
+
+ def test_children_with_blank_text
+ assert_equal_rexml(<<-eoxml)
+ <root>
+ <products> </products>
+ </root>
+ eoxml
+ end
+
+ private
+ def assert_equal_rexml(xml)
+ hash = XmlMini.with_backend('REXML') { XmlMini.parse(xml) }
+ assert_equal(hash, XmlMini.parse(xml))
+ end
+end
+
+end