From 05633d02d8ac8aa1289c0a01872e13e9b2449cd5 Mon Sep 17 00:00:00 2001 From: Cliff Pruitt Date: Fri, 26 Jul 2019 11:02:40 -0400 Subject: Handle GB18030 strings with invalid characters in transliterate GB18030 is Unicode compatible and covers all Unicode code points so we can temporarily convert GB18030 strings to UTF-8 to perform the transliteration. After transliterating we want to convert back to GB18030. In all cases of transcoding, we replace invalid or undefined characters with the default replacement character ("?"). This is in line with the behavior of tidy_bytes which is used on the UTF-8 string before transliterating. --- activesupport/test/transliterate_test.rb | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'activesupport/test') diff --git a/activesupport/test/transliterate_test.rb b/activesupport/test/transliterate_test.rb index 47830946bf..ab7ffcaed0 100644 --- a/activesupport/test/transliterate_test.rb +++ b/activesupport/test/transliterate_test.rb @@ -75,7 +75,9 @@ class TransliterateTest < ActiveSupport::TestCase # Valid GB18030 Works def test_transliterate_handles_strings_with_valid_gb18030_encodings string = String.new("A", encoding: Encoding::GB18030) - assert_equal "A", ActiveSupport::Inflector.transliterate(string) + transcoded = ActiveSupport::Inflector.transliterate(string) + assert_equal "A", transcoded + assert_equal Encoding::GB18030, transcoded.encoding end # All other encodings raise argument errors @@ -103,17 +105,12 @@ class TransliterateTest < ActiveSupport::TestCase # Invalid raises exception def test_transliterate_handles_strings_with_invalid_us_ascii_bytes string = String.new("\255", encoding: Encoding::US_ASCII) - # exception = assert_raises Encoding::CompatibilityError do - # ActiveSupport::Inflector.transliterate(string) - # end assert_equal "?", ActiveSupport::Inflector.transliterate(string) end # Invalid GB18030 raises exception def test_transliterate_handles_strings_with_invalid_gb18030_bytes string = String.new("\255", encoding: Encoding::GB18030) - exception = assert_raises Encoding::CompatibilityError do - ActiveSupport::Inflector.transliterate(string) - end + assert_equal "?", ActiveSupport::Inflector.transliterate(string) end end -- cgit v1.2.3