diff options
Diffstat (limited to 'activesupport/lib/active_support/multibyte/unicode.rb')
-rw-r--r-- | activesupport/lib/active_support/multibyte/unicode.rb | 17 |
1 files changed, 8 insertions, 9 deletions
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index 84799c2399..35efebc65f 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -11,7 +11,7 @@ module ActiveSupport NORMALIZATION_FORMS = [:c, :kc, :d, :kd] # The Unicode version that is supported by the implementation - UNICODE_VERSION = '6.3.0' + UNICODE_VERSION = '7.0.0' # The default normalization used for operations that require # normalization. It can be set to any of the normalizations @@ -42,7 +42,6 @@ module ActiveSupport 0x0085, # White_Space # Cc <control-0085> 0x00A0, # White_Space # Zs NO-BREAK SPACE 0x1680, # White_Space # Zs OGHAM SPACE MARK - 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE 0x2028, # White_Space # Zl LINE SEPARATOR 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR @@ -212,8 +211,8 @@ module ActiveSupport codepoints end - # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1. - if RUBY_VERSION >= '2.1' + # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars. + if !defined?(Rubinius) # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent # resulting in a valid UTF-8 string. # @@ -233,16 +232,16 @@ module ActiveSupport # We're going to 'transcode' bytes from UTF-8 when possible, then fall back to # CP1252 when we get errors. The final string will be 'converted' back to UTF-8 # before returning. - reader = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC) + reader = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_16LE) source = string.dup - out = ''.force_encoding(Encoding::UTF_8_MAC) + out = ''.force_encoding(Encoding::UTF_16LE) loop do reader.primitive_convert(source, out) _, _, _, error_bytes, _ = reader.primitive_errinfo break if error_bytes.nil? - out << error_bytes.encode(Encoding::UTF_8_MAC, Encoding::Windows_1252, invalid: :replace, undef: :replace) + out << error_bytes.encode(Encoding::UTF_16LE, Encoding::Windows_1252, invalid: :replace, undef: :replace) end reader.finish @@ -335,7 +334,7 @@ module ActiveSupport begin @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read } rescue => e - raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") + raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") end # Redefine the === method so we can write shorter rules for grapheme cluster breaks @@ -367,6 +366,7 @@ module ActiveSupport private def apply_mapping(string, mapping) #:nodoc: + database.codepoints string.each_codepoint.map do |codepoint| cp = database.codepoints[codepoint] if cp and (ncp = cp.send(mapping)) and ncp > 0 @@ -384,7 +384,6 @@ module ActiveSupport def database @database ||= UnicodeDatabase.new end - end end end |