about summary refs log tree commit diff stats
path: root/activesupport/lib/active_support/multibyte/unicode.rb
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/multibyte/unicode.rb')
-rw-r--r-- activesupport/lib/active_support/multibyte/unicode.rb 17
1 files changed, 8 insertions, 9 deletions
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb
index 84799c2399..35efebc65f 100644
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -11,7 +11,7 @@ module ActiveSupport
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
# The Unicode version that is supported by the implementation
- UNICODE_VERSION = '6.3.0'
+ UNICODE_VERSION = '7.0.0'
# The default normalization used for operations that require
# normalization. It can be set to any of the normalizations
@@ -42,7 +42,6 @@ module ActiveSupport
0x0085, # White_Space # Cc <control-0085>
0x00A0, # White_Space # Zs NO-BREAK SPACE
0x1680, # White_Space # Zs OGHAM SPACE MARK
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
0x2028, # White_Space # Zl LINE SEPARATOR
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
@@ -212,8 +211,8 @@ module ActiveSupport
codepoints
end
- # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1.
- if RUBY_VERSION >= '2.1'
+ # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
+ if !defined?(Rubinius)
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
# resulting in a valid UTF-8 string.
#
@@ -233,16 +232,16 @@ module ActiveSupport
# We're going to 'transcode' bytes from UTF-8 when possible, then fall back to
# CP1252 when we get errors. The final string will be 'converted' back to UTF-8
# before returning.
- reader = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC)
+ reader = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_16LE)
source = string.dup
- out = ''.force_encoding(Encoding::UTF_8_MAC)
+ out = ''.force_encoding(Encoding::UTF_16LE)
loop do
reader.primitive_convert(source, out)
_, _, _, error_bytes, _ = reader.primitive_errinfo
break if error_bytes.nil?
- out << error_bytes.encode(Encoding::UTF_8_MAC, Encoding::Windows_1252, invalid: :replace, undef: :replace)
+ out << error_bytes.encode(Encoding::UTF_16LE, Encoding::Windows_1252, invalid: :replace, undef: :replace)
end
reader.finish
@@ -335,7 +334,7 @@ module ActiveSupport
begin
@codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
rescue => e
- raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
+ raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
end
# Redefine the === method so we can write shorter rules for grapheme cluster breaks
@@ -367,6 +366,7 @@ module ActiveSupport
private
def apply_mapping(string, mapping) #:nodoc:
+ database.codepoints
string.each_codepoint.map do |codepoint|
cp = database.codepoints[codepoint]
if cp and (ncp = cp.send(mapping)) and ncp > 0
@@ -384,7 +384,6 @@ module ActiveSupport
# Returns the UnicodeDatabase instance cached in @database, creating it
# on the first call (or whenever @database is still nil).
def database
@database ||= UnicodeDatabase.new
end
-
end
end
end