diff options
-rw-r--r-- | activesupport/CHANGELOG.md | 16 | ||||
-rwxr-xr-x | activesupport/bin/generate_tables | 4 | ||||
-rw-r--r-- | activesupport/lib/active_support/multibyte/unicode.rb | 26 | ||||
-rw-r--r-- | activesupport/lib/active_support/values/unicode_tables.dat | bin | 1068675 -> 1116857 bytes | |||
-rw-r--r-- | activesupport/test/multibyte_test_helpers.rb | 2 |
5 files changed, 38 insertions, 10 deletions
diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md index 5207194fba..5af97e3d37 100644 --- a/activesupport/CHANGELOG.md +++ b/activesupport/CHANGELOG.md @@ -1,3 +1,19 @@ +* Updated Unicode version to 9.0.0 + + Now we can handle new emojis such like "👩‍👩‍👧‍👦" ("\u{1F469}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}"). + + version 8.0.0 + + "👩‍👩‍👧‍👦".mb_chars.grapheme_length # => 4 + "👩‍👩‍👧‍👦".mb_chars.reverse # => "👦👧‍👩‍👩‍" + + version 9.0.0 + + "👩‍👩‍👧‍👦".mb_chars.grapheme_length # => 1 + "👩‍👩‍👧‍👦".mb_chars.reverse # => "👩‍👩‍👧‍👦" + + *Fumiaki MATSUSHIMA* + * Changed `ActiveSupport::Inflector#transliterate` to raise `ArgumentError` when it receives anything except a string. diff --git a/activesupport/bin/generate_tables b/activesupport/bin/generate_tables index 5d912f375c..aa36a01b5b 100755 --- a/activesupport/bin/generate_tables +++ b/activesupport/bin/generate_tables @@ -8,6 +8,7 @@ end require "open-uri" require "tmpdir" +require "fileutils" module ActiveSupport module Multibyte @@ -101,9 +102,10 @@ module ActiveSupport def parse SOURCES.each do |type, url| - filename = File.join(Dir.tmpdir, "#{url.split('/').last}") + filename = File.join(Dir.tmpdir, UNICODE_VERSION, "#{url.split('/').last}") unless File.exist?(filename) $stderr.puts "Downloading #{url.split('/').last}" + FileUtils.mkdir_p(File.dirname(filename)) File.open(filename, "wb") do |target| open(url) do |source| source.each_line { |line| target.write line } diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index 05cfb249c3..0912912aba 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -9,7 +9,7 @@ module ActiveSupport NORMALIZATION_FORMS = [:c, :kc, :d, :kd] # The Unicode version that is supported by the implementation - UNICODE_VERSION = "8.0.0" + UNICODE_VERSION = "9.0.0" # The default normalization 
used for operations that require # normalization. It can be set to any of the normalizations @@ -57,9 +57,12 @@ module ActiveSupport previous = codepoints[pos - 1] current = codepoints[pos] + # See http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules should_break = + if pos == eoc + true # GB3. CR X LF - if previous == database.boundary[:cr] && current == database.boundary[:lf] + elsif previous == database.boundary[:cr] && current == database.boundary[:lf] false # GB4. (Control|CR|LF) ÷ elsif previous && in_char_class?(previous, [:control, :cr, :lf]) @@ -76,11 +79,8 @@ module ActiveSupport # GB8. (LVT|T) X (T) elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current false - # GB8a. Regional_Indicator X Regional_Indicator - elsif database.boundary[:regional_indicator] === previous && database.boundary[:regional_indicator] === current - false - # GB9. X Extend - elsif database.boundary[:extend] === current + # GB9. X (Extend | ZWJ) + elsif in_char_class?(current, [:extend, :zwj]) false # GB9a. X SpacingMark elsif database.boundary[:spacingmark] === current @@ -88,7 +88,17 @@ module ActiveSupport # GB9b. Prepend X elsif database.boundary[:prepend] === previous false - # GB10. Any ÷ Any + # GB10. (E_Base | EBG) Extend* X E_Modifier + elsif (marker...pos).any? { |i| in_char_class?(codepoints[i], [:e_base, :e_base_gaz]) && codepoints[i + 1...pos].all? { |c| database.boundary[:extend] === c } } && database.boundary[:e_modifier] === current + false + # GB11. ZWJ X (Glue_After_Zwj | EBG) + elsif database.boundary[:zwj] === previous && in_char_class?(current, [:glue_after_zwj, :e_base_gaz]) + false + # GB12. ^ (RI RI)* RI X RI + # GB13. [^RI] (RI RI)* RI X RI + elsif codepoints[marker..pos].all? { |c| database.boundary[:regional_indicator] === c } && codepoints[marker..pos].count { |c| database.boundary[:regional_indicator] === c }.even? + false + # GB999. 
Any ÷ Any else true end diff --git a/activesupport/lib/active_support/values/unicode_tables.dat b/activesupport/lib/active_support/values/unicode_tables.dat Binary files differ index dd2c178fb6..f7d9c48bbe 100644 --- a/activesupport/lib/active_support/values/unicode_tables.dat +++ b/activesupport/lib/active_support/values/unicode_tables.dat diff --git a/activesupport/test/multibyte_test_helpers.rb b/activesupport/test/multibyte_test_helpers.rb index 2201860d8a..a70516bb08 100644 --- a/activesupport/test/multibyte_test_helpers.rb +++ b/activesupport/test/multibyte_test_helpers.rb @@ -18,7 +18,7 @@ module MultibyteTestHelpers end UNIDATA_URL = "http://www.unicode.org/Public/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}/ucd" - CACHE_DIR = "#{Dir.tmpdir}/cache/unicode_conformance" + CACHE_DIR = "#{Dir.tmpdir}/cache/unicode_conformance/#{ActiveSupport::Multibyte::Unicode::UNICODE_VERSION}" FileUtils.mkdir_p(CACHE_DIR) UNICODE_STRING = "こにちわ".freeze |