diff options
Diffstat (limited to 'activesupport/lib/active_support/multibyte')
-rw-r--r-- | activesupport/lib/active_support/multibyte/chars.rb | 15 | ||||
-rw-r--r-- | activesupport/lib/active_support/multibyte/unicode.rb | 28 |
2 files changed, 27 insertions, 16 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb index 262f25b874..8c58466556 100644 --- a/activesupport/lib/active_support/multibyte/chars.rb +++ b/activesupport/lib/active_support/multibyte/chars.rb @@ -16,7 +16,8 @@ module ActiveSupport #:nodoc: # through the +mb_chars+ method. Methods which would normally return a # String object now return a Chars object so methods can be chained. # - # 'The Perfect String '.mb_chars.downcase.strip.normalize # => "the perfect string" + # 'The Perfect String '.mb_chars.downcase.strip.normalize + # # => #<ActiveSupport::Multibyte::Chars:0x007fdc434ccc10 @wrapped_string="the perfect string"> # # Chars objects are perfectly interchangeable with String objects as long as # no explicit class checks are made. If certain methods do explicitly check @@ -134,7 +135,7 @@ module ActiveSupport #:nodoc: # Converts characters in the string to the opposite case. # - # 'El Cañón".mb_chars.swapcase.to_s # => "eL cAÑÓN" + # 'El Cañón'.mb_chars.swapcase.to_s # => "eL cAÑÓN" def swapcase chars Unicode.swapcase(@wrapped_string) end @@ -148,8 +149,8 @@ module ActiveSupport #:nodoc: # Capitalizes the first letter of every word, when possible. # - # "ÉL QUE SE ENTERÓ".mb_chars.titleize # => "Él Que Se Enteró" - # "日本語".mb_chars.titleize # => "日本語" + # "ÉL QUE SE ENTERÓ".mb_chars.titleize.to_s # => "Él Que Se Enteró" + # "日本語".mb_chars.titleize.to_s # => "日本語" def titleize chars(downcase.to_s.gsub(/\b('?\S)/u) { Unicode.upcase($1) }) end @@ -210,9 +211,9 @@ module ActiveSupport #:nodoc: end end - protected + private - def translate_offset(byte_offset) #:nodoc: + def translate_offset(byte_offset) return nil if byte_offset.nil? return 0 if @wrapped_string == "" @@ -224,7 +225,7 @@ module ActiveSupport #:nodoc: end end - def chars(string) #:nodoc: + def chars(string) self.class.new(string) end end diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index 7842264b39..0912912aba 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -9,7 +9,7 @@ module ActiveSupport NORMALIZATION_FORMS = [:c, :kc, :d, :kd] # The Unicode version that is supported by the implementation - UNICODE_VERSION = "8.0.0" + UNICODE_VERSION = "9.0.0" # The default normalization used for operations that require # normalization. It can be set to any of the normalizations @@ -57,9 +57,12 @@ module ActiveSupport previous = codepoints[pos - 1] current = codepoints[pos] + # See http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules should_break = + if pos == eoc + true # GB3. CR X LF - if previous == database.boundary[:cr] && current == database.boundary[:lf] + elsif previous == database.boundary[:cr] && current == database.boundary[:lf] false # GB4. (Control|CR|LF) ÷ elsif previous && in_char_class?(previous, [:control, :cr, :lf]) @@ -76,11 +79,8 @@ module ActiveSupport # GB8. (LVT|T) X (T) elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current false - # GB8a. Regional_Indicator X Regional_Indicator - elsif database.boundary[:regional_indicator] === previous && database.boundary[:regional_indicator] === current - false - # GB9. X Extend - elsif database.boundary[:extend] === current + # GB9. X (Extend | ZWJ) + elsif in_char_class?(current, [:extend, :zwj]) false # GB9a. X SpacingMark elsif database.boundary[:spacingmark] === current @@ -88,7 +88,17 @@ module ActiveSupport # GB9b. Prepend X elsif database.boundary[:prepend] === previous false - # GB10. Any ÷ Any + # GB10. (E_Base | EBG) Extend* X E_Modifier + elsif (marker...pos).any? { |i| in_char_class?(codepoints[i], [:e_base, :e_base_gaz]) && codepoints[i + 1...pos].all? { |c| database.boundary[:extend] === c } } && database.boundary[:e_modifier] === current + false + # GB11. ZWJ X (Glue_After_Zwj | EBG) + elsif database.boundary[:zwj] === previous && in_char_class?(current, [:glue_after_zwj, :e_base_gaz]) + false + # GB12. ^ (RI RI)* RI X RI + # GB13. [^RI] (RI RI)* RI X RI + elsif codepoints[marker..pos].all? { |c| database.boundary[:regional_indicator] === c } && codepoints[marker..pos].count { |c| database.boundary[:regional_indicator] === c }.even? + false + # GB999. Any ÷ Any else true end @@ -358,7 +368,7 @@ module ActiveSupport private - def apply_mapping(string, mapping) #:nodoc: + def apply_mapping(string, mapping) database.codepoints string.each_codepoint.map do |codepoint| cp = database.codepoints[codepoint] |