From f0e754e7136f92398495590908162b2501de5869 Mon Sep 17 00:00:00 2001 From: Norman Clarke Date: Wed, 14 Apr 2010 11:12:07 -0300 Subject: Delegate Inflector.transliterate to i18n. [#4508 state:resolved] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ancillary changes: Moved Chars#normalize into a class method; removed unused UTF8_PAT constant. Signed-off-by: José Valim --- .../lib/active_support/multibyte/chars.rb | 45 +++++++++++++--------- 1 file changed, 27 insertions(+), 18 deletions(-) (limited to 'activesupport/lib/active_support/multibyte') diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb index 4ade1158fd..cca30d1141 100644 --- a/activesupport/lib/active_support/multibyte/chars.rb +++ b/activesupport/lib/active_support/multibyte/chars.rb @@ -75,8 +75,6 @@ module ActiveSupport #:nodoc: UNICODE_TRAILERS_PAT = /(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+\Z/u UNICODE_LEADERS_PAT = /\A(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+/u - UTF8_PAT = ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'] - attr_reader :wrapped_string alias to_s wrapped_string alias to_str wrapped_string @@ -409,25 +407,11 @@ module ActiveSupport #:nodoc: # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for # passing strings to databases and validations. # - # * str - The string to perform normalization on. # * form - The form you want to normalize in. Should be one of the following: # :c, :kc, :d, or :kd. 
Default is # ActiveSupport::Multibyte.default_normalization_form def normalize(form=ActiveSupport::Multibyte.default_normalization_form) - # See http://www.unicode.org/reports/tr15, Table 1 - codepoints = self.class.u_unpack(@wrapped_string) - chars(case form - when :d - self.class.reorder_characters(self.class.decompose_codepoints(:canonical, codepoints)) - when :c - self.class.compose_codepoints(self.class.reorder_characters(self.class.decompose_codepoints(:canonical, codepoints))) - when :kd - self.class.reorder_characters(self.class.decompose_codepoints(:compatability, codepoints)) - when :kc - self.class.compose_codepoints(self.class.reorder_characters(self.class.decompose_codepoints(:compatability, codepoints))) - else - raise ArgumentError, "#{form} is not a valid normalization variant", caller - end.pack('U*')) + chars(self.class.normalize(@wrapped_string, form)) end # Performs canonical decomposition on all the characters. @@ -659,7 +643,7 @@ module ActiveSupport #:nodoc: # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string. # - # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP-1252 or ISO-8859-1. + # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1. def tidy_bytes(string, force = false) if force return string.unpack("C*").map do |b| @@ -708,6 +692,31 @@ module ActiveSupport #:nodoc: end bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*") end + + # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for + # passing strings to databases and validations. + # + # * string - The string to perform normalization on. + # * form - The form you want to normalize in. Should be one of the following: + # :c, :kc, :d, or :kd. 
Default is + # ActiveSupport::Multibyte.default_normalization_form + def normalize(string, form=ActiveSupport::Multibyte.default_normalization_form) + # See http://www.unicode.org/reports/tr15, Table 1 + codepoints = u_unpack(string) + case form + when :d + reorder_characters(decompose_codepoints(:canonical, codepoints)) + when :c + compose_codepoints(reorder_characters(decompose_codepoints(:canonical, codepoints))) + when :kd + reorder_characters(decompose_codepoints(:compatability, codepoints)) + when :kc + compose_codepoints(reorder_characters(decompose_codepoints(:compatability, codepoints))) + else + raise ArgumentError, "#{form} is not a valid normalization variant", caller + end.pack('U*') + end + end protected -- cgit v1.2.3