aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/multibyte/chars.rb
diff options
context:
space:
mode:
author Norman Clarke <norman@njclarke.com> 2010-04-14 11:12:07 -0300
committer José Valim <jose.valim@gmail.com> 2010-04-30 16:18:12 +0200
commit f0e754e7136f92398495590908162b2501de5869 (patch)
tree 51e7a40ac81128c780ec67f3e98d0ed872defd2e /activesupport/lib/active_support/multibyte/chars.rb
parent 60504e62c8e2f5e137a0ac82aed67a6c0fe42447 (diff)
download rails-f0e754e7136f92398495590908162b2501de5869.tar.gz
rails-f0e754e7136f92398495590908162b2501de5869.tar.bz2
rails-f0e754e7136f92398495590908162b2501de5869.zip
Delegate Inflector.transliterate to i18n. [#4508 state:resolved]
Ancillary changes: Moved Chars#normalize into a class method; removed unused UTF8_PAT constant. Signed-off-by: José Valim <jose.valim@gmail.com>
Diffstat (limited to 'activesupport/lib/active_support/multibyte/chars.rb')
-rw-r--r-- activesupport/lib/active_support/multibyte/chars.rb 45
1 files changed, 27 insertions, 18 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 4ade1158fd..cca30d1141 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -75,8 +75,6 @@ module ActiveSupport #:nodoc:
UNICODE_TRAILERS_PAT = /(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+\Z/u
UNICODE_LEADERS_PAT = /\A(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+/u
- UTF8_PAT = ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8']
-
attr_reader :wrapped_string
alias to_s wrapped_string
alias to_str wrapped_string
@@ -409,25 +407,11 @@ module ActiveSupport #:nodoc:
# Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
# passing strings to databases and validations.
#
- # * <tt>str</tt> - The string to perform normalization on.
# * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
# <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
# ActiveSupport::Multibyte.default_normalization_form
def normalize(form=ActiveSupport::Multibyte.default_normalization_form)
- # See http://www.unicode.org/reports/tr15, Table 1
- codepoints = self.class.u_unpack(@wrapped_string)
- chars(case form
- when :d
- self.class.reorder_characters(self.class.decompose_codepoints(:canonical, codepoints))
- when :c
- self.class.compose_codepoints(self.class.reorder_characters(self.class.decompose_codepoints(:canonical, codepoints)))
- when :kd
- self.class.reorder_characters(self.class.decompose_codepoints(:compatability, codepoints))
- when :kc
- self.class.compose_codepoints(self.class.reorder_characters(self.class.decompose_codepoints(:compatability, codepoints)))
- else
- raise ArgumentError, "#{form} is not a valid normalization variant", caller
- end.pack('U*'))
+ chars(self.class.normalize(@wrapped_string, form))
end
# Performs canonical decomposition on all the characters.
@@ -659,7 +643,7 @@ module ActiveSupport #:nodoc:
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
#
- # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP-1252 or ISO-8859-1.
+ # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
def tidy_bytes(string, force = false)
if force
return string.unpack("C*").map do |b|
@@ -708,6 +692,31 @@ module ActiveSupport #:nodoc:
end
bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
end
+
+ # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
+ # passing strings to databases and validations.
+ #
+ # * <tt>string</tt> - The string to perform normalization on.
+ # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
+ # <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
+ # ActiveSupport::Multibyte.default_normalization_form. Any other value raises ArgumentError.
+ def normalize(string, form=ActiveSupport::Multibyte.default_normalization_form)
+ # Every form decomposes then reorders; :c and :kc also recompose. See http://www.unicode.org/reports/tr15, Table 1
+ codepoints = u_unpack(string)
+ case form
+ when :d
+ reorder_characters(decompose_codepoints(:canonical, codepoints))
+ when :c
+ compose_codepoints(reorder_characters(decompose_codepoints(:canonical, codepoints)))
+ when :kd
+ reorder_characters(decompose_codepoints(:compatability, codepoints))
+ when :kc
+ compose_codepoints(reorder_characters(decompose_codepoints(:compatability, codepoints)))
+ else
+ raise ArgumentError, "#{form} is not a valid normalization variant", caller
+ end.pack('U*')
+ end
+
end
protected