diff options
author | Norman Clarke <norman@njclarke.com> | 2012-01-05 17:08:27 -0300 |
---|---|---|
committer | Norman Clarke <norman@njclarke.com> | 2012-01-05 17:08:27 -0300 |
commit | 3fe7ca1dbea75ae83cd2eb868ba3f8518c0849a4 (patch) | |
tree | c2f8d0c0df58e98496a7345747f29873e6f35a78 /activesupport | |
parent | 51648a6fee31c9642d3ce8899a1c718e1604f4bc (diff) | |
download | rails-3fe7ca1dbea75ae83cd2eb868ba3f8518c0849a4.tar.gz rails-3fe7ca1dbea75ae83cd2eb868ba3f8518c0849a4.tar.bz2 rails-3fe7ca1dbea75ae83cd2eb868ba3f8518c0849a4.zip |
Replace Unicode.u_unpack with String#codepoints
Diffstat (limited to 'activesupport')
5 files changed, 5 insertions, 38 deletions
diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb index cabe073616..fc15af17db 100644 --- a/activesupport/lib/active_support/multibyte.rb +++ b/activesupport/lib/active_support/multibyte.rb @@ -3,7 +3,6 @@ require 'active_support/core_ext/module/attribute_accessors' module ActiveSupport #:nodoc: module Multibyte - autoload :EncodingError, 'active_support/multibyte/exceptions' autoload :Chars, 'active_support/multibyte/chars' autoload :Unicode, 'active_support/multibyte/unicode' diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb index 1389cb50c9..c0796320b2 100644 --- a/activesupport/lib/active_support/multibyte/chars.rb +++ b/activesupport/lib/active_support/multibyte/chars.rb @@ -153,7 +153,7 @@ module ActiveSupport #:nodoc: # 'é'.length # => 2 # 'é'.mb_chars.decompose.to_s.length # => 3 def decompose - chars(Unicode.decompose(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*')) + chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack('U*')) end # Performs composition on all the characters. @@ -162,7 +162,7 @@ module ActiveSupport #:nodoc: # 'é'.length # => 3 # 'é'.mb_chars.compose.to_s.length # => 2 def compose - chars(Unicode.compose(Unicode.u_unpack(@wrapped_string)).pack('U*')) + chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack('U*')) end # Returns the number of grapheme clusters in the string. diff --git a/activesupport/lib/active_support/multibyte/exceptions.rb b/activesupport/lib/active_support/multibyte/exceptions.rb deleted file mode 100644 index 62066e3c71..0000000000 --- a/activesupport/lib/active_support/multibyte/exceptions.rb +++ /dev/null @@ -1,8 +0,0 @@ -# encoding: utf-8 - -module ActiveSupport #:nodoc: - module Multibyte #:nodoc: - # Raised when a problem with the encoding was found. - class EncodingError < StandardError; end - end -end
\ No newline at end of file diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index c689c14631..e258e2e48e 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -61,19 +61,6 @@ module ActiveSupport TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u - # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't - # valid UTF-8. - # - # Example: - # Unicode.u_unpack('Café') # => [67, 97, 102, 233] - def u_unpack(string) - begin - string.unpack 'U*' - rescue ArgumentError - raise EncodingError, 'malformed UTF-8 character' - end - end - # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>, # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>. @@ -89,7 +76,7 @@ module ActiveSupport # Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]] # Unicode.g_unpack('Café') # => [[67], [97], [102], [233]] def g_unpack(string) - codepoints = u_unpack(string) + codepoints = string.codepoints.to_a unpacked = [] pos = 0 marker = 0 @@ -283,7 +270,7 @@ module ActiveSupport def normalize(string, form=nil) form ||= @default_normalization_form # See http://www.unicode.org/reports/tr15, Table 1 - codepoints = u_unpack(string) + codepoints = string.codepoints.to_a case form when :d reorder_characters(decompose(:canonical, codepoints)) @@ -299,7 +286,7 @@ module ActiveSupport end def apply_mapping(string, mapping) #:nodoc: - u_unpack(string).map do |codepoint| + string.each_codepoint.map do |codepoint| cp = database.codepoints[codepoint] if cp and (ncp = cp.send(mapping)) and ncp > 0 ncp diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb index 0c6b03f15f..87830d57d3 100644 --- a/activesupport/test/multibyte_chars_test.rb +++ b/activesupport/test/multibyte_chars_test.rb @@ -72,17 +72,6 @@ class MultibyteCharsTest < Test::Unit::TestCase assert !@proxy_class.consumes?(BYTE_STRING) end - def test_unpack_utf8_strings - assert_equal 4, ActiveSupport::Multibyte::Unicode.u_unpack(UNICODE_STRING).length - assert_equal 5, ActiveSupport::Multibyte::Unicode.u_unpack(ASCII_STRING).length - end - - def test_unpack_raises_encoding_error_on_broken_strings - assert_raise(ActiveSupport::Multibyte::EncodingError) do - ActiveSupport::Multibyte::Unicode.u_unpack(BYTE_STRING) - end - end - def test_concatenation_should_return_a_proxy_class_instance assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars + 'b').class assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars << 'b').class |