aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--activesupport/lib/active_support/multibyte/chars.rb29
-rw-r--r--activesupport/test/multibyte_chars_test.rb59
2 files changed, 74 insertions, 14 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index c9bcfbd313..51b870de8c 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -363,6 +363,16 @@ module ActiveSupport #:nodoc:
slice
end
+ # Limit the byte size of the string to a number of bytes without breaking characters. Usable
+ # when the storage for a string is limited for some reason.
+ #
+ # Example:
+ # s = 'こんにちは'
+ # s.mb_chars.limit(7) #=> "こに"
+ def limit(limit)
+ slice(0...translate_offset(limit))
+ end
+
# Returns the codepoint of the first character in the string.
#
# Example:
@@ -651,24 +661,15 @@ module ActiveSupport #:nodoc:
end
protected
-
+
def translate_offset(byte_offset) #:nodoc:
return nil if byte_offset.nil?
return 0 if @wrapped_string == ''
- chunk = @wrapped_string[0..byte_offset]
begin
- begin
- chunk.unpack('U*').length - 1
- rescue ArgumentError => e
- chunk = @wrapped_string[0..(byte_offset+=1)]
- # Stop retrying at the end of the string
- raise e unless byte_offset < chunk.length
- # We damaged a character, retry
- retry
- end
- # Catch the ArgumentError so we can throw our own
- rescue ArgumentError
- raise EncodingError, 'malformed UTF-8 character'
+ @wrapped_string[0...byte_offset].unpack('U*').length
+ rescue ArgumentError => e
+ byte_offset -= 1
+ retry
end
end
diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb
index a1ea8c8f4c..fc7b5e5081 100644
--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -169,6 +169,7 @@ class MultibyteCharsUTF8BehaviourTest < Test::Unit::TestCase
assert chars('').strip.kind_of?(ActiveSupport::Multibyte.proxy_class)
assert chars('').reverse.kind_of?(ActiveSupport::Multibyte.proxy_class)
assert chars(' ').slice(0).kind_of?(ActiveSupport::Multibyte.proxy_class)
+ assert chars('').limit(0).kind_of?(ActiveSupport::Multibyte.proxy_class)
assert chars('').upcase.kind_of?(ActiveSupport::Multibyte.proxy_class)
assert chars('').downcase.kind_of?(ActiveSupport::Multibyte.proxy_class)
assert chars('').capitalize.kind_of?(ActiveSupport::Multibyte.proxy_class)
@@ -196,7 +197,9 @@ class MultibyteCharsUTF8BehaviourTest < Test::Unit::TestCase
def test_should_return_character_offset_for_regexp_matches
assert_nil(@chars =~ /wrong/u)
assert_equal 0, (@chars =~ /こ/u)
+ assert_equal 0, (@chars =~ /こに/u)
assert_equal 1, (@chars =~ /に/u)
+ assert_equal 2, (@chars =~ /ち/u)
assert_equal 3, (@chars =~ /わ/u)
end
@@ -493,6 +496,44 @@ class MultibyteCharsExtrasTest < Test::Unit::TestCase
end
end
+ def test_limit_should_not_break_on_blank_strings
+ chars = ''.mb_chars
+
+ assert_equal '', chars.limit(0)
+ assert_equal '', chars.limit(1)
+ end
+
+ def test_limit_should_work_on_a_multibyte_string
+ chars = UNICODE_STRING.mb_chars
+
+ assert_equal UNICODE_STRING, chars.limit(UNICODE_STRING.length)
+ assert_equal '', chars.limit(0)
+ assert_equal '', chars.limit(1)
+ assert_equal 'こ', chars.limit(3)
+ assert_equal 'こに', chars.limit(6)
+ assert_equal 'こに', chars.limit(8)
+ assert_equal 'こにち', chars.limit(9)
+ assert_equal 'こにちわ', chars.limit(50)
+ end
+
+ def test_limit_should_work_on_an_ascii_string
+ ascii = ASCII_STRING.mb_chars
+
+ assert_equal ASCII_STRING, ascii.limit(ASCII_STRING.length)
+ assert_equal '', ascii.limit(0)
+ assert_equal 'o', ascii.limit(1)
+ assert_equal 'oh', ascii.limit(2)
+ assert_equal 'ohay', ascii.limit(4)
+ assert_equal 'ohayo', ascii.limit(50)
+ end
+
+ def test_limit_should_keep_under_the_specified_byte_limit
+ chars = UNICODE_STRING.mb_chars
+ (1..UNICODE_STRING.length).each do |limit|
+ assert chars.limit(limit).to_s.length <= limit
+ end
+ end
+
def test_composition_exclusion_is_set_up_properly
# Normalization of DEVANAGARI LETTER QA breaks when composition exclusion isn't used correctly
qa = [0x915, 0x93c].pack('U*')
@@ -603,3 +644,21 @@ class MultibyteCharsExtrasTest < Test::Unit::TestCase
end.pack('U*')
end
end
+
+class MultibyteInternalsTest < ActiveSupport::TestCase
+ include MultibyteTestHelpers
+
+ test "Chars translates a character offset to a byte offset" do
+ chars = "Puisque c'était son erreur, il m'a aidé".mb_chars
+ [
+ [0, 0],
+ [3, 3],
+ [12, 11],
+ [14, 13],
+ [41, 39]
+ ].each do |byte_offset, character_offset|
+ assert_equal character_offset, chars.send(:translate_offset, byte_offset),
+ "Expected byte offset #{byte_offset} to translate to #{character_offset}"
+ end
+ end
+end \ No newline at end of file