diff options
-rw-r--r-- | activesupport/CHANGELOG | 2 | ||||
-rw-r--r-- | activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb | 38 | ||||
-rw-r--r-- | activesupport/test/multibyte_handler_test.rb | 34 |
3 files changed, 74 insertions, 0 deletions
# --- activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb ---

# Works just like the indexed replace method on String, except that instead of byte offsets
# you specify character offsets.
#
# Example:
#
#   s = "Müller"
#   s.chars[2] = "e" # Replace character with offset 2
#   s
#   #=> "Müeler"
#
#   s = "Müller"
#   s.chars[1, 2] = "ö" # Replace 2 characters at character offset 1
#   s
#   #=> "Möler"
#
# Parameters:
# * +str+  - the string to modify in place.
# * +args+ - a selector followed by the replacement, which is always the last argument.
#   The selector is one of: a character offset, a character offset plus a length, a Range
#   of character offsets, a Regexp (optionally followed by a capture index), or any other
#   object, which is converted with +to_s+ and searched for in +str+.
#   The replacement is a String; with a character-offset selector it may also be an
#   Integer codepoint.
#
# Returns +str+ after the replacement, or the replacement itself for a Regexp selector.
#
# Raises:
# * ArgumentError - when no selector is given.
# * IndexError    - when a character offset lies outside the string, the length is negative,
#   or the searched-for string does not occur in +str+.
# * RangeError    - when a Range starts at or beyond the end of the string.
def []=(str, *args)
  raise ArgumentError, "wrong number of arguments (#{args.length} for 2)" if args.length < 2

  replace_by = args.pop
  selector = args.first
  # Indexed replace with regular expressions already works on the underlying String
  return (str[*args] = replace_by) if selector.is_a?(Regexp)

  result = u_unpack(str)
  size = result.length

  case selector
  when Integer
    if selector >= size || selector < -size
      raise IndexError, "index #{selector} out of string"
    end
    # Splice by start/length rather than by a computed Range: a Range built from a
    # negative start plus a length can wrap around (e.g. -3..1) and silently insert.
    span = args[1].nil? ? 1 : args[1]
    raise IndexError, "negative length #{span}" if span < 0
    target = [selector, span]
    # An Integer replacement is a single codepoint
    replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
  when Range
    # Range#begin is used instead of Range#min, which is nil for an empty range (e.g. 3..2)
    first = selector.begin || 0
    raise RangeError, "#{selector} out of range" if first >= size
    target = [selector]
  else
    needle = selector.to_s
    start = index(str, needle)
    # index yields nil when the needle is absent; mirror String#[]= instead of failing on nil
    raise IndexError, "string not matched" if start.nil?
    target = [start, length(needle)]
  end

  result[*target] = u_unpack(replace_by)
  str.replace(result.pack('U*'))
end

# --- activesupport/test/multibyte_handler_test.rb ---

# Exercises every selector form of the handler's []=: character offset, offset plus length,
# Range, Regexp (with and without a capture index) and String, including the error cases.
# The string must be left untouched whenever an error is raised.
def test_indexed_insert
  s = "Καλη!"
  @handler[s, 2] = "a"
  assert_equal "Καaη!", s
  @handler[s, 2] = "ηη"
  assert_equal "Καηηη!", s
  assert_raises(IndexError) { @handler[s, 10] = 'a' }
  assert_equal "Καηηη!", s
  @handler[s, 2] = 32
  assert_equal "Κα ηη!", s
  @handler[s, 3, 2] = "λλλ"
  assert_equal "Κα λλλ!", s
  @handler[s, 1, 0] = "λ"
  assert_equal "Κλα λλλ!", s
  assert_raises(IndexError) { @handler[s, 10, 4] = 'a' }
  assert_equal "Κλα λλλ!", s
  @handler[s, 4..6] = "ηη"
  assert_equal "Κλα ηη!", s
  assert_raises(RangeError) { @handler[s, 10..12] = 'a' }
  assert_equal "Κλα ηη!", s
  @handler[s, /ηη/] = "λλλ"
  assert_equal "Κλα λλλ!", s
  assert_raises(IndexError) { @handler[s, /ii/] = 'a' }
  assert_equal "Κλα λλλ!", s
  @handler[s, /(λλ)(.)/, 2] = "α"
  assert_equal "Κλα λλα!", s
  assert_raises(IndexError) { @handler[s, /()/, 10] = 'a' }
  assert_equal "Κλα λλα!", s
  @handler[s, "α"] = "η"
  assert_equal "Κλη λλα!", s
  @handler[s, "λλ"] = "ααα"
  assert_equal "Κλη αααα!", s
  # A string selector that does not occur must raise instead of failing on nil
  assert_raises(IndexError) { @handler[s, "ωω"] = 'a' }
  assert_equal "Κλη αααα!", s
  # A negative length is rejected, as with String#[]=
  assert_raises(IndexError) { @handler[s, 1, -1] = 'a' }
  assert_equal "Κλη αααα!", s
  # A negative offset whose length overruns the end replaces up to the end of the string
  @handler[s, -3, 5] = "λ"
  assert_equal "Κλη ααλ", s
end
strip should strip all 26 types of whitespace. This includes the NO BREAK SPACE # aka BOM (byte order mark). The byte order mark has no place in UTF-8 because it's used to detect LE and BE. |