aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--activesupport/CHANGELOG2
-rw-r--r--activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb38
-rw-r--r--activesupport/test/multibyte_handler_test.rb34
3 files changed, 74 insertions, 0 deletions
diff --git a/activesupport/CHANGELOG b/activesupport/CHANGELOG
index 1b539f52bf..35a7d77859 100644
--- a/activesupport/CHANGELOG
+++ b/activesupport/CHANGELOG
@@ -1,5 +1,7 @@
*SVN*
+* Add support for []= on ActiveSupport::Multibyte::Chars. Closes #9142. [ewan, manfred]
+
* Added Array#extract_options! to encapsulate the pattern of getting an options hash out of a variable number of parameters #8759 [norbert].
* Let alias_attribute work with attributes with initial capital letters (legacy columns etc). Closes #8596 [mpalmer]
diff --git a/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb b/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
index 009950d33e..02fc7b3e2b 100644
--- a/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
+++ b/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
@@ -140,6 +140,44 @@ module ActiveSupport::Multibyte::Handlers #:nodoc:
bidx ? (u_unpack(str.slice(0...bidx)).size) : nil
end
+ # Works just like the indexed replace method on string, except instead of byte offsets you specify
+ # character offsets.
+ #
+ # Example:
+ #
+ # s = "Müller"
+ # s.chars[2] = "e" # Replace character with offset 2
+ # s
+ # #=> "Müeler"
+ #
+ # s = "Müller"
+ # s.chars[1, 2] = "ö" # Replace 2 characters at character offset 1
+ # s
+ # #=> "Möler"
+ def []=(str, *args)
+ replace_by = args.pop
+ # Indexed replace with regular expressions already works
+ return str[*args] = replace_by if args.first.is_a?(Regexp)
+ result = u_unpack(str)
+ if args[0].is_a?(Fixnum)
+ raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
+ min = args[0]
+ max = args[1].nil? ? min : (min + args[1] - 1)
+ range = Range.new(min, max)
+ replace_by = [replace_by].pack('U') if replace_by.is_a?(Fixnum)
+ elsif args.first.is_a?(Range)
+ raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
+ range = args[0]
+ else
+ needle = args[0].to_s
+ min = index(str, needle)
+ max = min + length(needle) - 1
+ range = Range.new(min, max)
+ end
+ result[range] = u_unpack(replace_by)
+ str.replace(result.pack('U*'))
+ end
+
# Does Unicode-aware rstrip
def rstrip(str)
str.gsub(UNICODE_TRAILERS_PAT, '')
diff --git a/activesupport/test/multibyte_handler_test.rb b/activesupport/test/multibyte_handler_test.rb
index ea728aa555..e4744def6c 100644
--- a/activesupport/test/multibyte_handler_test.rb
+++ b/activesupport/test/multibyte_handler_test.rb
@@ -199,6 +199,40 @@ module UTF8HandlingTest
assert_raise(ActiveSupport::Multibyte::Handlers::EncodingError) { @handler.index(@bytestring, "\010") }
end
+ def test_indexed_insert # Walks one string through each supported []= index form; every step builds on the previous result
+ s = "Καλη!"
+ @handler[s, 2] = "a" # single character offset
+ assert_equal "Καaη!", s
+ @handler[s, 2] = "ηη" # the replacement may be longer than what it replaces
+ assert_equal "Καηηη!", s
+ assert_raises(IndexError) { @handler[s, 10] = 'a' } # offset past the end
+ assert_equal "Καηηη!", s # a failed replace must leave the string untouched
+ @handler[s, 2] = 32 # integer replacement is a codepoint (32 is a space)
+ assert_equal "Κα ηη!", s
+ @handler[s, 3, 2] = "λλλ" # character offset plus length
+ assert_equal "Κα λλλ!", s
+ @handler[s, 1, 0] = "λ" # zero length inserts without removing anything
+ assert_equal "Κλα λλλ!", s
+ assert_raises(IndexError) { @handler[s, 10, 4] = 'a' }
+ assert_equal "Κλα λλλ!", s
+ @handler[s, 4..6] = "ηη" # character range
+ assert_equal "Κλα ηη!", s
+ assert_raises(RangeError) { @handler[s, 10..12] = 'a' } # range starting past the end
+ assert_equal "Κλα ηη!", s
+ @handler[s, /ηη/] = "λλλ" # regular expression
+ assert_equal "Κλα λλλ!", s
+ assert_raises(IndexError) { @handler[s, /ii/] = 'a' } # pattern without a match
+ assert_equal "Κλα λλλ!", s
+ @handler[s, /(λλ)(.)/, 2] = "α" # regular expression plus capture group number
+ assert_equal "Κλα λλα!", s
+ assert_raises(IndexError) { @handler[s, /()/, 10] = 'a' } # capture group that does not exist
+ assert_equal "Κλα λλα!", s
+ @handler[s, "α"] = "η" # string needle: only the first occurrence is replaced
+ assert_equal "Κλη λλα!", s
+ @handler[s, "λλ"] = "ααα" # multi-character needle
+ assert_equal "Κλη αααα!", s
+ end
+
def test_strip
# A Unicode-aware version of strip should strip all 26 types of whitespace. This includes the ZERO WIDTH NO-BREAK SPACE
# aka BOM (byte order mark). The byte order mark has no place in UTF-8 because it's used to detect LE and BE.