2 files changed, 21 insertions, 15 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 3c0cf9f137..707cf200b5 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -1,4 +1,3 @@
-# encoding: utf-8
 require 'active_support/json'
 require 'active_support/core_ext/string/access'
 require 'active_support/core_ext/string/behavior'
@@ -86,10 +85,20 @@ module ActiveSupport #:nodoc:
         @wrapped_string.split(*args).map { |i| self.class.new(i) }
       end
 
-      # Works like like <tt>String#slice!</tt>, but returns an instance of
-      # Chars, or nil if the string was not modified.
+      # Works like <tt>String#slice!</tt>, but returns an instance of
+      # Chars, or nil if the string was not modified. The string will not be
+      # modified if the range given is out of bounds.
+      #
+      #   string = 'Welcome'
+      #   string.mb_chars.slice!(3)    # => #<ActiveSupport::Multibyte::Chars:0x000000038109b8 @wrapped_string="c">
+      #   string # => 'Welome'
+      #   string.mb_chars.slice!(0..3) # => #<ActiveSupport::Multibyte::Chars:0x00000002eb80a0 @wrapped_string="Welo">
+      #   string # => 'me'
       def slice!(*args)
-        chars(@wrapped_string.slice!(*args))
+        string_sliced = @wrapped_string.slice!(*args)
+        if string_sliced
+          chars(string_sliced)
+        end
       end
 
       # Reverses all characters in the string.
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb
index 62caff77a3..586002b03b 100644
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -1,4 +1,3 @@
-# encoding: utf-8
 module ActiveSupport
   module Multibyte
     module Unicode
@@ -11,7 +10,7 @@ module ActiveSupport
       NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
 
       # The Unicode version that is supported by the implementation
-      UNICODE_VERSION = '6.3.0'
+      UNICODE_VERSION = '8.0.0'
 
       # The default normalization used for operations that require
       # normalization. It can be set to any of the normalizations
@@ -42,7 +41,6 @@ module ActiveSupport
         0x0085,                # White_Space # Cc       <control-0085>
         0x00A0,                # White_Space # Zs       NO-BREAK SPACE
         0x1680,                # White_Space # Zs       OGHAM SPACE MARK
-        0x180E,                # White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
         (0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
         0x2028,                # White_Space # Zl       LINE SEPARATOR
         0x2029,                # White_Space # Zp       PARAGRAPH SEPARATOR
@@ -59,7 +57,7 @@ module ActiveSupport
       # Returns a regular expression pattern that matches the passed Unicode
       # codepoints.
       def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
-        array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
+        array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze)
       end
       TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
       LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
@@ -212,9 +210,8 @@ module ActiveSupport
         codepoints
       end
 
-      # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1.
       # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
-      if '<3'.respond_to?(:scrub) && !defined?(Rubinius)
+      if !defined?(Rubinius)
         # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
         # resulting in a valid UTF-8 string.
         #
@@ -259,7 +256,7 @@ module ActiveSupport
       # * <tt>string</tt> - The string to perform normalization on.
       # * <tt>form</tt> - The form you want to normalize in. Should be one of
       #   the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
-      #   Default is ActiveSupport::Multibyte.default_normalization_form.
+      #   Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
       def normalize(string, form=nil)
         form ||= @default_normalization_form
         # See http://www.unicode.org/reports/tr15, Table 1
@@ -275,7 +272,7 @@ module ActiveSupport
             compose(reorder_characters(decompose(:compatibility, codepoints)))
           else
             raise ArgumentError, "#{form} is not a valid normalization variant", caller
-        end.pack('U*')
+        end.pack('U*'.freeze)
       end
 
       def downcase(string)
@@ -336,11 +333,11 @@ module ActiveSupport
           begin
             @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
           rescue => e
-              raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
+            raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
           end
 
           # Redefine the === method so we can write shorter rules for grapheme cluster breaks
-          @boundary.each do |k,_|
+          @boundary.each_key do |k|
             @boundary[k].instance_eval do
               def ===(other)
                 detect { |i| i === other } ? true : false
@@ -368,6 +365,7 @@ module ActiveSupport
       private
 
       def apply_mapping(string, mapping) #:nodoc:
+        database.codepoints
         string.each_codepoint.map do |codepoint|
           cp = database.codepoints[codepoint]
           if cp and (ncp = cp.send(mapping)) and ncp > 0
@@ -385,7 +383,6 @@ module ActiveSupport
       def database
         @database ||= UnicodeDatabase.new
       end
-
     end
   end
 end