2 files changed, 19 insertions, 49 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 938e4ebb72..65d6259a06 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -87,7 +87,7 @@ module ActiveSupport #:nodoc:
       end
 
       # Works like <tt>String#slice!</tt>, but returns an instance of
-      # Chars, or nil if the string was not modified. The string will not be
+      # Chars, or +nil+ if the string was not modified. The string will not be
       # modified if the range given is out of bounds
       #
       #   string = 'Welcome'
@@ -210,9 +210,9 @@ module ActiveSupport #:nodoc:
         end
       end
 
-      protected
+      private
 
-        def translate_offset(byte_offset) #:nodoc:
+        def translate_offset(byte_offset)
           return nil if byte_offset.nil?
           return 0   if @wrapped_string == ""
 
@@ -224,7 +224,7 @@ module ActiveSupport #:nodoc:
           end
         end
 
-        def chars(string) #:nodoc:
+        def chars(string)
           self.class.new(string)
         end
     end
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb
index 2159abef14..05cfb249c3 100644
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -30,36 +30,6 @@ module ActiveSupport
       HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT
       HANGUL_SCOUNT = 11172
       HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT
-      HANGUL_JAMO_FIRST = 0x1100
-      HANGUL_JAMO_LAST = 0x11FF
-
-      # All the unicode whitespace
-      WHITESPACE = [
-        (0x0009..0x000D).to_a, # White_Space # Cc   [5] <control-0009>..<control-000D>
-        0x0020,                # White_Space # Zs       SPACE
-        0x0085,                # White_Space # Cc       <control-0085>
-        0x00A0,                # White_Space # Zs       NO-BREAK SPACE
-        0x1680,                # White_Space # Zs       OGHAM SPACE MARK
-        (0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
-        0x2028,                # White_Space # Zl       LINE SEPARATOR
-        0x2029,                # White_Space # Zp       PARAGRAPH SEPARATOR
-        0x202F,                # White_Space # Zs       NARROW NO-BREAK SPACE
-        0x205F,                # White_Space # Zs       MEDIUM MATHEMATICAL SPACE
-        0x3000,                # White_Space # Zs       IDEOGRAPHIC SPACE
-      ].flatten.freeze
-
-      # BOM (byte order mark) can also be seen as whitespace, it's a
-      # non-rendering character used to distinguish between little and big
-      # endian. This is not an issue in utf-8, so it must be ignored.
-      LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
-
-      # Returns a regular expression pattern that matches the passed Unicode
-      # codepoints.
-      def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
-        array_of_codepoints.collect { |e| [e].pack "U*".freeze }.join("|".freeze)
-      end
-      TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
-      LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
 
       # Detect whether the codepoint is in a certain character class. Returns
       # +true+ when it's in the specified character class and +false+ otherwise.
@@ -82,9 +52,9 @@ module ActiveSupport
         pos = 0
         marker = 0
         eoc = codepoints.length
-        while(pos < eoc)
+        while (pos < eoc)
           pos += 1
-          previous = codepoints[pos-1]
+          previous = codepoints[pos - 1]
           current = codepoints[pos]
 
           should_break =
@@ -92,19 +62,19 @@ module ActiveSupport
             if previous == database.boundary[:cr] && current == database.boundary[:lf]
               false
             # GB4. (Control|CR|LF) ÷
-            elsif previous && in_char_class?(previous, [:control,:cr,:lf])
+            elsif previous && in_char_class?(previous, [:control, :cr, :lf])
               true
             # GB5. ÷ (Control|CR|LF)
-            elsif in_char_class?(current, [:control,:cr,:lf])
+            elsif in_char_class?(current, [:control, :cr, :lf])
               true
             # GB6. L X (L|V|LV|LVT)
-            elsif database.boundary[:l] === previous && in_char_class?(current, [:l,:v,:lv,:lvt])
+            elsif database.boundary[:l] === previous && in_char_class?(current, [:l, :v, :lv, :lvt])
               false
             # GB7. (LV|V) X (V|T)
-            elsif in_char_class?(previous, [:lv,:v]) && in_char_class?(current, [:v,:t])
+            elsif in_char_class?(previous, [:lv, :v]) && in_char_class?(current, [:v, :t])
               false
             # GB8. (LVT|T) X (T)
-            elsif in_char_class?(previous, [:lvt,:t]) && database.boundary[:t] === current
+            elsif in_char_class?(previous, [:lvt, :t]) && database.boundary[:t] === current
               false
             # GB8a. Regional_Indicator X Regional_Indicator
             elsif database.boundary[:regional_indicator] === previous && database.boundary[:regional_indicator] === current
@@ -124,7 +94,7 @@ module ActiveSupport
             end
 
           if should_break
-            unpacked << codepoints[marker..pos-1]
+            unpacked << codepoints[marker..pos - 1]
             marker = pos
           end
         end
@@ -140,12 +110,12 @@ module ActiveSupport
 
       # Re-order codepoints so the string becomes canonical.
       def reorder_characters(codepoints)
-        length = codepoints.length- 1
+        length = codepoints.length - 1
         pos = 0
         while pos < length do
-          cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos+1]]
+          cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos + 1]]
           if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
-            codepoints[pos..pos+1] = cp2.code, cp1.code
+            codepoints[pos..pos + 1] = cp2.code, cp1.code
             pos += (pos > 0 ? -1 : 1)
           else
             pos += 1
@@ -187,9 +157,9 @@ module ActiveSupport
           lindex = starter_char - HANGUL_LBASE
           # -- Hangul
           if 0 <= lindex && lindex < HANGUL_LCOUNT
-            vindex = codepoints[starter_pos+1] - HANGUL_VBASE rescue vindex = -1
+            vindex = codepoints[starter_pos + 1] - HANGUL_VBASE rescue vindex = -1
             if 0 <= vindex && vindex < HANGUL_VCOUNT
-              tindex = codepoints[starter_pos+2] - HANGUL_TBASE rescue tindex = -1
+              tindex = codepoints[starter_pos + 2] - HANGUL_TBASE rescue tindex = -1
               if 0 <= tindex && tindex < HANGUL_TCOUNT
                 j = starter_pos + 2
                 eoa -= 2
@@ -281,7 +251,7 @@ module ActiveSupport
       # * <tt>form</tt> - The form you want to normalize in. Should be one of
       #   the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
       #   Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
-      def normalize(string, form=nil)
+      def normalize(string, form = nil)
         form ||= @default_normalization_form
         # See http://www.unicode.org/reports/tr15, Table 1
         codepoints = string.codepoints.to_a
@@ -388,7 +358,7 @@ module ActiveSupport
 
       private
 
-        def apply_mapping(string, mapping) #:nodoc:
+        def apply_mapping(string, mapping)
           database.codepoints
           string.each_codepoint.map do |codepoint|
             cp = database.codepoints[codepoint]