diff options
Diffstat (limited to 'activesupport/lib/active_support/multibyte')
-rw-r--r-- | activesupport/lib/active_support/multibyte/chars.rb | 17 | ||||
-rw-r--r-- | activesupport/lib/active_support/multibyte/unicode.rb | 19 |
2 files changed, 21 insertions, 15 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb index 3c0cf9f137..707cf200b5 100644 --- a/activesupport/lib/active_support/multibyte/chars.rb +++ b/activesupport/lib/active_support/multibyte/chars.rb @@ -1,4 +1,3 @@ -# encoding: utf-8 require 'active_support/json' require 'active_support/core_ext/string/access' require 'active_support/core_ext/string/behavior' @@ -86,10 +85,20 @@ module ActiveSupport #:nodoc: @wrapped_string.split(*args).map { |i| self.class.new(i) } end - # Works like like <tt>String#slice!</tt>, but returns an instance of - # Chars, or nil if the string was not modified. + # Works like <tt>String#slice!</tt>, but returns an instance of + # Chars, or nil if the string was not modified. The string will not be + # modified if the range given is out of bounds + # + # string = 'Welcome' + # string.mb_chars.slice!(3) # => #<ActiveSupport::Multibyte::Chars:0x000000038109b8 @wrapped_string="c"> + # string # => 'Welome' + # string.mb_chars.slice!(0..3) # => #<ActiveSupport::Multibyte::Chars:0x00000002eb80a0 @wrapped_string="Welo"> + # string # => 'me' def slice!(*args) - chars(@wrapped_string.slice!(*args)) + string_sliced = @wrapped_string.slice!(*args) + if string_sliced + chars(string_sliced) + end end # Reverses all characters in the string. diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index 62caff77a3..586002b03b 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -1,4 +1,3 @@ -# encoding: utf-8 module ActiveSupport module Multibyte module Unicode @@ -11,7 +10,7 @@ module ActiveSupport NORMALIZATION_FORMS = [:c, :kc, :d, :kd] # The Unicode version that is supported by the implementation - UNICODE_VERSION = '6.3.0' + UNICODE_VERSION = '8.0.0' # The default normalization used for operations that require # normalization. It can be set to any of the normalizations @@ -42,7 +41,6 @@ module ActiveSupport 0x0085, # White_Space # Cc <control-0085> 0x00A0, # White_Space # Zs NO-BREAK SPACE 0x1680, # White_Space # Zs OGHAM SPACE MARK - 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE 0x2028, # White_Space # Zl LINE SEPARATOR 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR @@ -59,7 +57,7 @@ module ActiveSupport # Returns a regular expression pattern that matches the passed Unicode # codepoints. def self.codepoints_to_pattern(array_of_codepoints) #:nodoc: - array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|') + array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze) end TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u @@ -212,9 +210,8 @@ module ActiveSupport codepoints end - # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1. # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars. - if '<3'.respond_to?(:scrub) && !defined?(Rubinius) + if !defined?(Rubinius) # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent # resulting in a valid UTF-8 string. # @@ -259,7 +256,7 @@ module ActiveSupport # * <tt>string</tt> - The string to perform normalization on. # * <tt>form</tt> - The form you want to normalize in. Should be one of # the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. - # Default is ActiveSupport::Multibyte.default_normalization_form. + # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form. def normalize(string, form=nil) form ||= @default_normalization_form # See http://www.unicode.org/reports/tr15, Table 1 @@ -275,7 +272,7 @@ module ActiveSupport compose(reorder_characters(decompose(:compatibility, codepoints))) else raise ArgumentError, "#{form} is not a valid normalization variant", caller - end.pack('U*') + end.pack('U*'.freeze) end def downcase(string) @@ -336,11 +333,11 @@ module ActiveSupport begin @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read } rescue => e - raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") + raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") end # Redefine the === method so we can write shorter rules for grapheme cluster breaks - @boundary.each do |k,_| + @boundary.each_key do |k| @boundary[k].instance_eval do def ===(other) detect { |i| i === other } ? true : false @@ -368,6 +365,7 @@ module ActiveSupport private def apply_mapping(string, mapping) #:nodoc: + database.codepoints string.each_codepoint.map do |codepoint| cp = database.codepoints[codepoint] if cp and (ncp = cp.send(mapping)) and ncp > 0 @@ -385,7 +383,6 @@ module ActiveSupport def database @database ||= UnicodeDatabase.new end - end end end |