diff options
Diffstat (limited to 'activesupport/lib/active_support/multibyte/unicode.rb')
-rw-r--r-- | activesupport/lib/active_support/multibyte/unicode.rb | 19 |
1 files changed, 8 insertions, 11 deletions
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index 62caff77a3..586002b03b 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -1,4 +1,3 @@ -# encoding: utf-8 module ActiveSupport module Multibyte module Unicode @@ -11,7 +10,7 @@ module ActiveSupport NORMALIZATION_FORMS = [:c, :kc, :d, :kd] # The Unicode version that is supported by the implementation - UNICODE_VERSION = '6.3.0' + UNICODE_VERSION = '8.0.0' # The default normalization used for operations that require # normalization. It can be set to any of the normalizations @@ -42,7 +41,6 @@ module ActiveSupport 0x0085, # White_Space # Cc <control-0085> 0x00A0, # White_Space # Zs NO-BREAK SPACE 0x1680, # White_Space # Zs OGHAM SPACE MARK - 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE 0x2028, # White_Space # Zl LINE SEPARATOR 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR @@ -59,7 +57,7 @@ module ActiveSupport # Returns a regular expression pattern that matches the passed Unicode # codepoints. def self.codepoints_to_pattern(array_of_codepoints) #:nodoc: - array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|') + array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze) end TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u @@ -212,9 +210,8 @@ module ActiveSupport codepoints end - # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1. # Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars. - if '<3'.respond_to?(:scrub) && !defined?(Rubinius) + if !defined?(Rubinius) # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent # resulting in a valid UTF-8 string. # @@ -259,7 +256,7 @@ module ActiveSupport # * <tt>string</tt> - The string to perform normalization on. # * <tt>form</tt> - The form you want to normalize in. Should be one of # the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. - # Default is ActiveSupport::Multibyte.default_normalization_form. + # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form. def normalize(string, form=nil) form ||= @default_normalization_form # See http://www.unicode.org/reports/tr15, Table 1 @@ -275,7 +272,7 @@ module ActiveSupport compose(reorder_characters(decompose(:compatibility, codepoints))) else raise ArgumentError, "#{form} is not a valid normalization variant", caller - end.pack('U*') + end.pack('U*'.freeze) end def downcase(string) @@ -336,11 +333,11 @@ module ActiveSupport begin @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read } rescue => e - raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") + raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable") end # Redefine the === method so we can write shorter rules for grapheme cluster breaks - @boundary.each do |k,_| + @boundary.each_key do |k| @boundary[k].instance_eval do def ===(other) detect { |i| i === other } ? true : false @@ -368,6 +365,7 @@ module ActiveSupport private def apply_mapping(string, mapping) #:nodoc: + database.codepoints string.each_codepoint.map do |codepoint| cp = database.codepoints[codepoint] if cp and (ncp = cp.send(mapping)) and ncp > 0 @@ -385,7 +383,6 @@ module ActiveSupport def database @database ||= UnicodeDatabase.new end - end end end |