aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/multibyte/unicode.rb
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/multibyte/unicode.rb')
-rw-r--r--activesupport/lib/active_support/multibyte/unicode.rb19
1 files changed, 8 insertions, 11 deletions
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb
index 62caff77a3..586002b03b 100644
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -1,4 +1,3 @@
-# encoding: utf-8
module ActiveSupport
module Multibyte
module Unicode
@@ -11,7 +10,7 @@ module ActiveSupport
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
# The Unicode version that is supported by the implementation
- UNICODE_VERSION = '6.3.0'
+ UNICODE_VERSION = '8.0.0'
# The default normalization used for operations that require
# normalization. It can be set to any of the normalizations
@@ -42,7 +41,6 @@ module ActiveSupport
0x0085, # White_Space # Cc <control-0085>
0x00A0, # White_Space # Zs NO-BREAK SPACE
0x1680, # White_Space # Zs OGHAM SPACE MARK
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
0x2028, # White_Space # Zl LINE SEPARATOR
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
@@ -59,7 +57,7 @@ module ActiveSupport
# Returns a regular expression pattern that matches the passed Unicode
# codepoints.
def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
- array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
+ array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze)
end
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
@@ -212,9 +210,8 @@ module ActiveSupport
codepoints
end
- # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1.
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
- if '<3'.respond_to?(:scrub) && !defined?(Rubinius)
+ if !defined?(Rubinius)
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
# resulting in a valid UTF-8 string.
#
@@ -259,7 +256,7 @@ module ActiveSupport
# * <tt>string</tt> - The string to perform normalization on.
# * <tt>form</tt> - The form you want to normalize in. Should be one of
# the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
- # Default is ActiveSupport::Multibyte.default_normalization_form.
+ # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
def normalize(string, form=nil)
form ||= @default_normalization_form
# See http://www.unicode.org/reports/tr15, Table 1
@@ -275,7 +272,7 @@ module ActiveSupport
compose(reorder_characters(decompose(:compatibility, codepoints)))
else
raise ArgumentError, "#{form} is not a valid normalization variant", caller
- end.pack('U*')
+ end.pack('U*'.freeze)
end
def downcase(string)
@@ -336,11 +333,11 @@ module ActiveSupport
begin
@codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
rescue => e
- raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
+ raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
end
# Redefine the === method so we can write shorter rules for grapheme cluster breaks
- @boundary.each do |k,_|
+ @boundary.each_key do |k|
@boundary[k].instance_eval do
def ===(other)
detect { |i| i === other } ? true : false
@@ -368,6 +365,7 @@ module ActiveSupport
private
def apply_mapping(string, mapping) #:nodoc:
+ database.codepoints
string.each_codepoint.map do |codepoint|
cp = database.codepoints[codepoint]
if cp and (ncp = cp.send(mapping)) and ncp > 0
@@ -385,7 +383,6 @@ module ActiveSupport
def database
@database ||= UnicodeDatabase.new
end
-
end
end
end