about summary refs log tree commit diff stats
path: root/activesupport/lib/active_support/multibyte
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/multibyte')
-rw-r--r-- activesupport/lib/active_support/multibyte/chars.rb 17
-rw-r--r-- activesupport/lib/active_support/multibyte/unicode.rb 19
2 files changed, 21 insertions, 15 deletions
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 3c0cf9f137..707cf200b5 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -1,4 +1,3 @@
-# encoding: utf-8
require 'active_support/json'
require 'active_support/core_ext/string/access'
require 'active_support/core_ext/string/behavior'
@@ -86,10 +85,20 @@ module ActiveSupport #:nodoc:
@wrapped_string.split(*args).map { |i| self.class.new(i) }
end
- # Works like like <tt>String#slice!</tt>, but returns an instance of
- # Chars, or nil if the string was not modified.
+ # Works like <tt>String#slice!</tt>, but returns an instance of
+ # Chars, or nil if the string was not modified. The string will not be
+ # modified if the range given is out of bounds
+ #
+ # string = 'Welcome'
+ # string.mb_chars.slice!(3) # => #<ActiveSupport::Multibyte::Chars:0x000000038109b8 @wrapped_string="c">
+ # string # => 'Welome'
+ # string.mb_chars.slice!(0..3) # => #<ActiveSupport::Multibyte::Chars:0x00000002eb80a0 @wrapped_string="Welo">
+ # string # => 'me'
def slice!(*args)
- chars(@wrapped_string.slice!(*args))
+ string_sliced = @wrapped_string.slice!(*args)
+ if string_sliced
+ chars(string_sliced)
+ end
end
# Reverses all characters in the string.
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb
index 62caff77a3..586002b03b 100644
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -1,4 +1,3 @@
-# encoding: utf-8
module ActiveSupport
module Multibyte
module Unicode
@@ -11,7 +10,7 @@ module ActiveSupport
NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
# The Unicode version that is supported by the implementation
- UNICODE_VERSION = '6.3.0'
+ UNICODE_VERSION = '8.0.0'
# The default normalization used for operations that require
# normalization. It can be set to any of the normalizations
@@ -42,7 +41,6 @@ module ActiveSupport
0x0085, # White_Space # Cc <control-0085>
0x00A0, # White_Space # Zs NO-BREAK SPACE
0x1680, # White_Space # Zs OGHAM SPACE MARK
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
(0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
0x2028, # White_Space # Zl LINE SEPARATOR
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
@@ -59,7 +57,7 @@ module ActiveSupport
# Returns a regular expression pattern that matches the passed Unicode
# codepoints.
def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
- array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
+ array_of_codepoints.collect{ |e| [e].pack 'U*'.freeze }.join('|'.freeze)
end
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
@@ -212,9 +210,8 @@ module ActiveSupport
codepoints
end
- # Ruby >= 2.1 has String#scrub, which is faster than the workaround used for < 2.1.
# Rubinius' String#scrub, however, doesn't support ASCII-incompatible chars.
- if '<3'.respond_to?(:scrub) && !defined?(Rubinius)
+ if !defined?(Rubinius)
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent
# resulting in a valid UTF-8 string.
#
@@ -259,7 +256,7 @@ module ActiveSupport
# * <tt>string</tt> - The string to perform normalization on.
# * <tt>form</tt> - The form you want to normalize in. Should be one of
# the following: <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>.
- # Default is ActiveSupport::Multibyte.default_normalization_form.
+ # Default is ActiveSupport::Multibyte::Unicode.default_normalization_form.
def normalize(string, form=nil)
form ||= @default_normalization_form
# See http://www.unicode.org/reports/tr15, Table 1
@@ -275,7 +272,7 @@ module ActiveSupport
compose(reorder_characters(decompose(:compatibility, codepoints)))
else
raise ArgumentError, "#{form} is not a valid normalization variant", caller
- end.pack('U*')
+ end.pack('U*'.freeze)
end
def downcase(string)
@@ -336,11 +333,11 @@ module ActiveSupport
begin
@codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
rescue => e
- raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
+ raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
end
# Redefine the === method so we can write shorter rules for grapheme cluster breaks
- @boundary.each do |k,_|
+ @boundary.each_key do |k|
@boundary[k].instance_eval do
def ===(other)
detect { |i| i === other } ? true : false
@@ -368,6 +365,7 @@ module ActiveSupport
private
def apply_mapping(string, mapping) #:nodoc:
+ database.codepoints
string.each_codepoint.map do |codepoint|
cp = database.codepoints[codepoint]
if cp and (ncp = cp.send(mapping)) and ncp > 0
@@ -385,7 +383,6 @@ module ActiveSupport
def database
@database ||= UnicodeDatabase.new
end
-
end
end
end