aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport
diff options
context:
space:
mode:
author Adam Roben <adam@roben.org> 2013-11-13 17:13:22 -0500
committer Adam Roben <adam@roben.org> 2013-11-13 17:13:22 -0500
commit 56e7b31487b838410d185eaf573359432ec2d11a (patch)
tree 69404d8671d17b81a6730ee273a7eaf62f8884c4 /activesupport
parent c994e1086270cdfe4e145ea206049f05ef5414d6 (diff)
download rails-56e7b31487b838410d185eaf573359432ec2d11a.tar.gz
rails-56e7b31487b838410d185eaf573359432ec2d11a.tar.bz2
rails-56e7b31487b838410d185eaf573359432ec2d11a.zip
Refactor Unicode.unpack_graphemes slightly
This will make it easier to add the rest of the rules listed in UAX 29.
Diffstat (limited to 'activesupport')
-rw-r--r-- activesupport/lib/active_support/multibyte/unicode.rb 36
1 files changed, 23 insertions, 13 deletions
diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb
index 1845c6ae38..aea7709b55 100644
--- a/activesupport/lib/active_support/multibyte/unicode.rb
+++ b/activesupport/lib/active_support/multibyte/unicode.rb
@@ -89,19 +89,29 @@ module ActiveSupport
pos += 1
previous = codepoints[pos-1]
current = codepoints[pos]
- if (
- # CR X LF
- ( previous == database.boundary[:cr] and current == database.boundary[:lf] ) or
- # L X (L|V|LV|LVT)
- ( database.boundary[:l] === previous and in_char_class?(current, [:l,:v,:lv,:lvt]) ) or
- # (LV|V) X (V|T)
- ( in_char_class?(previous, [:lv,:v]) and in_char_class?(current, [:v,:t]) ) or
- # (LVT|T) X (T)
- ( in_char_class?(previous, [:lvt,:t]) and database.boundary[:t] === current ) or
- # X Extend
- (database.boundary[:extend] === current)
- )
- else
+
+ should_break =
+ # GB3. CR X LF
+ if previous == database.boundary[:cr] and current == database.boundary[:lf]
+ false
+ # GB6. L X (L|V|LV|LVT)
+ elsif database.boundary[:l] === previous and in_char_class?(current, [:l,:v,:lv,:lvt])
+ false
+ # GB7. (LV|V) X (V|T)
+ elsif in_char_class?(previous, [:lv,:v]) and in_char_class?(current, [:v,:t])
+ false
+ # GB8. (LVT|T) X (T)
+ elsif in_char_class?(previous, [:lvt,:t]) and database.boundary[:t] === current
+ false
+ # GB9. X Extend
+ elsif database.boundary[:extend] === current
+ false
+ # GB10. Any ÷ Any
+ else
+ true
+ end
+
+ if should_break
unpacked << codepoints[marker..pos-1]
marker = pos
end