From 3ca6d1fb4a3acc7a6dfff9ee39ee7f75fa71d0f4 Mon Sep 17 00:00:00 2001 From: Adam Roben Date: Wed, 13 Nov 2013 17:16:55 -0500 Subject: Support extended grapheme clusters and UAX 29 http://www.unicode.org/reports/tr29/tr29-21.html is the version of UAX 29 that corresponds to Unicode 6.2.0. Unicode.unpack_graphemes now implements all the rules listed there, including the ones for extended grapheme clusters. I added a new optional test, test/multibyte_grapheme_break_conformance.rb, that is heavily based on test/multibyte_normalization_conformance.rb, which runs the Unicode test suite. --- activesupport/lib/active_support/multibyte/unicode.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'activesupport/lib/active_support') diff --git a/activesupport/lib/active_support/multibyte/unicode.rb b/activesupport/lib/active_support/multibyte/unicode.rb index aea7709b55..d85ea3b5e6 100644 --- a/activesupport/lib/active_support/multibyte/unicode.rb +++ b/activesupport/lib/active_support/multibyte/unicode.rb @@ -94,6 +94,12 @@ module ActiveSupport # GB3. CR X LF if previous == database.boundary[:cr] and current == database.boundary[:lf] false + # GB4. (Control|CR|LF) ÷ + elsif previous and in_char_class?(previous, [:control,:cr,:lf]) + true + # GB5. ÷ (Control|CR|LF) + elsif in_char_class?(current, [:control,:cr,:lf]) + true # GB6. L X (L|V|LV|LVT) elsif database.boundary[:l] === previous and in_char_class?(current, [:l,:v,:lv,:lvt]) false @@ -103,9 +109,18 @@ module ActiveSupport # GB8. (LVT|T) X (T) elsif in_char_class?(previous, [:lvt,:t]) and database.boundary[:t] === current false + # GB8a. Regional_Indicator X Regional_Indicator + elsif database.boundary[:regional_indicator] === previous and database.boundary[:regional_indicator] === current + false # GB9. X Extend elsif database.boundary[:extend] === current false + # GB9a. X SpacingMark + elsif database.boundary[:spacingmark] === current + false + # GB9b. Prepend X + elsif database.boundary[:prepend] === previous + false # GB10. Any ÷ Any else true -- cgit v1.2.3