From b5245da94a7c7667b57bb2184fa9aa7beb998da6 Mon Sep 17 00:00:00 2001
From: Antoine Lyset
Date: Tue, 22 Jan 2013 22:44:53 +0100
Subject: Improve String#squish whitespaces matching

---
 activesupport/CHANGELOG.md                                  | 2 ++
 activesupport/lib/active_support/core_ext/string/filters.rb | 7 +++++--
 activesupport/test/core_ext/string_ext_test.rb              | 7 ++++---
 guides/source/active_support_core_extensions.md             | 2 ++
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md
index 72f28aefc7..7c414efd5b 100644
--- a/activesupport/CHANGELOG.md
+++ b/activesupport/CHANGELOG.md
@@ -399,4 +399,6 @@
 
 * Optimize log subscribers to check log level before doing any processing. *Brian Durand*
 
+* Improve String#squish to handle Unicode whitespace. *Antoine Lyset*
+
 Please check [3-2-stable](https://github.com/rails/rails/blob/3-2-stable/activesupport/CHANGELOG.md) for previous changes.
diff --git a/activesupport/lib/active_support/core_ext/string/filters.rb b/activesupport/lib/active_support/core_ext/string/filters.rb
index e05447439a..1811f9f861 100644
--- a/activesupport/lib/active_support/core_ext/string/filters.rb
+++ b/activesupport/lib/active_support/core_ext/string/filters.rb
@@ -3,6 +3,8 @@ class String
   # the string, and then changing remaining consecutive whitespace
   # groups into one space each.
   #
+  # Note that it handles both ASCII and Unicode whitespace like mongolian vowel separator (U+180E).
+  #
   # %{ Multi-line
   # string }.squish # => "Multi-line string"
   # " foo bar \n \t boo".squish # => "foo bar boo"
@@ -12,8 +14,9 @@ class String
 
   # Performs a destructive squish. See String#squish.
   def squish!
-    strip!
-    gsub!(/\s+/, ' ')
+    gsub!(/\A[[:space:]]+/, '')
+    gsub!(/[[:space:]]+\Z/, '')
+    gsub!(/[[:space:]]+/, ' ')
     self
   end
 
diff --git a/activesupport/test/core_ext/string_ext_test.rb b/activesupport/test/core_ext/string_ext_test.rb
index db1cf14abf..3549331d67 100644
--- a/activesupport/test/core_ext/string_ext_test.rb
+++ b/activesupport/test/core_ext/string_ext_test.rb
@@ -223,10 +223,11 @@ class StringInflectionsTest < ActiveSupport::TestCase
   end
 
   def test_string_squish
-    original = %{ A string with tabs(\t\t), newlines(\n\n), and
-      many spaces( ). }
+    original = %{\u180E\u180E A string surrounded by unicode mongolian vowel separators,
+      with tabs(\t\t), newlines(\n\n), unicode nextlines(\u0085\u0085) and many spaces( ). \u180E\u180E}
 
-    expected = "A string with tabs( ), newlines( ), and many spaces( )."
+    expected = "A string surrounded by unicode mongolian vowel separators, " +
+      "with tabs( ), newlines( ), unicode nextlines( ) and many spaces( )."
 
     # Make sure squish returns what we expect:
     assert_equal original.squish, expected
diff --git a/guides/source/active_support_core_extensions.md b/guides/source/active_support_core_extensions.md
index 3c1bb0f132..f02b377832 100644
--- a/guides/source/active_support_core_extensions.md
+++ b/guides/source/active_support_core_extensions.md
@@ -1233,6 +1233,8 @@ The method `squish` strips leading and trailing whitespace, and substitutes runs
 
 There's also the destructive version `String#squish!`.
 
+Note that it handles both ASCII and Unicode whitespace like mongolian vowel separator (U+180E).
+
 NOTE: Defined in `active_support/core_ext/string/filters.rb`.
 
 ### `truncate`
-- 
cgit v1.2.3