aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--activesupport/CHANGELOG.md5
-rw-r--r--activesupport/lib/active_support/core_ext/string/filters.rb41
-rw-r--r--activesupport/test/core_ext/string_ext_test.rb62
3 files changed, 108 insertions, 0 deletions
diff --git a/activesupport/CHANGELOG.md b/activesupport/CHANGELOG.md
index a844d1d8d7..66b7365916 100644
--- a/activesupport/CHANGELOG.md
+++ b/activesupport/CHANGELOG.md
@@ -1,5 +1,10 @@
## Rails 6.0.0.alpha (Unreleased) ##
+* `String#truncate_bytes` to truncate a string to a maximum bytesize without
+ breaking multibyte characters or grapheme clusters like 👩‍👩‍👦‍👦.
+
+ *Jeremy Daer*
+
* `String#strip_heredoc` preserves frozenness.
"foo".freeze.strip_heredoc.frozen? # => true
diff --git a/activesupport/lib/active_support/core_ext/string/filters.rb b/activesupport/lib/active_support/core_ext/string/filters.rb
index 66e721eea3..df0e79afa8 100644
--- a/activesupport/lib/active_support/core_ext/string/filters.rb
+++ b/activesupport/lib/active_support/core_ext/string/filters.rb
@@ -78,6 +78,47 @@ class String
"#{self[0, stop]}#{omission}"
end
+ # Truncates +text+ to at most <tt>bytesize</tt> bytes in length without
+ # breaking string encoding by splitting multibyte characters or breaking
+ # grapheme clusters ("perceptual characters") by truncating at combining
+ # characters.
+ #
+ # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".size
+ # => 20
+ # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".bytesize
+ # => 80
+ # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".truncate_bytes(20)
+ # => "🔪🔪🔪🔪…"
+ #
+ # The truncated text ends with the <tt>:omission</tt> string, defaulting
+ # to "…", for a total length not exceeding <tt>bytesize</tt>.
+ def truncate_bytes(truncate_at, omission: "…")
+ omission ||= ""
+
+ case
+ when bytesize <= truncate_at
+ dup
+ when omission.bytesize > truncate_at
+ raise ArgumentError, "Omission #{omission.inspect} is #{omission.bytesize}, larger than the truncation length of #{truncate_at} bytes"
+ when omission.bytesize == truncate_at
+ omission.dup
+ else
+ self.class.new.tap do |cut|
+ cut_at = truncate_at - omission.bytesize
+
+ scan(/\X/) do |grapheme|
+ if cut.bytesize + grapheme.bytesize <= cut_at
+ cut << grapheme
+ else
+ break
+ end
+ end
+
+ cut << omission
+ end
+ end
+ end
+
# Truncates a given +text+ after a given number of words (<tt>words_count</tt>):
#
# 'Once upon a time in a world far far away'.truncate_words(4)
diff --git a/activesupport/test/core_ext/string_ext_test.rb b/activesupport/test/core_ext/string_ext_test.rb
index b95d2d8307..ccaa5dc786 100644
--- a/activesupport/test/core_ext/string_ext_test.rb
+++ b/activesupport/test/core_ext/string_ext_test.rb
@@ -285,6 +285,68 @@ class StringInflectionsTest < ActiveSupport::TestCase
assert_equal "Hello Big[...]", "Hello Big World!".truncate(15, omission: "[...]", separator: /\s/)
end
+ def test_truncate_bytes
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16)
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16, omission: nil)
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16, omission: " ")
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16, omission: "🖖")
+
+ assert_equal "👍👍👍…", "👍👍👍👍".truncate_bytes(15)
+ assert_equal "👍👍👍", "👍👍👍👍".truncate_bytes(15, omission: nil)
+ assert_equal "👍👍👍 ", "👍👍👍👍".truncate_bytes(15, omission: " ")
+ assert_equal "👍👍🖖", "👍👍👍👍".truncate_bytes(15, omission: "🖖")
+
+ assert_equal "…", "👍👍👍👍".truncate_bytes(5)
+ assert_equal "👍", "👍👍👍👍".truncate_bytes(5, omission: nil)
+ assert_equal "👍 ", "👍👍👍👍".truncate_bytes(5, omission: " ")
+ assert_equal "🖖", "👍👍👍👍".truncate_bytes(5, omission: "🖖")
+
+ assert_equal "…", "👍👍👍👍".truncate_bytes(4)
+ assert_equal "👍", "👍👍👍👍".truncate_bytes(4, omission: nil)
+ assert_equal " ", "👍👍👍👍".truncate_bytes(4, omission: " ")
+ assert_equal "🖖", "👍👍👍👍".truncate_bytes(4, omission: "🖖")
+
+ assert_raise ArgumentError do
+ "👍👍👍👍".truncate_bytes(3, omission: "🖖")
+ end
+ end
+
+ def test_truncate_bytes_preserves_codepoints
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16)
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16, omission: nil)
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16, omission: " ")
+ assert_equal "👍👍👍👍", "👍👍👍👍".truncate_bytes(16, omission: "🖖")
+
+ assert_equal "👍👍👍…", "👍👍👍👍".truncate_bytes(15)
+ assert_equal "👍👍👍", "👍👍👍👍".truncate_bytes(15, omission: nil)
+ assert_equal "👍👍👍 ", "👍👍👍👍".truncate_bytes(15, omission: " ")
+ assert_equal "👍👍🖖", "👍👍👍👍".truncate_bytes(15, omission: "🖖")
+
+ assert_equal "…", "👍👍👍👍".truncate_bytes(5)
+ assert_equal "👍", "👍👍👍👍".truncate_bytes(5, omission: nil)
+ assert_equal "👍 ", "👍👍👍👍".truncate_bytes(5, omission: " ")
+ assert_equal "🖖", "👍👍👍👍".truncate_bytes(5, omission: "🖖")
+
+ assert_equal "…", "👍👍👍👍".truncate_bytes(4)
+ assert_equal "👍", "👍👍👍👍".truncate_bytes(4, omission: nil)
+ assert_equal " ", "👍👍👍👍".truncate_bytes(4, omission: " ")
+ assert_equal "🖖", "👍👍👍👍".truncate_bytes(4, omission: "🖖")
+
+ assert_raise ArgumentError do
+ "👍👍👍👍".truncate_bytes(3, omission: "🖖")
+ end
+ end
+
+ def test_truncates_bytes_preserves_grapheme_clusters
+ assert_equal "a ", "a ❤️ b".truncate_bytes(2, omission: nil)
+ assert_equal "a ", "a ❤️ b".truncate_bytes(3, omission: nil)
+ assert_equal "a ", "a ❤️ b".truncate_bytes(7, omission: nil)
+ assert_equal "a ❤️", "a ❤️ b".truncate_bytes(8, omission: nil)
+
+ assert_equal "a ", "a 👩‍❤️‍👩".truncate_bytes(13, omission: nil)
+ assert_equal "", "👩‍❤️‍👩".truncate_bytes(13, omission: nil)
+ end
+
def test_truncate_words
assert_equal "Hello Big World!", "Hello Big World!".truncate_words(3)
assert_equal "Hello Big...", "Hello Big World!".truncate_words(2)