aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/core_ext/string
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/lib/active_support/core_ext/string')
-rw-r--r--activesupport/lib/active_support/core_ext/string/filters.rb41
-rw-r--r--activesupport/lib/active_support/core_ext/string/multibyte.rb7
-rw-r--r--activesupport/lib/active_support/core_ext/string/strip.rb4
3 files changed, 48 insertions, 4 deletions
diff --git a/activesupport/lib/active_support/core_ext/string/filters.rb b/activesupport/lib/active_support/core_ext/string/filters.rb
index 66e721eea3..df0e79afa8 100644
--- a/activesupport/lib/active_support/core_ext/string/filters.rb
+++ b/activesupport/lib/active_support/core_ext/string/filters.rb
@@ -78,6 +78,47 @@ class String
"#{self[0, stop]}#{omission}"
end
+ # Truncates the string to at most <tt>truncate_at</tt> bytes in length without
+ # breaking string encoding by splitting multibyte characters or breaking
+ # grapheme clusters ("perceptual characters") by truncating at combining
+ # characters.
+ #
+ # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".size
+ # => 20
+ # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".bytesize
+ # => 80
+ # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".truncate_bytes(20)
+ # => "🔪🔪🔪🔪…"
+ #
+ # The truncated text ends with the <tt>:omission</tt> string, defaulting
+ # to "…", for a total length not exceeding <tt>truncate_at</tt> bytes.
+ # A string that already fits within the limit is returned as a copy.
+ #
+ # Raises ArgumentError when the string needs truncating but the omission
+ # alone is longer than <tt>truncate_at</tt> bytes.
+ def truncate_bytes(truncate_at, omission: "…")
+ omission ||= "" # a nil (or false) omission means "append nothing"
+
+ case
+ when bytesize <= truncate_at
+ dup # already within the limit: hand back a copy, no omission appended
+ when omission.bytesize > truncate_at
+ raise ArgumentError, "Omission #{omission.inspect} is #{omission.bytesize}, larger than the truncation length of #{truncate_at} bytes"
+ when omission.bytesize == truncate_at
+ omission.dup # the omission uses the whole byte budget, so it is the entire result
+ else
+ self.class.new.tap do |cut|
+ cut_at = truncate_at - omission.bytesize # bytes left for content once the omission is reserved
+
+ scan(/\X/) do |grapheme| # \X yields one grapheme cluster at a time
+ if cut.bytesize + grapheme.bytesize <= cut_at
+ cut << grapheme
+ else
+ break # stop at the first cluster that would exceed the budget
+ end
+ end
+
+ cut << omission
+ end
+ end
+ end
+
# Truncates a given +text+ after a given number of words (<tt>words_count</tt>):
#
# 'Once upon a time in a world far far away'.truncate_words(4)
diff --git a/activesupport/lib/active_support/core_ext/string/multibyte.rb b/activesupport/lib/active_support/core_ext/string/multibyte.rb
index 07c0d16398..6cceb46507 100644
--- a/activesupport/lib/active_support/core_ext/string/multibyte.rb
+++ b/activesupport/lib/active_support/core_ext/string/multibyte.rb
@@ -11,12 +11,13 @@ class String
# encapsulates the original string. Unicode-safe versions of all the String methods are defined on this proxy
# class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string.
#
- # >> "ǉ".upcase
- # => "ǉ"
# >> "ǉ".mb_chars.upcase.to_s
# => "Ǉ"
#
- # NOTE: An above example is useful for pre Ruby 2.4. Ruby 2.4 supports Unicode case mappings.
+ # NOTE: Ruby 2.4 and later support native Unicode case mappings:
+ #
+ # >> "ǉ".upcase
+ # => "Ǉ"
#
# == Method chaining
#
diff --git a/activesupport/lib/active_support/core_ext/string/strip.rb b/activesupport/lib/active_support/core_ext/string/strip.rb
index cc26274e4a..6f9834bb16 100644
--- a/activesupport/lib/active_support/core_ext/string/strip.rb
+++ b/activesupport/lib/active_support/core_ext/string/strip.rb
@@ -20,6 +20,8 @@ class String
# Technically, it looks for the least indented non-empty line
# in the whole string, and removes that amount of leading whitespace.
def strip_heredoc
- gsub(/^#{scan(/^[ \t]*(?=\S)/).min}/, "".freeze)
+ gsub(/^#{scan(/^[ \t]*(?=\S)/).min}/, "".freeze).tap do |stripped|
+ stripped.freeze if frozen? # freeze the result when the receiver itself is frozen
+ end
end
end