diff options
Diffstat (limited to 'activesupport/lib/active_support/core_ext/string')
3 files changed, 48 insertions, 4 deletions
diff --git a/activesupport/lib/active_support/core_ext/string/filters.rb b/activesupport/lib/active_support/core_ext/string/filters.rb index 66e721eea3..df0e79afa8 100644 --- a/activesupport/lib/active_support/core_ext/string/filters.rb +++ b/activesupport/lib/active_support/core_ext/string/filters.rb @@ -78,6 +78,47 @@ class String "#{self[0, stop]}#{omission}" end + # Truncates +text+ to at most <tt>bytesize</tt> bytes in length without + # breaking string encoding by splitting multibyte characters or breaking + # grapheme clusters ("perceptual characters") by truncating at combining + # characters. + # + # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".size + # => 20 + # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".bytesize + # => 80 + # >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".truncate_bytes(20) + # => "🔪🔪🔪🔪…" + # + # The truncated text ends with the <tt>:omission</tt> string, defaulting + # to "…", for a total length not exceeding <tt>bytesize</tt>. + def truncate_bytes(truncate_at, omission: "…") + omission ||= "" + + case + when bytesize <= truncate_at + dup + when omission.bytesize > truncate_at + raise ArgumentError, "Omission #{omission.inspect} is #{omission.bytesize}, larger than the truncation length of #{truncate_at} bytes" + when omission.bytesize == truncate_at + omission.dup + else + self.class.new.tap do |cut| + cut_at = truncate_at - omission.bytesize + + scan(/\X/) do |grapheme| + if cut.bytesize + grapheme.bytesize <= cut_at + cut << grapheme + else + break + end + end + + cut << omission + end + end + end + # Truncates a given +text+ after a given number of words (<tt>words_count</tt>): # # 'Once upon a time in a world far far away'.truncate_words(4) diff --git a/activesupport/lib/active_support/core_ext/string/multibyte.rb b/activesupport/lib/active_support/core_ext/string/multibyte.rb index 07c0d16398..6cceb46507 100644 --- a/activesupport/lib/active_support/core_ext/string/multibyte.rb +++ b/activesupport/lib/active_support/core_ext/string/multibyte.rb @@ -11,12 +11,13 @@ class String # encapsulates the original string. A Unicode safe version of all the String methods are defined on this proxy # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string. # - # >> "lj".upcase - # => "lj" # >> "lj".mb_chars.upcase.to_s # => "LJ" # - # NOTE: An above example is useful for pre Ruby 2.4. Ruby 2.4 supports Unicode case mappings. + # NOTE: Ruby 2.4 and later support native Unicode case mappings: + # + # >> "lj".upcase + # => "LJ" # # == Method chaining # diff --git a/activesupport/lib/active_support/core_ext/string/strip.rb b/activesupport/lib/active_support/core_ext/string/strip.rb index cc26274e4a..6f9834bb16 100644 --- a/activesupport/lib/active_support/core_ext/string/strip.rb +++ b/activesupport/lib/active_support/core_ext/string/strip.rb @@ -20,6 +20,8 @@ class String # Technically, it looks for the least indented non-empty line # in the whole string, and removes that amount of leading whitespace. def strip_heredoc - gsub(/^#{scan(/^[ \t]*(?=\S)/).min}/, "".freeze) + gsub(/^#{scan(/^[ \t]*(?=\S)/).min}/, "".freeze).tap do |stripped| + stripped.freeze if frozen? + end end end |