diff options
author | Jeremy Daer <jeremydaer@gmail.com> | 2016-12-09 11:34:35 -0700 |
---|---|---|
committer | Jeremy Daer <jeremydaer@gmail.com> | 2018-02-18 00:14:51 -0800 |
commit | 4940cc49ddb361d584d51bc3eb4675ff8ece4a2b (patch) | |
tree | ef8c912363d354b379eb56bc8b2bd1bb7cea83dc /activesupport/lib | |
parent | 8454aeeb2b8e7d1255acbaaec4e0ae3c97d55f49 (diff) | |
download | rails-4940cc49ddb361d584d51bc3eb4675ff8ece4a2b.tar.gz rails-4940cc49ddb361d584d51bc3eb4675ff8ece4a2b.tar.bz2 rails-4940cc49ddb361d584d51bc3eb4675ff8ece4a2b.zip |
String#truncate_bytes: limit to N bytes without breaking multibyte chars
This faithfully preserves grapheme clusters (characters composed of other
characters and combining marks) and other multibyte characters.
Diffstat (limited to 'activesupport/lib')
-rw-r--r-- | activesupport/lib/active_support/core_ext/string/filters.rb | 41 |
1 files changed, 41 insertions, 0 deletions
# Truncates the string to at most +truncate_at+ bytes without breaking
# string encoding by splitting multibyte characters or breaking grapheme
# clusters ("perceptual characters") by truncating at combining characters.
#
#   >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".size
#   => 20
#   >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".bytesize
#   => 80
#   >> "🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪🔪".truncate_bytes(20)
#   => "🔪🔪🔪🔪…"
#
# The truncated text ends with the <tt>:omission</tt> string, defaulting
# to "…", for a total length not exceeding +truncate_at+ bytes. Passing
# <tt>omission: nil</tt> truncates without appending anything.
#
# A string that already fits is returned as an unmodified copy. When the
# omission is exactly +truncate_at+ bytes long, a copy of the omission alone
# is returned.
#
# Raises ArgumentError when the omission itself is longer than
# +truncate_at+ bytes.
def truncate_bytes(truncate_at, omission: "…")
  omission ||= ""

  # Nothing to cut: hand back a copy so callers never receive the receiver itself.
  return dup if bytesize <= truncate_at

  if omission.bytesize > truncate_at
    raise ArgumentError, "Omission #{omission.inspect} is #{omission.bytesize}, larger than the truncation length of #{truncate_at} bytes"
  end

  # The omission consumes the whole byte budget, leaving no room for text.
  return omission.dup if omission.bytesize == truncate_at

  cut_at = truncate_at - omission.bytesize

  # A bare String.new is ASCII-8BIT. Seed the buffer with the receiver's
  # encoding so that an empty result (no grapheme fits and the omission is
  # empty) is not returned as a binary string.
  cut = self.class.new.force_encoding(encoding)

  # Walk whole grapheme clusters (\X) so combining marks stay attached to
  # their base character; stop at the first cluster that would overflow.
  scan(/\X/) do |grapheme|
    break if cut.bytesize + grapheme.bytesize > cut_at

    cut << grapheme
  end

  cut << omission
end