aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/multibyte/handlers
diff options
context:
space:
mode:
author: Michael Koziarski <michael@koziarski.com> 2006-10-17 08:29:16 +0000
committer: Michael Koziarski <michael@koziarski.com> 2006-10-17 08:29:16 +0000
commit: 911f3db00abb7b35b400973c032e4e5c340bce6f (patch)
tree: 7b8435b9160ed8b8d8e1a8ba537950685e915b23 /activesupport/lib/active_support/multibyte/handlers
parent: 2d3367669156ec4149c8773a6afa93d220c36e7f (diff)
download: rails-911f3db00abb7b35b400973c032e4e5c340bce6f.tar.gz
rails-911f3db00abb7b35b400973c032e4e5c340bce6f.tar.bz2
rails-911f3db00abb7b35b400973c032e4e5c340bce6f.zip
Ensure Chars#tidy_bytes only tidies broken bytes. Closes #6397 [Manfred Stienstra]
git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@5316 5ecf4fe2-1ee6-0310-87b1-e25e094e27de
Diffstat (limited to 'activesupport/lib/active_support/multibyte/handlers')
-rw-r--r-- activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb | 17
1 files changed, 11 insertions, 6 deletions
diff --git a/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb b/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
index 6c8eb88702..5b64734297 100644
--- a/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
+++ b/activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb
@@ -259,13 +259,18 @@ module ActiveSupport::Multibyte::Handlers
g_unpack(str).length
end
- # Strips all the non-utf-8 bytes from the string resulting in a valid utf-8 string
+ # Replaces all the non-utf-8 bytes by their iso-8859-1 or cp1252 equivalent resulting in a valid utf-8 string
def tidy_bytes(str)
- str.unpack('C*').map { |n|
- n < 128 ? n.chr :
- n < 160 ? [UCD.cp1252[n] || n].pack('U') :
- n < 192 ? "\xC2" + n.chr : "\xC3" + (n-64).chr
- }.join
+ str.split(//u).map do |c|
+ if !UTF8_PAT.match(c)
+ n = c.unpack('C')[0]
+ n < 128 ? n.chr :
+ n < 160 ? [UCD.cp1252[n] || n].pack('U') :
+ n < 192 ? "\xC2" + n.chr : "\xC3" + (n-64).chr
+ else
+ c
+ end
+ end.join
end
protected