aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/multibyte.rb
diff options
context:
space:
mode:
author Michael Koziarski <michael@koziarski.com> 2009-08-31 12:16:22 -0700
committer Michael Koziarski <michael@koziarski.com> 2009-09-04 09:25:38 +1200
commit 9a73630d935e360f3dc896e50dd673afb97cf3b5 (patch)
tree e404f7dbfc142a10b45b758f0cea23812abc9f23 /activesupport/lib/active_support/multibyte.rb
parent 5e6dab8b34152bc48c89032d20e5bda1511e28fb (diff)
download rails-9a73630d935e360f3dc896e50dd673afb97cf3b5.tar.gz
rails-9a73630d935e360f3dc896e50dd673afb97cf3b5.tar.bz2
rails-9a73630d935e360f3dc896e50dd673afb97cf3b5.zip
Add verify and clean methods to ActiveSupport::Multibyte.
When accepting character input from outside of your application you can't blindly trust that all strings are properly encoded. With these methods you can check incoming strings and clean them up if necessary. Signed-off-by: Michael Koziarski <michael@koziarski.com> Conflicts: activesupport/lib/active_support/multibyte.rb
Diffstat (limited to 'activesupport/lib/active_support/multibyte.rb')
-rw-r--r-- activesupport/lib/active_support/multibyte.rb 32
1 files changed, 30 insertions, 2 deletions
diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb
index d8d58f3bce..f59285daba 100644
--- a/activesupport/lib/active_support/multibyte.rb
+++ b/activesupport/lib/active_support/multibyte.rb
@@ -29,7 +29,35 @@ module ActiveSupport #:nodoc:
#
# Example:
# ActiveSupport::Multibyte.proxy_class = CharsForUTF32
- mattr_accessor :proxy_class
- self.proxy_class = ActiveSupport::Multibyte::Chars
+ def self.proxy_class=(klass)
+ @proxy_class = klass
+ end
+
+ # Returns the current proxy class
+ def self.proxy_class
+ @proxy_class ||= ActiveSupport::Multibyte::Chars
+ end
+
+ # Regular expressions that describe valid byte sequences for a character
+ VALID_CHARACTER = {
+ # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+ 'UTF-8' => /\A(?:
+ [\x00-\x7f] |
+ [\xc2-\xdf] [\x80-\xbf] |
+ \xe0 [\xa0-\xbf] [\x80-\xbf] |
+ [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
+ \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
+ [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
+ \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+ # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
+ 'Shift_JIS' => /\A(?:
+ [\x00-\x7e \xa1-\xdf] |
+ [\x81-\x9f \xe0-\xef] [\x40-\x7e \x80-\x9e \x9f-\xfc])\z /xn
+ }
end
end
+
+require 'active_support/multibyte/chars'
+require 'active_support/multibyte/exceptions'
+require 'active_support/multibyte/unicode_database'
+require 'active_support/multibyte/utils'