diff options
author | Michael Koziarski <michael@koziarski.com> | 2009-08-31 12:16:22 -0700 |
---|---|---|
committer | Michael Koziarski <michael@koziarski.com> | 2009-09-04 09:25:38 +1200 |
commit | 9a73630d935e360f3dc896e50dd673afb97cf3b5 (patch) | |
tree | e404f7dbfc142a10b45b758f0cea23812abc9f23 /activesupport/lib/active_support/multibyte.rb | |
parent | 5e6dab8b34152bc48c89032d20e5bda1511e28fb (diff) | |
download | rails-9a73630d935e360f3dc896e50dd673afb97cf3b5.tar.gz rails-9a73630d935e360f3dc896e50dd673afb97cf3b5.tar.bz2 rails-9a73630d935e360f3dc896e50dd673afb97cf3b5.zip |
Add verify and clean methods to ActiveSupport::Multibyte.
When accepting character input from outside of your application you can't
blindly trust that all strings are properly encoded. With these methods
you can check incoming strings and clean them up if necessary.
Signed-off-by: Michael Koziarski <michael@koziarski.com>
Conflicts:
activesupport/lib/active_support/multibyte.rb
Diffstat (limited to 'activesupport/lib/active_support/multibyte.rb')
-rw-r--r-- | activesupport/lib/active_support/multibyte.rb | 32 |
1 files changed, 30 insertions, 2 deletions
```diff
diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb
index d8d58f3bce..f59285daba 100644
--- a/activesupport/lib/active_support/multibyte.rb
+++ b/activesupport/lib/active_support/multibyte.rb
@@ -29,7 +29,35 @@ module ActiveSupport #:nodoc:
     #
     # Example:
     #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
-    mattr_accessor :proxy_class
-    self.proxy_class = ActiveSupport::Multibyte::Chars
+    def self.proxy_class=(klass)
+      @proxy_class = klass
+    end
+
+    # Returns the currect proxy class
+    def self.proxy_class
+      @proxy_class ||= ActiveSupport::Multibyte::Chars
+    end
+
+    # Regular expressions that describe valid byte sequences for a character
+    VALID_CHARACTER = {
+      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+      'UTF-8' => /\A(?:
+                  [\x00-\x7f]                                         |
+                  [\xc2-\xdf] [\x80-\xbf]                             |
+                  \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
+                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
+                  \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
+      'Shift_JIS' => /\A(?:
+                  [\x00-\x7e \xa1-\xdf]                                     |
+                  [\x81-\x9f \xe0-\xef] [\x40-\x7e \x80-\x9e \x9f-\xfc])\z /xn
+    }
   end
 end
+
+require 'active_support/multibyte/chars'
+require 'active_support/multibyte/exceptions'
+require 'active_support/multibyte/unicode_database'
+require 'active_support/multibyte/utils'
```