From 9a73630d935e360f3dc896e50dd673afb97cf3b5 Mon Sep 17 00:00:00 2001 From: Michael Koziarski Date: Mon, 31 Aug 2009 12:16:22 -0700 Subject: Add verify and clean methods to ActiveSupport::Multibyte. When accepting character input from outside of your application you can't blindly trust that all strings are properly encoded. With these methods you can check incoming strings and clean them up if necessary. Signed-off-by: Michael Koziarski Conflicts: activesupport/lib/active_support/multibyte.rb --- activesupport/lib/active_support/multibyte.rb | 32 +++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'activesupport/lib/active_support/multibyte.rb') diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb index d8d58f3bce..f59285daba 100644 --- a/activesupport/lib/active_support/multibyte.rb +++ b/activesupport/lib/active_support/multibyte.rb @@ -29,7 +29,35 @@ module ActiveSupport #:nodoc: # # Example: # ActiveSupport::Multibyte.proxy_class = CharsForUTF32 - mattr_accessor :proxy_class - self.proxy_class = ActiveSupport::Multibyte::Chars + def self.proxy_class=(klass) + @proxy_class = klass + end + + # Returns the current proxy class + def self.proxy_class + @proxy_class ||= ActiveSupport::Multibyte::Chars + end + + # Regular expressions that describe valid byte sequences for a character + VALID_CHARACTER = { + # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site) + 'UTF-8' => /\A(?: + [\x00-\x7f] | + [\xc2-\xdf] [\x80-\xbf] | + \xe0 [\xa0-\xbf] [\x80-\xbf] | + [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] | + \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] | + [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] | + \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn, + # Quick check for valid Shift-JIS characters, disregards the odd-even pairing + 'Shift_JIS' => /\A(?: + [\x00-\x7e \xa1-\xdf] | + [\x81-\x9f \xe0-\xef] [\x40-\x7e \x80-\x9e \x9f-\xfc])\z /xn + } end end + 
+require 'active_support/multibyte/chars' +require 'active_support/multibyte/exceptions' +require 'active_support/multibyte/unicode_database' +require 'active_support/multibyte/utils' -- cgit v1.2.3