From 9a73630d935e360f3dc896e50dd673afb97cf3b5 Mon Sep 17 00:00:00 2001 From: Michael Koziarski Date: Mon, 31 Aug 2009 12:16:22 -0700 Subject: Add verify and clean methods to ActiveSupport::Multibyte. When accepting character input from outside of your application you can't blindly trust that all strings are properly encoded. With these methods you can check incoming strings and clean them up if necessary. Signed-off-by: Michael Koziarski Conflicts: activesupport/lib/active_support/multibyte.rb --- activesupport/lib/active_support/multibyte.rb | 32 +++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'activesupport/lib/active_support/multibyte.rb') diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb index d8d58f3bce..f59285daba 100644 --- a/activesupport/lib/active_support/multibyte.rb +++ b/activesupport/lib/active_support/multibyte.rb @@ -29,7 +29,35 @@ module ActiveSupport #:nodoc: # # Example: # ActiveSupport::Multibyte.proxy_class = CharsForUTF32 - mattr_accessor :proxy_class - self.proxy_class = ActiveSupport::Multibyte::Chars + def self.proxy_class=(klass) + @proxy_class = klass + end + + # Returns the currect proxy class + def self.proxy_class + @proxy_class ||= ActiveSupport::Multibyte::Chars + end + + # Regular expressions that describe valid byte sequences for a character + VALID_CHARACTER = { + # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site) + 'UTF-8' => /\A(?: + [\x00-\x7f] | + [\xc2-\xdf] [\x80-\xbf] | + \xe0 [\xa0-\xbf] [\x80-\xbf] | + [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] | + \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] | + [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] | + \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn, + # Quick check for valid Shift-JIS characters, disregards the odd-even pairing + 'Shift_JIS' => /\A(?: + [\x00-\x7e \xa1-\xdf] | + [\x81-\x9f \xe0-\xef] [\x40-\x7e \x80-\x9e \x9f-\xfc])\z /xn + } end end + +require 'active_support/multibyte/chars' +require 'active_support/multibyte/exceptions' +require 'active_support/multibyte/unicode_database' +require 'active_support/multibyte/utils' -- cgit v1.2.3 From 87c93da440b099856fe3437c5bb269f01fb95379 Mon Sep 17 00:00:00 2001 From: Michael Koziarski Date: Fri, 4 Sep 2009 14:28:32 +1200 Subject: Fix a messed up merge commit --- activesupport/lib/active_support/multibyte.rb | 4 ---- 1 file changed, 4 deletions(-) (limited to 'activesupport/lib/active_support/multibyte.rb') diff --git a/activesupport/lib/active_support/multibyte.rb b/activesupport/lib/active_support/multibyte.rb index f59285daba..6f2016a409 100644 --- a/activesupport/lib/active_support/multibyte.rb +++ b/activesupport/lib/active_support/multibyte.rb @@ -1,9 +1,5 @@ # encoding: utf-8 -require 'active_support/multibyte/chars' -require 'active_support/multibyte/exceptions' -require 'active_support/multibyte/unicode_database' - require 'active_support/core_ext/module/attribute_accessors' module ActiveSupport #:nodoc: -- cgit v1.2.3