From 9a73630d935e360f3dc896e50dd673afb97cf3b5 Mon Sep 17 00:00:00 2001 From: Michael Koziarski Date: Mon, 31 Aug 2009 12:16:22 -0700 Subject: Add verify and clean methods to ActiveSupport::Multibyte. When accepting character input from outside of your application you can't blindly trust that all strings are properly encoded. With these methods you can check incoming strings and clean them up if necessary. Signed-off-by: Michael Koziarski Conflicts: activesupport/lib/active_support/multibyte.rb --- .../lib/active_support/multibyte/utils.rb | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 activesupport/lib/active_support/multibyte/utils.rb (limited to 'activesupport/lib/active_support/multibyte/utils.rb') diff --git a/activesupport/lib/active_support/multibyte/utils.rb b/activesupport/lib/active_support/multibyte/utils.rb new file mode 100644 index 0000000000..acef84da91 --- /dev/null +++ b/activesupport/lib/active_support/multibyte/utils.rb @@ -0,0 +1,61 @@ +# encoding: utf-8 + +module ActiveSupport #:nodoc: + module Multibyte #:nodoc: + if Kernel.const_defined?(:Encoding) + # Returns a regular expression that matches valid characters in the current encoding + def self.valid_character + VALID_CHARACTER[Encoding.default_internal.to_s] + end + else + def self.valid_character + case $KCODE + when 'UTF8' + VALID_CHARACTER['UTF-8'] + when 'SJIS' + VALID_CHARACTER['Shift_JIS'] + end + end + end + + if 'string'.respond_to?(:valid_encoding?) + # Verifies the encoding of a string + def self.verify(string) + string.valid_encoding? + end + else + def self.verify(string) + if expression = valid_character + for c in string.split(//) + return false unless valid_character.match(c) + end + end + true + end + end + + # Verifies the encoding of the string and raises an exception when it's not valid + def self.verify!(string) + raise EncodingError.new("Found characters with invalid encoding") unless verify(string) + end + + if 'string'.respond_to?(:force_encoding) + # Removes all invalid characters from the string. + # + # Note: this method is a no-op in Ruby 1.9 + def self.clean(string) + string + end + else + def self.clean(string) + if expression = valid_character + stripped = []; for c in string.split(//) + stripped << c if valid_character.match(c) + end; stripped.join + else + string + end + end + end + end +end \ No newline at end of file -- cgit v1.2.3 From 38d65b03d6634b6993427213933aaa413a20c415 Mon Sep 17 00:00:00 2001 From: Beau Harrington Date: Wed, 9 Sep 2009 22:25:23 -0700 Subject: Remove redundant checks for valid character regexp in ActiveSupport::Multibyte#clean and #verify. [#3181 state:committed] Signed-off-by: Jeremy Kemper --- activesupport/lib/active_support/multibyte/utils.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'activesupport/lib/active_support/multibyte/utils.rb') diff --git a/activesupport/lib/active_support/multibyte/utils.rb b/activesupport/lib/active_support/multibyte/utils.rb index acef84da91..a7e40688de 100644 --- a/activesupport/lib/active_support/multibyte/utils.rb +++ b/activesupport/lib/active_support/multibyte/utils.rb @@ -27,7 +27,7 @@ module ActiveSupport #:nodoc: def self.verify(string) if expression = valid_character for c in string.split(//) - return false unless valid_character.match(c) + return false unless expression.match(c) end end true @@ -50,7 +50,7 @@ module ActiveSupport #:nodoc: def self.clean(string) if expression = valid_character stripped = []; for c in string.split(//) - stripped << c if valid_character.match(c) + stripped << c if expression.match(c) end; stripped.join else string -- cgit v1.2.3 From 91ffddca57d754f024b90d981acb146a5e9f5ab9 Mon Sep 17 00:00:00 2001 From: Jeremy Kemper Date: Sun, 13 Sep 2009 04:43:41 -0700 Subject: Use Encoding.default_external, not _internal --- activesupport/lib/active_support/multibyte/utils.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'activesupport/lib/active_support/multibyte/utils.rb') diff --git a/activesupport/lib/active_support/multibyte/utils.rb b/activesupport/lib/active_support/multibyte/utils.rb index a7e40688de..8e47763d39 100644 --- a/activesupport/lib/active_support/multibyte/utils.rb +++ b/activesupport/lib/active_support/multibyte/utils.rb @@ -5,7 +5,7 @@ module ActiveSupport #:nodoc: if Kernel.const_defined?(:Encoding) # Returns a regular expression that matches valid characters in the current encoding def self.valid_character - VALID_CHARACTER[Encoding.default_internal.to_s] + VALID_CHARACTER[Encoding.default_external.to_s] end else def self.valid_character @@ -58,4 +58,4 @@ module ActiveSupport #:nodoc: end end end -end \ No newline at end of file +end -- cgit v1.2.3