From 7b44ffa21efe3b9608254701ffdf44f743b1a324 Mon Sep 17 00:00:00 2001 From: "Cory Gwin @gwincr11" Date: Fri, 3 Nov 2017 21:26:36 -0400 Subject: Add support for multiple encodings in String.blank? Motivation: - When strings are encoded with `.encode("UTF-16LE")` `.blank?` throws an `Encoding::CompatibilityError` exception. - We tested multiple implementation to see what the fastest implementation was, rescueing the execption seems to be the fastest option we could find. Related Issues: - #28953 Changes: - Add a rescue to catch the exception. - Added a `Concurrent::Map` to store a cache of encoded regex objects for requested encoding types. - Use the new `Concurrent::Map` cache to return the correct regex for the string being checked. --- activesupport/lib/active_support/core_ext/object/blank.rb | 11 ++++++++++- activesupport/test/core_ext/object/blank_test.rb | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'activesupport') diff --git a/activesupport/lib/active_support/core_ext/object/blank.rb b/activesupport/lib/active_support/core_ext/object/blank.rb index e42ad852dd..2ca431ab10 100644 --- a/activesupport/lib/active_support/core_ext/object/blank.rb +++ b/activesupport/lib/active_support/core_ext/object/blank.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "active_support/core_ext/regexp" +require "concurrent/map" class Object # An object is blank if it's false, empty, or a whitespace string. @@ -102,6 +103,9 @@ end class String BLANK_RE = /\A[[:space:]]*\z/ + ENCODED_BLANKS = Concurrent::Map.new do |h, enc| + h[enc] = Regexp.new(BLANK_RE.source.encode(enc), BLANK_RE.options | Regexp::FIXEDENCODING) + end # A string is blank if it's empty or contains whitespaces only: # @@ -119,7 +123,12 @@ class String # The regexp that matches blank strings is expensive. For the case of empty # strings we can speed up this method (~3.5x) with an empty? call. The # penalty for the rest of strings is marginal. - empty? || BLANK_RE.match?(self) + empty? || + begin + BLANK_RE.match?(self) + rescue Encoding::CompatibilityError + ENCODED_BLANKS[self.encoding].match?(self) + end end end diff --git a/activesupport/test/core_ext/object/blank_test.rb b/activesupport/test/core_ext/object/blank_test.rb index 749e59ec00..35a3e5922e 100644 --- a/activesupport/test/core_ext/object/blank_test.rb +++ b/activesupport/test/core_ext/object/blank_test.rb @@ -16,8 +16,8 @@ class BlankTest < ActiveSupport::TestCase end end - BLANK = [ EmptyTrue.new, nil, false, "", " ", " \n\t \r ", " ", "\u00a0", [], {} ] - NOT = [ EmptyFalse.new, Object.new, true, 0, 1, "a", [nil], { nil => 0 }, Time.now ] + BLANK = [ EmptyTrue.new, nil, false, "", " ", " \n\t \r ", " ", "\u00a0", [], {}, " ".encode("UTF-16LE")] + NOT = [ EmptyFalse.new, Object.new, true, 0, 1, "a", [nil], { nil => 0 }, Time.now , "my value".encode("UTF-16LE")] def test_blank BLANK.each { |v| assert_equal true, v.blank?, "#{v.inspect} should be blank" } -- cgit v1.2.3