aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport
diff options
context:
space:
mode:
authorwycats <wycats@gmail.com>2010-04-10 17:22:52 -0400
committerwycats <wycats@gmail.com>2010-04-10 17:22:52 -0400
commit87f7093ee3306f417e1136d947eba200d40ff8e7 (patch)
treeba70dbdaf67e12fc067bb5d8343d7681932452ef /activesupport
parentee8e9d548472fb8cb8792a569e579c6513be77d6 (diff)
parent381f877bbbbf81d679f5be3b7ac7e961d41502bd (diff)
downloadrails-87f7093ee3306f417e1136d947eba200d40ff8e7.tar.gz
rails-87f7093ee3306f417e1136d947eba200d40ff8e7.tar.bz2
rails-87f7093ee3306f417e1136d947eba200d40ff8e7.zip
Merge branch 'master' into docrails_master
Diffstat (limited to 'activesupport')
-rw-r--r--activesupport/CHANGELOG5
-rw-r--r--activesupport/lib/active_support/backtrace_cleaner.rb8
-rw-r--r--activesupport/lib/active_support/cache/mem_cache_store.rb6
-rw-r--r--activesupport/lib/active_support/cache/strategy/local_cache.rb2
-rw-r--r--activesupport/lib/active_support/callbacks.rb2
-rw-r--r--activesupport/lib/active_support/core_ext/array/conversions.rb2
-rw-r--r--activesupport/lib/active_support/core_ext/module/aliasing.rb2
-rw-r--r--activesupport/lib/active_support/core_ext/module/attr_accessor_with_default.rb2
-rw-r--r--activesupport/lib/active_support/core_ext/module/delegation.rb2
-rw-r--r--activesupport/lib/active_support/core_ext/module/synchronization.rb2
-rw-r--r--activesupport/lib/active_support/multibyte/chars.rb85
-rw-r--r--activesupport/lib/active_support/ordered_hash.rb54
-rw-r--r--activesupport/lib/active_support/version.rb2
-rw-r--r--activesupport/test/clean_backtrace_test.rb7
-rw-r--r--activesupport/test/multibyte_chars_test.rb73
15 files changed, 167 insertions, 87 deletions
diff --git a/activesupport/CHANGELOG b/activesupport/CHANGELOG
index b9f565c71d..a5a7a9b904 100644
--- a/activesupport/CHANGELOG
+++ b/activesupport/CHANGELOG
@@ -1,3 +1,8 @@
+*Rails 3.0.0 [beta 3] (pending)*
+
+* Speed up and add Ruby 1.9 support for ActiveSupport::Multibyte::Chars#tidy_bytes. #4350 [Norman Clarke]
+
+
*Rails 3.0.0 [beta 2] (April 1st, 2010)*
* Reduced load time by deferring configuration of classes using
diff --git a/activesupport/lib/active_support/backtrace_cleaner.rb b/activesupport/lib/active_support/backtrace_cleaner.rb
index 6fab565646..8465bc1e10 100644
--- a/activesupport/lib/active_support/backtrace_cleaner.rb
+++ b/activesupport/lib/active_support/backtrace_cleaner.rb
@@ -4,7 +4,9 @@ module ActiveSupport
# context, so only the relevant lines are included.
#
# If you need to reconfigure an existing BacktraceCleaner, like the one in Rails, to show as much as possible, you can always
- # call BacktraceCleaner#remove_silencers!
+ # call BacktraceCleaner#remove_silencers! Also, if you need to reconfigure an existing BacktraceCleaner so that it does not
+ # filter or modify the paths of any lines of the backtrace, you can call BacktraceCleaner#remove_filters! These two methods
+ # will give you a completely untouched backtrace.
#
# Example:
#
@@ -60,6 +62,10 @@ module ActiveSupport
@silencers = []
end
+ def remove_filters!
+ @filters = []
+ end
+
private
def filter(backtrace)
@filters.each do |f|
diff --git a/activesupport/lib/active_support/cache/mem_cache_store.rb b/activesupport/lib/active_support/cache/mem_cache_store.rb
index d84a62ca2d..c56fedc12e 100644
--- a/activesupport/lib/active_support/cache/mem_cache_store.rb
+++ b/activesupport/lib/active_support/cache/mem_cache_store.rb
@@ -64,7 +64,7 @@ module ActiveSupport
@data.get(key, raw?(options))
end
rescue MemCache::MemCacheError => e
- logger.error("MemCacheError (#{e}): #{e.message}")
+ logger.error("MemCacheError (#{e}): #{e.message}") if logger
nil
end
@@ -85,7 +85,7 @@ module ActiveSupport
response == Response::STORED
end
rescue MemCache::MemCacheError => e
- logger.error("MemCacheError (#{e}): #{e.message}")
+ logger.error("MemCacheError (#{e}): #{e.message}") if logger
false
end
@@ -95,7 +95,7 @@ module ActiveSupport
response == Response::DELETED
end
rescue MemCache::MemCacheError => e
- logger.error("MemCacheError (#{e}): #{e.message}")
+ logger.error("MemCacheError (#{e}): #{e.message}") if logger
false
end
diff --git a/activesupport/lib/active_support/cache/strategy/local_cache.rb b/activesupport/lib/active_support/cache/strategy/local_cache.rb
index 86c7703c27..bbbd643736 100644
--- a/activesupport/lib/active_support/cache/strategy/local_cache.rb
+++ b/activesupport/lib/active_support/cache/strategy/local_cache.rb
@@ -18,7 +18,7 @@ module ActiveSupport
def middleware
@middleware ||= begin
klass = Class.new
- klass.class_eval(<<-EOS, __FILE__, __LINE__)
+ klass.class_eval(<<-EOS, __FILE__, __LINE__ + 1)
def initialize(app)
@app = app
end
diff --git a/activesupport/lib/active_support/callbacks.rb b/activesupport/lib/active_support/callbacks.rb
index c669630e47..5a7b94ead7 100644
--- a/activesupport/lib/active_support/callbacks.rb
+++ b/activesupport/lib/active_support/callbacks.rb
@@ -387,7 +387,7 @@ module ActiveSupport
send("_update_#{symbol}_superclass_callbacks")
body = send("_#{symbol}_callbacks").compile(nil)
- body, line = <<-RUBY_EVAL, __LINE__
+ body, line = <<-RUBY_EVAL, __LINE__ + 1
def _run_#{symbol}_callbacks(key = nil, &blk)
if self.class.send("_update_#{symbol}_superclass_callbacks")
self.class.__define_runner(#{symbol.inspect})
diff --git a/activesupport/lib/active_support/core_ext/array/conversions.rb b/activesupport/lib/active_support/core_ext/array/conversions.rb
index 2119322bfe..5d8e78e6e5 100644
--- a/activesupport/lib/active_support/core_ext/array/conversions.rb
+++ b/activesupport/lib/active_support/core_ext/array/conversions.rb
@@ -131,7 +131,7 @@ class Array
require 'builder' unless defined?(Builder)
options = options.dup
- options[:root] ||= all? { |e| e.is_a?(first.class) && first.class.to_s != "Hash" } ? ActiveSupport::Inflector.pluralize(ActiveSupport::Inflector.underscore(first.class.name)) : "records"
+ options[:root] ||= all? { |e| e.is_a?(first.class) && first.class.to_s != "Hash" } ? ActiveSupport::Inflector.pluralize(ActiveSupport::Inflector.underscore(first.class.name)).tr('/', '_') : "records"
options[:children] ||= options[:root].singularize
options[:indent] ||= 2
options[:builder] ||= Builder::XmlMarkup.new(:indent => options[:indent])
diff --git a/activesupport/lib/active_support/core_ext/module/aliasing.rb b/activesupport/lib/active_support/core_ext/module/aliasing.rb
index 3cad164148..ce481f0e84 100644
--- a/activesupport/lib/active_support/core_ext/module/aliasing.rb
+++ b/activesupport/lib/active_support/core_ext/module/aliasing.rb
@@ -61,7 +61,7 @@ class Module
# e.subject = "Megastars"
# e.title # => "Megastars"
def alias_attribute(new_name, old_name)
- module_eval <<-STR, __FILE__, __LINE__+1
+ module_eval <<-STR, __FILE__, __LINE__ + 1
def #{new_name}; self.#{old_name}; end # def subject; self.title; end
def #{new_name}?; self.#{old_name}?; end # def subject?; self.title?; end
def #{new_name}=(v); self.#{old_name} = v; end # def subject=(v); self.title = v; end
diff --git a/activesupport/lib/active_support/core_ext/module/attr_accessor_with_default.rb b/activesupport/lib/active_support/core_ext/module/attr_accessor_with_default.rb
index 4d0198f028..28ac89dab9 100644
--- a/activesupport/lib/active_support/core_ext/module/attr_accessor_with_default.rb
+++ b/activesupport/lib/active_support/core_ext/module/attr_accessor_with_default.rb
@@ -21,7 +21,7 @@ class Module
def attr_accessor_with_default(sym, default = nil, &block)
raise 'Default value or block required' unless !default.nil? || block
define_method(sym, block_given? ? block : Proc.new { default })
- module_eval(<<-EVAL, __FILE__, __LINE__)
+ module_eval(<<-EVAL, __FILE__, __LINE__ + 1)
def #{sym}=(value) # def age=(value)
class << self; attr_reader :#{sym} end # class << self; attr_reader :age end
@#{sym} = value # @age = value
diff --git a/activesupport/lib/active_support/core_ext/module/delegation.rb b/activesupport/lib/active_support/core_ext/module/delegation.rb
index b73f4c2b59..40a1866428 100644
--- a/activesupport/lib/active_support/core_ext/module/delegation.rb
+++ b/activesupport/lib/active_support/core_ext/module/delegation.rb
@@ -126,7 +126,7 @@ class Module
%(raise "#{self}##{prefix}#{method} delegated to #{to}.#{method}, but #{to} is nil: \#{self.inspect}")
end
- module_eval(<<-EOS, file, line)
+ module_eval(<<-EOS, file, line - 5)
if instance_methods(false).map(&:to_s).include?("#{prefix}#{method}")
remove_possible_method("#{prefix}#{method}")
end
diff --git a/activesupport/lib/active_support/core_ext/module/synchronization.rb b/activesupport/lib/active_support/core_ext/module/synchronization.rb
index 115b8abd4e..de76a069d6 100644
--- a/activesupport/lib/active_support/core_ext/module/synchronization.rb
+++ b/activesupport/lib/active_support/core_ext/module/synchronization.rb
@@ -28,7 +28,7 @@ class Module
raise ArgumentError, "#{method} is already synchronized. Double synchronization is not currently supported."
end
- module_eval(<<-EOS, __FILE__, __LINE__)
+ module_eval(<<-EOS, __FILE__, __LINE__ + 1)
def #{aliased_method}_with_synchronization#{punctuation}(*args, &block) # def expire_with_synchronization(*args, &block)
#{with}.synchronize do # @@lock.synchronize do
#{aliased_method}_without_synchronization#{punctuation}(*args, &block) # expire_without_synchronization(*args, &block)
diff --git a/activesupport/lib/active_support/multibyte/chars.rb b/activesupport/lib/active_support/multibyte/chars.rb
index 3eb0bf31f8..38007fd4e7 100644
--- a/activesupport/lib/active_support/multibyte/chars.rb
+++ b/activesupport/lib/active_support/multibyte/chars.rb
@@ -19,7 +19,7 @@ module ActiveSupport #:nodoc:
# bad.explicit_checking_method "T".mb_chars.downcase.to_s
#
# The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
- # encodings you can write your own multibyte string handler and configure it through
+ # encodings you can write your own multibyte string handler and configure it through
# ActiveSupport::Multibyte.proxy_class.
#
# class CharsForUTF32
@@ -458,8 +458,10 @@ module ActiveSupport #:nodoc:
end
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
- def tidy_bytes
- chars(self.class.tidy_bytes(@wrapped_string))
+ #
+ # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+ def tidy_bytes(force = false)
+ chars(self.class.tidy_bytes(@wrapped_string, force))
end
%w(lstrip rstrip strip reverse upcase downcase tidy_bytes capitalize).each do |method|
@@ -528,7 +530,7 @@ module ActiveSupport #:nodoc:
unpacked << codepoints[marker..pos-1]
marker = pos
end
- end
+ end
unpacked
end
@@ -644,33 +646,80 @@ module ActiveSupport #:nodoc:
codepoints
end
+ def tidy_byte(byte)
+ if byte < 160
+ [UCD.cp1252[byte] || byte].pack("U").unpack("C*")
+ elsif byte < 192
+ [194, byte]
+ else
+ [195, byte - 64]
+ end
+ end
+ private :tidy_byte
+
# Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
- def tidy_bytes(string)
- string.split(//u).map do |c|
- c.force_encoding(Encoding::ASCII) if c.respond_to?(:force_encoding)
-
- if !ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'].match(c)
- n = c.unpack('C')[0]
- n < 128 ? n.chr :
- n < 160 ? [UCD.cp1252[n] || n].pack('U') :
- n < 192 ? "\xC2" + n.chr : "\xC3" + (n-64).chr
+ #
+ # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+ def tidy_bytes(string, force = false)
+ if force
+ return string.unpack("C*").map do |b|
+ tidy_byte(b)
+ end.flatten.compact.pack("C*").unpack("U*").pack("U*")
+ end
+
+ bytes = string.unpack("C*")
+ conts_expected = 0
+ last_lead = 0
+
+ bytes.each_index do |i|
+
+ byte = bytes[i]
+ is_ascii = byte < 128
+ is_cont = byte > 127 && byte < 192
+ is_lead = byte > 191 && byte < 245
+ is_unused = byte > 240
+ is_restricted = byte > 244
+
+ # Impossible or highly unlikely byte? Clean it.
+ if is_unused || is_restricted
+ bytes[i] = tidy_byte(byte)
+ elsif is_cont
+ # Not expecting continuation byte? Clean up. Otherwise, now expect one less.
+ conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
else
- c
+ if conts_expected > 0
+ # Expected continuation, but got ASCII or leading? Clean backwards up to
+ # the leading byte.
+ (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
+ conts_expected = 0
+ end
+ if is_lead
+ # Final byte is leading? Clean it.
+ if i == bytes.length - 1
+ bytes[i] = tidy_byte(bytes.last)
+ else
+ # Valid leading byte? Expect continuations determined by position of
+ # first zero bit, with max of 3.
+ conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
+ last_lead = i
+ end
+ end
end
- end.join
+ end
+ bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
end
end
protected
-
+
def translate_offset(byte_offset) #:nodoc:
return nil if byte_offset.nil?
return 0 if @wrapped_string == ''
-
+
if @wrapped_string.respond_to?(:force_encoding)
@wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT)
end
-
+
begin
@wrapped_string[0...byte_offset].unpack('U*').length
rescue ArgumentError => e
diff --git a/activesupport/lib/active_support/ordered_hash.rb b/activesupport/lib/active_support/ordered_hash.rb
index 57ead35827..e1a2866863 100644
--- a/activesupport/lib/active_support/ordered_hash.rb
+++ b/activesupport/lib/active_support/ordered_hash.rb
@@ -1,13 +1,28 @@
require 'yaml'
+YAML.add_builtin_type("omap") do |type, val|
+ ActiveSupport::OrderedHash[val.map(&:to_a).map(&:first)]
+end
+
# OrderedHash is namespaced to prevent conflicts with other implementations
module ActiveSupport
- # Hash is ordered in Ruby 1.9!
- if RUBY_VERSION >= '1.9'
- class OrderedHash < ::Hash #:nodoc:
+ class OrderedHash < ::Hash #:nodoc:
+ def to_yaml_type
+ "!tag:yaml.org,2002:omap"
end
- else
- class OrderedHash < Hash #:nodoc:
+
+ def to_yaml(opts = {})
+ YAML.quick_emit(self, opts) do |out|
+ out.seq(taguri, to_yaml_style) do |seq|
+ each do |k, v|
+ seq.add(k => v)
+ end
+ end
+ end
+ end
+
+ # Hash is ordered in Ruby 1.9!
+ if RUBY_VERSION < '1.9'
def initialize(*args, &block)
super
@keys = []
@@ -55,7 +70,7 @@ module ActiveSupport
end
super
end
-
+
def delete_if
super
sync_keys!
@@ -134,31 +149,10 @@ module ActiveSupport
"#<OrderedHash #{super}>"
end
- private
-
- def sync_keys!
- @keys.delete_if {|k| !has_key?(k)}
- end
- end
- end
-
- class OrderedHash #:nodoc:
- def to_yaml_type
- "!tag:yaml.org,2002:omap"
- end
-
- def to_yaml(opts = {})
- YAML.quick_emit(self, opts) do |out|
- out.seq(taguri, to_yaml_style) do |seq|
- each do |k, v|
- seq.add(k => v)
- end
+ private
+ def sync_keys!
+ @keys.delete_if {|k| !has_key?(k)}
end
- end
end
end
-
- YAML.add_builtin_type("omap") do |type, val|
- ActiveSupport::OrderedHash[val.map(&:to_a).map(&:first)]
- end
end
diff --git a/activesupport/lib/active_support/version.rb b/activesupport/lib/active_support/version.rb
index 3ce11e59d2..538a8b87c6 100644
--- a/activesupport/lib/active_support/version.rb
+++ b/activesupport/lib/active_support/version.rb
@@ -3,7 +3,7 @@ module ActiveSupport
MAJOR = 3
MINOR = 0
TINY = 0
- BUILD = "beta2"
+ BUILD = "beta3"
STRING = [MAJOR, MINOR, TINY, BUILD].join('.')
end
diff --git a/activesupport/test/clean_backtrace_test.rb b/activesupport/test/clean_backtrace_test.rb
index ddbc258df1..86838a7f9a 100644
--- a/activesupport/test/clean_backtrace_test.rb
+++ b/activesupport/test/clean_backtrace_test.rb
@@ -9,6 +9,11 @@ class BacktraceCleanerFilterTest < ActiveSupport::TestCase
test "backtrace should not contain prefix when it has been filtered out" do
assert_equal "/my/class.rb", @bc.clean([ "/my/prefix/my/class.rb" ]).first
end
+
+ test "backtrace cleaner should allow removing filters" do
+ @bc.remove_filters!
+ assert_equal "/my/prefix/my/class.rb", @bc.clean(["/my/prefix/my/class.rb"]).first
+ end
test "backtrace should contain unaltered lines if they dont match a filter" do
assert_equal "/my/other_prefix/my/class.rb", @bc.clean([ "/my/other_prefix/my/class.rb" ]).first
@@ -44,4 +49,4 @@ class BacktraceCleanerFilterAndSilencerTest < ActiveSupport::TestCase
test "backtrace should not silence lines that has first had their silence hook filtered out" do
assert_equal [ "/class.rb" ], @bc.clean([ "/mongrel/class.rb" ])
end
-end \ No newline at end of file
+end
diff --git a/activesupport/test/multibyte_chars_test.rb b/activesupport/test/multibyte_chars_test.rb
index 0e489c10e1..1b8d13c024 100644
--- a/activesupport/test/multibyte_chars_test.rb
+++ b/activesupport/test/multibyte_chars_test.rb
@@ -107,7 +107,7 @@ class MultibyteCharsUTF8BehaviourTest < Test::Unit::TestCase
# Ruby 1.9 only supports basic whitespace
@whitespace = "\n\t ".force_encoding(Encoding::UTF_8)
end
-
+
@byte_order_mark = [65279].pack('U')
end
@@ -468,14 +468,6 @@ end
class MultibyteCharsExtrasTest < Test::Unit::TestCase
include MultibyteTestHelpers
- if RUBY_VERSION >= '1.9'
- def test_tidy_bytes_is_broken_on_1_9_0
- assert_raise(ArgumentError) do
- assert_equal_codepoints [0xfffd].pack('U'), chars("\xef\xbf\xbd").tidy_bytes
- end
- end
- end
-
def test_upcase_should_be_unicode_aware
assert_equal "АБВГД\0F", chars("аБвгд\0f").upcase
assert_equal 'こにちわ', chars('こにちわ').upcase
@@ -504,7 +496,7 @@ class MultibyteCharsExtrasTest < Test::Unit::TestCase
def test_limit_should_work_on_a_multibyte_string
example = chars(UNICODE_STRING)
bytesize = UNICODE_STRING.respond_to?(:bytesize) ? UNICODE_STRING.bytesize : UNICODE_STRING.size
-
+
assert_equal UNICODE_STRING, example.limit(bytesize)
assert_equal '', example.limit(0)
assert_equal '', example.limit(1)
@@ -531,7 +523,7 @@ class MultibyteCharsExtrasTest < Test::Unit::TestCase
assert example.limit(limit).to_s.length <= limit
end
end
-
+
def test_composition_exclusion_is_set_up_properly
# Normalization of DEVANAGARI LETTER QA breaks when composition exclusion isn't used correctly
qa = [0x915, 0x93c].pack('U*')
@@ -607,28 +599,57 @@ class MultibyteCharsExtrasTest < Test::Unit::TestCase
end
def test_tidy_bytes_should_tidy_bytes
+
+ single_byte_cases = {
+ "\x21" => "!", # Valid ASCII byte, low
+ "\x41" => "A", # Valid ASCII byte, mid
+ "\x7E" => "~", # Valid ASCII byte, high
+ "\x80" => "€", # Continuation byte, low (cp1252)
+ "\x94" => "”", # Continuation byte, mid (cp1252)
+ "\x9F" => "Ÿ", # Continuation byte, high (cp1252)
+ "\xC0" => "À", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
+ "\xC1" => "Á", # Overlong encoding, start of 2-byte sequence, but codepoint < 128
+ "\xC2" => "Â", # Start of 2-byte sequence, low
+ "\xC8" => "È", # Start of 2-byte sequence, mid
+ "\xDF" => "ß", # Start of 2-byte sequence, high
+ "\xE0" => "à", # Start of 3-byte sequence, low
+ "\xE8" => "è", # Start of 3-byte sequence, mid
+ "\xEF" => "ï", # Start of 3-byte sequence, high
+ "\xF0" => "ð", # Start of 4-byte sequence
+ "\xF1" => "ñ", # Unused byte
+ "\xFF" => "ÿ", # Restricted byte
+ "\x00" => "\x00" # null char
+ }
+
+ single_byte_cases.each do |bad, good|
+ assert_equal good, chars(bad).tidy_bytes.to_s
+ assert_equal "#{good}#{good}", chars("#{bad}#{bad}").tidy_bytes
+ assert_equal "#{good}#{good}#{good}", chars("#{bad}#{bad}#{bad}").tidy_bytes
+ assert_equal "#{good}a", chars("#{bad}a").tidy_bytes
+ assert_equal "#{good}á", chars("#{bad}á").tidy_bytes
+ assert_equal "a#{good}a", chars("a#{bad}a").tidy_bytes
+ assert_equal "á#{good}á", chars("á#{bad}á").tidy_bytes
+ assert_equal "a#{good}", chars("a#{bad}").tidy_bytes
+ assert_equal "á#{good}", chars("á#{bad}").tidy_bytes
+ end
+
byte_string = "\270\236\010\210\245"
tidy_string = [0xb8, 0x17e, 0x8, 0x2c6, 0xa5].pack('U*')
- ascii_padding = 'aa'
- utf8_padding = 'éé'
-
assert_equal_codepoints tidy_string, chars(byte_string).tidy_bytes
-
- assert_equal_codepoints ascii_padding.dup.insert(1, tidy_string),
- chars(ascii_padding.dup.insert(1, byte_string)).tidy_bytes
- assert_equal_codepoints utf8_padding.dup.insert(2, tidy_string),
- chars(utf8_padding.dup.insert(2, byte_string)).tidy_bytes
assert_nothing_raised { chars(byte_string).tidy_bytes.to_s.unpack('U*') }
- assert_equal_codepoints "\xC3\xA7", chars("\xE7").tidy_bytes # iso_8859_1: small c cedilla
- assert_equal_codepoints "\xE2\x80\x9C", chars("\x93").tidy_bytes # win_1252: left smart quote
- assert_equal_codepoints "\xE2\x82\xAC", chars("\x80").tidy_bytes # win_1252: euro
- assert_equal_codepoints "\x00", chars("\x00").tidy_bytes # null char
- assert_equal_codepoints [0xfffd].pack('U'), chars("\xef\xbf\xbd").tidy_bytes # invalid char
- rescue ArgumentError => e
- raise e if RUBY_VERSION < '1.9'
+ # UTF-8 leading byte followed by too few continuation bytes
+ assert_equal_codepoints "\xc3\xb0\xc2\xa5\xc2\xa4\x21", chars("\xf0\xa5\xa4\x21").tidy_bytes
+ end
+
+ def test_tidy_bytes_should_forcibly_tidy_bytes_if_specified
+ byte_string = "\xF0\xA5\xA4\xA4" # valid as both CP-1252 and UTF-8, but with different interpretations.
+ assert_not_equal "𥤤", chars(byte_string).tidy_bytes
+ # Forcible conversion to UTF-8
+ assert_equal "𥤤", chars(byte_string).tidy_bytes(true)
end
+
private
def string_from_classes(classes)