From 64d109e3539ad600f58536d3ecabd2f87b67fd1c Mon Sep 17 00:00:00 2001 From: wycats Date: Sun, 16 May 2010 10:25:55 +0400 Subject: Significantly improved internal encoding heuristics and support. * Default Encoding.default_internal to UTF-8 * Eliminated the use of file-wide magic comments to coerce code evaluated inside the file * Read templates as BINARY, use default_external or template-wide magic comments inside the Template to set the initial encoding * This means that template handlers in Ruby 1.9 will receive Strings encoded in default_internal (UTF-8 by default) * Create a better Exception for encoding issues, and use it when the template source has bytes that are not compatible with the specified encoding * Allow template handlers to opt-into handling BINARY. If they do so, they need to do some of their own manual encoding work * Added a "Configuration Gotchas" section to the intro Rails Guide instructing users to use UTF-8 for everything * Use config.encoding= in Ruby 1.8, and raise if a value that is an invalid $KCODE value is used Also: * Fixed a few tests that were assert() rather than assert_equal() and were caught by Minitest requiring a String for the message * Fixed a test where an assert_select was misformed, also caught by Minitest being more restrictive * Fixed a test where a Rack response was returning a String rather than an Enumerable --- .../lib/action_view/template/handlers/erb.rb | 45 ++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'actionpack/lib/action_view/template/handlers') diff --git a/actionpack/lib/action_view/template/handlers/erb.rb b/actionpack/lib/action_view/template/handlers/erb.rb index 17652d6d1f..bbf012ab15 100644 --- a/actionpack/lib/action_view/template/handlers/erb.rb +++ b/actionpack/lib/action_view/template/handlers/erb.rb @@ -5,6 +5,11 @@ require 'erubis' module ActionView class OutputBuffer < ActiveSupport::SafeBuffer + def initialize(*) + super + encode! + end + def <<(value) super(value.to_s) end @@ -72,16 +77,50 @@ module ActionView cattr_accessor :erb_implementation self.erb_implementation = Erubis - ENCODING_TAG = Regexp.new("\A(<%#{ENCODING_FLAG}-?%>)[ \t]*") + ENCODING_TAG = Regexp.new("\\A(<%#{ENCODING_FLAG}-?%>)[ \\t]*") + + def self.accepts_binary? + true + end def compile(template) - erb = template.source.gsub(ENCODING_TAG, '') + if template.source.encoding_aware? + # Even though Rails has given us a String tagged with the + # default_internal encoding (likely UTF-8), it is possible + # that the String is actually encoded using a different + # encoding, specified via an ERB magic comment. If the + # String is not actually UTF-8, the regular expression + # engine will (correctly) raise an exception. For now, + # we'll reset the String to BINARY so we can run regular + # expressions against it + template_source = template.source.dup.force_encoding("BINARY") + + # Erubis does not have direct support for encodings. + # As a result, we will extract the ERB-style magic + # comment, give the String to Erubis as BINARY data, + # and then tag the resulting String with the extracted + # encoding later + erb = template_source.gsub(ENCODING_TAG, '') + encoding = $2 + + if !encoding && (template.source.encoding == Encoding::BINARY) + raise WrongEncodingError.new(template_source, Encoding.default_external) + end + end + result = self.class.erb_implementation.new( erb, :trim => (self.class.erb_trim_mode == "-") ).src - result = "#{$2}\n#{result}" if $2 + # If an encoding tag was found, tag the String + # we're returning with that encoding. Otherwise, + # return a BINARY String, which is what ERB + # returns. Note that if a magic comment was + # not specified, we will return the data to + # Rails as BINARY, which will then use its + # own encoding logic to create a UTF-8 String. + result = "\n#{result}".force_encoding(encoding).encode if encoding result end end -- cgit v1.2.3