diff options
-rw-r--r-- | actionpack/lib/action_view/template.rb | 77 | ||||
-rw-r--r-- | actionpack/lib/action_view/template/error.rb | 1 | ||||
-rw-r--r-- | actionpack/lib/action_view/template/handlers/erb.rb | 52 | ||||
-rw-r--r-- | actionpack/test/template/template_test.rb | 17 |
4 files changed, 78 insertions, 69 deletions
diff --git a/actionpack/lib/action_view/template.rb b/actionpack/lib/action_view/template.rb index 5d8ac6b115..53ad24fdc6 100644 --- a/actionpack/lib/action_view/template.rb +++ b/actionpack/lib/action_view/template.rb @@ -22,6 +22,10 @@ module ActionView # users will see diamonds with question marks in them in # the browser. # + # For the rest of this documentation, when we say "UTF-8", + # we mean "UTF-8 or whatever the default_internal encoding + # is set to". By default, it will be UTF-8. + # # To mitigate this problem, we use a few strategies: # 1. If the source is not valid UTF-8, we raise an exception # when the template is compiled to alert the user @@ -32,8 +36,7 @@ module ActionView # to the resulting compiled source returned by the # template handler. # 3. In all cases, we transcode the resulting String to - # the <tt>default_internal</tt> encoding (which defaults - # to UTF-8). + # the UTF-8. # # This means that other parts of Rails can always assume # that templates are encoded in UTF-8, even if the original @@ -60,14 +63,14 @@ module ActionView # # If you want to provide an alternate mechanism for # specifying encodings (like ERB does via <%# encoding: ... %>), - # you may indicate that you are willing to accept - # BINARY data by implementing <tt>self.accepts_binary?</tt> + # you may indicate that you will handle encodings yourself + # by implementing <tt>self.handles_encoding?</tt> # on your handler. # - # If you do, Rails will not raise an exception if - # the template's encoding could not be determined, - # assuming that you have another mechanism for - # making the determination. + # If you do, Rails will not try to encode the String + # into the default_internal, passing you the unaltered + # bytes tagged with the assumed encoding (from + # default_external). # # In this case, make sure you return a String from # your handler encoded in the default_internal. Since @@ -171,7 +174,12 @@ module ActionView # before passing the source on to the template engine, leaving a # blank line in its stead. # - # Note that after we figure out the correct encoding, we then + # If the template engine handles encodings, we send the encoded + # String to the engine without further processing. This allows + # the template engine to support additional mechanisms for + # specifying the encoding. For instance, ERB supports <%# encoding: %> + # + # Otherwise, after we figure out the correct encoding, we then # encode the source into Encoding.default_internal. In general, # this means that templates will be UTF-8 inside of Rails, # regardless of the original source encoding. @@ -182,8 +190,11 @@ module ActionView locals_code = locals.keys.map! { |key| "#{key} = local_assigns[:#{key}];" }.join if source.encoding_aware? + # Look for # encoding: *. If we find one, we'll encode the + # String in that encoding, otherwise, we'll use the + # default external encoding. if source.sub!(/\A#{ENCODING_FLAG}/, '') - encoding = $1 + encoding = magic_encoding = $1 else encoding = Encoding.default_external end @@ -192,34 +203,28 @@ module ActionView # or the encoding specified in the file source.force_encoding(encoding) - # If the original encoding is BINARY, the actual - # encoding is either stored out-of-band (such as - # in ERB <%# %> style magic comments) or missing. - # This is also true if the original encoding is - # something other than BINARY, but it's invalid. - if source.encoding != Encoding::BINARY && source.valid_encoding? + # If the user didn't specify an encoding, and the handler + # handles encodings, we simply pass the String as is to + # the handler (with the default_external tag) + if !magic_encoding && @handler.respond_to?(:handles_encoding?) && @handler.handles_encoding? + source + # Otherwise, if the String is valid in the encoding, + # encode immediately to default_internal. This means + # that if a handler doesn't handle encodings, it will + # always get Strings in the default_internal + elsif source.valid_encoding? source.encode! - # If the assumed encoding is incorrect, check to - # see whether the handler accepts BINARY. If it - # does, it has another mechanism for determining - # the true encoding of the String. - elsif @handler.respond_to?(:accepts_binary?) && @handler.accepts_binary? - source.force_encoding(Encoding::BINARY) - # If the handler does not accept BINARY, the - # assumed encoding (either the default_external, - # or the explicit encoding specified by the user) - # is incorrect. We raise an exception here. + # Otherwise, since the String is invalid in the encoding + # specified, raise an exception else raise WrongEncodingError.new(source, encoding) end - - # Don't validate the encoding yet -- the handler - # may treat the String as raw bytes and extract - # the encoding some other way end code = @handler.call(self) + # Make sure that the resulting String to be evalled is in the + # encoding of the code source = <<-end_src def #{method_name}(local_assigns) _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = @output_buffer;#{locals_code};#{code} @@ -229,20 +234,16 @@ module ActionView end_src if source.encoding_aware? - # Handlers should return their source Strings in either the - # default_internal or BINARY. If the handler returns a BINARY - # String, we assume its encoding is the one we determined - # earlier, and encode the resulting source in the default_internal. - if source.encoding == Encoding::BINARY - source.force_encoding(Encoding.default_internal) - end + # Make sure the source is in the encoding of the returned code + source.force_encoding(code.encoding) # In case we get back a String from a handler that is not in # BINARY or the default_internal, encode it to the default_internal source.encode! # Now, validate that the source we got back from the template - # handler is valid in the default_internal + # handler is valid in the default_internal. This is for handlers + # that handle encoding but screw up unless source.valid_encoding? raise WrongEncodingError.new(@source, Encoding.default_internal) end diff --git a/actionpack/lib/action_view/template/error.rb b/actionpack/lib/action_view/template/error.rb index d3a53d2147..e50de7e5af 100644 --- a/actionpack/lib/action_view/template/error.rb +++ b/actionpack/lib/action_view/template/error.rb @@ -13,6 +13,7 @@ module ActionView end def message + @string.force_encoding("BINARY") "Your template was not saved as valid #{@encoding}. Please " \ "either specify #{@encoding} as the encoding for your template " \ "in your text editor, or mark the template with its " \ diff --git a/actionpack/lib/action_view/template/handlers/erb.rb b/actionpack/lib/action_view/template/handlers/erb.rb index cbed0108cf..ce609e01af 100644 --- a/actionpack/lib/action_view/template/handlers/erb.rb +++ b/actionpack/lib/action_view/template/handlers/erb.rb @@ -79,51 +79,49 @@ module ActionView ENCODING_TAG = Regexp.new("\\A(<%#{ENCODING_FLAG}-?%>)[ \\t]*") - def self.accepts_binary? + def self.handles_encoding? true end def compile(template) if template.source.encoding_aware? - # Even though Rails has given us a String tagged with the - # default_internal encoding (likely UTF-8), it is possible - # that the String is actually encoded using a different - # encoding, specified via an ERB magic comment. If the - # String is not actually UTF-8, the regular expression - # engine will (correctly) raise an exception. For now, - # we'll reset the String to BINARY so we can run regular - # expressions against it + # First, convert to BINARY, so in case the encoding is + # wrong, we can still find an encoding tag + # (<%# encoding %>) inside the String using a regular + # expression template_source = template.source.dup.force_encoding("BINARY") - # Erubis does not have direct support for encodings. - # As a result, we will extract the ERB-style magic - # comment, give the String to Erubis as BINARY data, - # and then tag the resulting String with the extracted - # encoding later erb = template_source.gsub(ENCODING_TAG, '') encoding = $2 - if !encoding && (template.source.encoding == Encoding::BINARY) - raise WrongEncodingError.new(template_source, Encoding.default_external) - end + erb.force_encoding valid_encoding(template.source.dup, encoding) + + # Always make sure we return a String in the default_internal + erb.encode! else erb = template.source.dup end - result = self.class.erb_implementation.new( + self.class.erb_implementation.new( erb, :trim => (self.class.erb_trim_mode == "-") ).src + end + + private + def valid_encoding(string, encoding) + # If a magic encoding comment was found, tag the + # String with this encoding. This is for a case + # where the original String was assumed to be, + # for instance, UTF-8, but a magic comment + # proved otherwise + string.force_encoding(encoding) if encoding + + # If the String is valid, return the encoding we found + return string.encoding if string.valid_encoding? - # If an encoding tag was found, tag the String - # we're returning with that encoding. Otherwise, - # return a BINARY String, which is what ERB - # returns. Note that if a magic comment was - # not specified, we will return the data to - # Rails as BINARY, which will then use its - # own encoding logic to create a UTF-8 String. - result = "\n#{result}".force_encoding(encoding).encode if encoding - result + # Otherwise, raise an exception + raise WrongEncodingError.new(string, string.encoding) end end end diff --git a/actionpack/test/template/template_test.rb b/actionpack/test/template/template_test.rb index 995d728d50..18e0e83ec3 100644 --- a/actionpack/test/template/template_test.rb +++ b/actionpack/test/template/template_test.rb @@ -114,10 +114,12 @@ class TestERBTemplate < ActiveSupport::TestCase end def test_encoding_can_be_specified_with_magic_comment_in_erb - @template = new_template("<%# encoding: ISO-8859-1 %>hello \xFCmlat") - result = render - assert_equal Encoding::UTF_8, render.encoding - assert_equal "hello \u{fc}mlat", render + with_external_encoding Encoding::UTF_8 do + @template = new_template("<%# encoding: ISO-8859-1 %>hello \xFCmlat") + result = render + assert_equal Encoding::UTF_8, render.encoding + assert_equal "hello \u{fc}mlat", render + end end def test_error_when_template_isnt_valid_utf8 @@ -126,5 +128,12 @@ class TestERBTemplate < ActiveSupport::TestCase render end end + + def with_external_encoding(encoding) + old, Encoding.default_external = Encoding.default_external, encoding + yield + ensure + Encoding.default_external = old + end end end
\ No newline at end of file |