diff options
| author | wycats <wycats@gmail.com> | 2010-05-16 10:25:55 +0400 | 
|---|---|---|
| committer | wycats <wycats@gmail.com> | 2010-05-16 22:44:43 +0400 | 
| commit | 64d109e3539ad600f58536d3ecabd2f87b67fd1c (patch) | |
| tree | 4b1deedbd5e33dd5410b1a37e9895f7a254c751a /actionpack/lib | |
| parent | af0d1a88157942c6e6398dbf73891cff1e152405 (diff) | |
| download | rails-64d109e3539ad600f58536d3ecabd2f87b67fd1c.tar.gz rails-64d109e3539ad600f58536d3ecabd2f87b67fd1c.tar.bz2 rails-64d109e3539ad600f58536d3ecabd2f87b67fd1c.zip | |
Significantly improved internal encoding heuristics and support.
* Default Encoding.default_internal to UTF-8
* Eliminated the use of file-wide magic comments to coerce code evaluated inside the file
* Read templates as BINARY, use default_external or template-wide magic comments
  inside the Template to set the initial encoding
  * This means that template handlers in Ruby 1.9 will receive Strings encoded
    in default_internal (UTF-8 by default)
* Create a better Exception for encoding issues, and use it when the template
  source has bytes that are not compatible with the specified encoding
* Allow template handlers to opt-into handling BINARY. If they do so, they
  need to do some of their own manual encoding work
* Added a "Configuration Gotchas" section to the intro Rails Guide instructing
  users to use UTF-8 for everything
* Use config.encoding= in Ruby 1.8, and raise if a value that is an invalid
  $KCODE value is used
Also:
* Fixed a few tests that were assert() rather than assert_equal() and
  were caught by Minitest requiring a String for the message
* Fixed a test where an assert_select was misformed, also caught by
  Minitest being more restrictive
* Fixed a test where a Rack response was returning a String rather
  than an Enumerable
Diffstat (limited to 'actionpack/lib')
| -rw-r--r-- | actionpack/lib/action_view.rb | 6 | ||||
| -rw-r--r-- | actionpack/lib/action_view/template.rb | 201 | ||||
| -rw-r--r-- | actionpack/lib/action_view/template/error.rb | 18 | ||||
| -rw-r--r-- | actionpack/lib/action_view/template/handlers/erb.rb | 45 | ||||
| -rw-r--r-- | actionpack/lib/action_view/template/resolver.rb | 5 | 
5 files changed, 238 insertions, 37 deletions
| diff --git a/actionpack/lib/action_view.rb b/actionpack/lib/action_view.rb index 5e3b2ec51b..9f56cca869 100644 --- a/actionpack/lib/action_view.rb +++ b/actionpack/lib/action_view.rb @@ -51,7 +51,9 @@ module ActionView      autoload :MissingTemplate,    'action_view/template/error'      autoload :ActionViewError,    'action_view/template/error' -    autoload :TemplateError,     'action_view/template/error' +    autoload :EncodingError,      'action_view/template/error' +    autoload :TemplateError,      'action_view/template/error' +    autoload :WrongEncodingError, 'action_view/template/error'      autoload :TemplateHandler,   'action_view/template'      autoload :TemplateHandlers,  'action_view/template' @@ -59,7 +61,7 @@ module ActionView    autoload :TestCase, 'action_view/test_case' -  ENCODING_FLAG = "#.*coding[:=]\s*(\S+)[ \t]*" +  ENCODING_FLAG = '#.*coding[:=]\s*(\S+)[ \t]*'  end  require 'active_support/i18n' diff --git a/actionpack/lib/action_view/template.rb b/actionpack/lib/action_view/template.rb index ce249e2a96..5d8ac6b115 100644 --- a/actionpack/lib/action_view/template.rb +++ b/actionpack/lib/action_view/template.rb @@ -1,12 +1,89 @@ -# encoding: utf-8 -# This is so that templates compiled in this file are UTF-8  require 'active_support/core_ext/array/wrap'  require 'active_support/core_ext/object/blank' +require 'active_support/core_ext/kernel/singleton_class'  module ActionView    class Template      extend ActiveSupport::Autoload +    # === Encodings in ActionView::Template +    # +    # ActionView::Template is one of a few sources of potential +    # encoding issues in Rails. This is because the source for +    # templates are usually read from disk, and Ruby (like most +    # encoding-aware programming languages) assumes that the +    # String retrieved through File IO is encoded in the +    # <tt>default_external</tt> encoding. In Rails, the default +    # <tt>default_external</tt> encoding is UTF-8. +    # +    # As a result, if a user saves their template as ISO-8859-1 +    # (for instance, using a non-Unicode-aware text editor), +    # and uses characters outside of the ASCII range, their +    # users will see diamonds with question marks in them in +    # the browser. +    # +    # To mitigate this problem, we use a few strategies: +    # 1. If the source is not valid UTF-8, we raise an exception +    #    when the template is compiled to alert the user +    #    to the problem. +    # 2. The user can specify the encoding using Ruby-style +    #    encoding comments in any template engine. If such +    #    a comment is supplied, Rails will apply that encoding +    #    to the resulting compiled source returned by the +    #    template handler. +    # 3. In all cases, we transcode the resulting String to +    #    the <tt>default_internal</tt> encoding (which defaults +    #    to UTF-8). +    # +    # This means that other parts of Rails can always assume +    # that templates are encoded in UTF-8, even if the original +    # source of the template was not UTF-8. +    # +    # From a user's perspective, the easiest thing to do is +    # to save your templates as UTF-8. If you do this, you +    # do not need to do anything else for things to "just work". +    # +    # === Instructions for template handlers +    # +    # The easiest thing for you to do is to simply ignore +    # encodings. Rails will hand you the template source +    # as the default_internal (generally UTF-8), raising +    # an exception for the user before sending the template +    # to you if it could not determine the original encoding. +    # +    # For the greatest simplicity, you can support only +    # UTF-8 as the <tt>default_internal</tt>. This means +    # that from the perspective of your handler, the +    # entire pipeline is just UTF-8. +    # +    # === Advanced: Handlers with alternate metadata sources +    # +    # If you want to provide an alternate mechanism for +    # specifying encodings (like ERB does via <%# encoding: ... %>), +    # you may indicate that you are willing to accept +    # BINARY data by implementing <tt>self.accepts_binary?</tt> +    # on your handler. +    # +    # If you do, Rails will not raise an exception if +    # the template's encoding could not be determined, +    # assuming that you have another mechanism for +    # making the determination. +    # +    # In this case, make sure you return a String from +    # your handler encoded in the default_internal. Since +    # you are handling out-of-band metadata, you are +    # also responsible for alerting the user to any +    # problems with converting the user's data to +    # the default_internal. +    # +    # To do so, simply raise the raise WrongEncodingError +    # as follows: +    # +    #     raise WrongEncodingError.new( +    #       problematic_string, +    #       expected_encoding +    #     ) +      eager_autoload do        autoload :Error        autoload :Handler @@ -16,26 +93,22 @@ module ActionView      extend Template::Handlers -    attr_reader :source, :identifier, :handler, :virtual_path, :formats +    attr_reader :source, :identifier, :handler, :virtual_path, :formats, +                :original_encoding -    Finalizer = proc do |method_name| +    Finalizer = proc do |method_name, mod|        proc do -        ActionView::CompiledTemplates.module_eval do +        mod.module_eval do            remove_possible_method method_name          end        end      end      def initialize(source, identifier, handler, details) -      if source.encoding_aware? && source =~ %r{\A#{ENCODING_FLAG}} -        # don't snip off the \n to preserve line numbers -        source.sub!(/\A[^\n]*/, '') -        source.force_encoding($1).encode -      end - -      @source     = source -      @identifier = identifier -      @handler    = handler +      @source             = source +      @identifier         = identifier +      @handler            = handler +      @original_encoding  = nil        @virtual_path = details[:virtual_path]        @method_names = {} @@ -48,7 +121,13 @@ module ActionView        # Notice that we use a bang in this instrumentation because you don't want to        # consume this in production. This is only slow if it's being listened to.        ActiveSupport::Notifications.instrument("!render_template.action_view", :virtual_path => @virtual_path) do -        method_name = compile(locals, view) +        if view.is_a?(ActionView::CompiledTemplates) +          mod = ActionView::CompiledTemplates +        else +          mod = view.singleton_class +        end + +        method_name = compile(locals, view, mod)          view.send(method_name, locals, &block)        end      rescue Exception => e @@ -56,7 +135,7 @@ module ActionView          e.sub_template_of(self)          raise e        else -        raise Template::Error.new(self, view.assigns, e) +        raise Template::Error.new(self, view.respond_to?(:assigns) ? view.assigns : {}, e)        end      end @@ -81,37 +160,97 @@ module ActionView      end      private -      def compile(locals, view) +      # Among other things, this method is responsible for properly setting +      # the encoding of the source. Until this point, we assume that the +      # source is BINARY data. If no additional information is supplied, +      # we assume the encoding is the same as Encoding.default_external. +      # +      # The user can also specify the encoding via a comment on the first +      # line of the template (# encoding: NAME-OF-ENCODING). This will work +      # with any template engine, as we process out the encoding comment +      # before passing the source on to the template engine, leaving a +      # blank line in its stead. +      # +      # Note that after we figure out the correct encoding, we then +      # encode the source into Encoding.default_internal. In general, +      # this means that templates will be UTF-8 inside of Rails, +      # regardless of the original source encoding. +      def compile(locals, view, mod)          method_name = build_method_name(locals)          return method_name if view.respond_to?(method_name)          locals_code = locals.keys.map! { |key| "#{key} = local_assigns[:#{key}];" }.join -        code = @handler.call(self) -        if code.sub!(/\A(#.*coding.*)\n/, '') -          encoding_comment = $1 -        elsif defined?(Encoding) && Encoding.respond_to?(:default_external) -          encoding_comment = "#coding:#{Encoding.default_external}" +        if source.encoding_aware? +          if source.sub!(/\A#{ENCODING_FLAG}/, '') +            encoding = $1 +          else +            encoding = Encoding.default_external +          end + +          # Tag the source with the default external encoding +          # or the encoding specified in the file +          source.force_encoding(encoding) + +          # If the original encoding is BINARY, the actual +          # encoding is either stored out-of-band (such as +          # in ERB <%# %> style magic comments) or missing. +          # This is also true if the original encoding is +          # something other than BINARY, but it's invalid. +          if source.encoding != Encoding::BINARY && source.valid_encoding? +            source.encode! +          # If the assumed encoding is incorrect, check to +          # see whether the handler accepts BINARY. If it +          # does, it has another mechanism for determining +          # the true encoding of the String. +          elsif @handler.respond_to?(:accepts_binary?) && @handler.accepts_binary? +            source.force_encoding(Encoding::BINARY) +          # If the handler does not accept BINARY, the +          # assumed encoding (either the default_external, +          # or the explicit encoding specified by the user) +          # is incorrect. We raise an exception here. +          else +            raise WrongEncodingError.new(source, encoding) +          end + +          # Don't validate the encoding yet -- the handler +          # may treat the String as raw bytes and extract +          # the encoding some other way          end +        code = @handler.call(self) +          source = <<-end_src            def #{method_name}(local_assigns) -            _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = output_buffer;#{locals_code};#{code} +            _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = @output_buffer;#{locals_code};#{code}            ensure -            @_virtual_path, self.output_buffer = _old_virtual_path, _old_output_buffer +            @_virtual_path, @output_buffer = _old_virtual_path, _old_output_buffer            end          end_src -        if encoding_comment -          source = "#{encoding_comment}\n#{source}" -          line = -1 -        else -          line = 0 +        if source.encoding_aware? +          # Handlers should return their source Strings in either the +          # default_internal or BINARY. If the handler returns a BINARY +          # String, we assume its encoding is the one we determined +          # earlier, and encode the resulting source in the default_internal. +          if source.encoding == Encoding::BINARY +            source.force_encoding(Encoding.default_internal) +          end + +          # In case we get back a String from a handler that is not in +          # BINARY or the default_internal, encode it to the default_internal +          source.encode! + +          # Now, validate that the source we got back from the template +          # handler is valid in the default_internal +          unless source.valid_encoding? +            raise WrongEncodingError.new(@source, Encoding.default_internal) +          end          end          begin -          ActionView::CompiledTemplates.module_eval(source, identifier, line) -          ObjectSpace.define_finalizer(self, Finalizer[method_name]) +          mod.module_eval(source, identifier, 0) +          ObjectSpace.define_finalizer(self, Finalizer[method_name, mod])            method_name          rescue Exception => e # errors from template code diff --git a/actionpack/lib/action_view/template/error.rb b/actionpack/lib/action_view/template/error.rb index 6866eabf77..d3a53d2147 100644 --- a/actionpack/lib/action_view/template/error.rb +++ b/actionpack/lib/action_view/template/error.rb @@ -4,6 +4,24 @@ module ActionView    class ActionViewError < StandardError #:nodoc:    end +  class EncodingError < StandardError #:nodoc: +  end + +  class WrongEncodingError < EncodingError #:nodoc: +    def initialize(string, encoding) +      @string, @encoding = string, encoding +    end + +    def message +      "Your template was not saved as valid #{@encoding}. Please " \ +      "either specify #{@encoding} as the encoding for your template " \ +      "in your text editor, or mark the template with its " \ +      "encoding by inserting the following as the first line " \ +      "of the template:\n\n# encoding: <name of correct encoding>.\n\n" \ +      "The source of your template was:\n\n#{@string}" +    end +  end +    class MissingTemplate < ActionViewError #:nodoc:      attr_reader :path diff --git a/actionpack/lib/action_view/template/handlers/erb.rb b/actionpack/lib/action_view/template/handlers/erb.rb index 17652d6d1f..bbf012ab15 100644 --- a/actionpack/lib/action_view/template/handlers/erb.rb +++ b/actionpack/lib/action_view/template/handlers/erb.rb @@ -5,6 +5,11 @@ require 'erubis'  module ActionView    class OutputBuffer < ActiveSupport::SafeBuffer +    def initialize(*) +      super +      encode! +    end +      def <<(value)        super(value.to_s)      end @@ -72,16 +77,50 @@ module ActionView          cattr_accessor :erb_implementation          self.erb_implementation = Erubis -        ENCODING_TAG = Regexp.new("\A(<%#{ENCODING_FLAG}-?%>)[ \t]*") +        ENCODING_TAG = Regexp.new("\\A(<%#{ENCODING_FLAG}-?%>)[ \\t]*") + +        def self.accepts_binary? +          true +        end          def compile(template) -          erb = template.source.gsub(ENCODING_TAG, '') +          if template.source.encoding_aware? +            # Even though Rails has given us a String tagged with the +            # default_internal encoding (likely UTF-8), it is possible +            # that the String is actually encoded using a different +            # encoding, specified via an ERB magic comment. If the +            # String is not actually UTF-8, the regular expression +            # engine will (correctly) raise an exception. For now, +            # we'll reset the String to BINARY so we can run regular +            # expressions against it +            template_source = template.source.dup.force_encoding("BINARY") + +            # Erubis does not have direct support for encodings. +            # As a result, we will extract the ERB-style magic +            # comment, give the String to Erubis as BINARY data, +            # and then tag the resulting String with the extracted +            # encoding later +            erb = template_source.gsub(ENCODING_TAG, '') +            encoding = $2 + +            if !encoding && (template.source.encoding == Encoding::BINARY) +              raise WrongEncodingError.new(template_source, Encoding.default_external) +            end +          end +            result = self.class.erb_implementation.new(              erb,              :trim => (self.class.erb_trim_mode == "-")            ).src -          result = "#{$2}\n#{result}" if $2 +          # If an encoding tag was found, tag the String +          # we're returning with that encoding. Otherwise, +          # return a BINARY String, which is what ERB +          # returns. Note that if a magic comment was +          # not specified, we will return the data to +          # Rails as BINARY, which will then use its +          # own encoding logic to create a UTF-8 String. +          result = "\n#{result}".force_encoding(encoding).encode if encoding            result          end        end diff --git a/actionpack/lib/action_view/template/resolver.rb b/actionpack/lib/action_view/template/resolver.rb index a223b3a55f..ef44925951 100644 --- a/actionpack/lib/action_view/template/resolver.rb +++ b/actionpack/lib/action_view/template/resolver.rb @@ -70,7 +70,10 @@ module ActionView        Dir[query].reject { |p| File.directory?(p) }.map do |p|          handler, format = extract_handler_and_format(p, formats) -        Template.new(File.read(p), File.expand_path(p), handler, + +        contents = File.open(p, "rb") {|io| io.read } + +        Template.new(contents, File.expand_path(p), handler,            :virtual_path => path, :format => format)        end      end | 
