diff options
author | wycats <wycats@gmail.com> | 2010-05-17 19:41:54 +0400 |
---|---|---|
committer | wycats <wycats@gmail.com> | 2010-05-17 19:41:54 +0400 |
commit | 80b60671f7216c571ea8711d1de8ca824aefbe54 (patch) | |
tree | c84be59be24bbd978b257ac526eb9b0f9051389f /actionpack/lib | |
parent | 5371242384171dc0255716e31e9257ddeec17d10 (diff) | |
download | rails-80b60671f7216c571ea8711d1de8ca824aefbe54.tar.gz rails-80b60671f7216c571ea8711d1de8ca824aefbe54.tar.bz2 rails-80b60671f7216c571ea8711d1de8ca824aefbe54.zip |
Revert "Moved encoding work in progress to a feature branch."
This reverts commit ade756fe42423033bae8e5aea8f58782f7a6c517.
Diffstat (limited to 'actionpack/lib')
-rw-r--r-- | actionpack/lib/action_view.rb | 6 | ||||
-rw-r--r-- | actionpack/lib/action_view/template.rb | 195 | ||||
-rw-r--r-- | actionpack/lib/action_view/template/error.rb | 18 | ||||
-rw-r--r-- | actionpack/lib/action_view/template/handlers/erb.rb | 137 | ||||
-rw-r--r-- | actionpack/lib/action_view/template/resolver.rb | 5 |
5 files changed, 289 insertions, 72 deletions
diff --git a/actionpack/lib/action_view.rb b/actionpack/lib/action_view.rb index 5555217ee2..9f56cca869 100644 --- a/actionpack/lib/action_view.rb +++ b/actionpack/lib/action_view.rb @@ -51,13 +51,17 @@ module ActionView autoload :MissingTemplate, 'action_view/template/error' autoload :ActionViewError, 'action_view/template/error' - autoload :TemplateError, 'action_view/template/error' + autoload :EncodingError, 'action_view/template/error' + autoload :TemplateError, 'action_view/template/error' + autoload :WrongEncodingError, 'action_view/template/error' autoload :TemplateHandler, 'action_view/template' autoload :TemplateHandlers, 'action_view/template' end autoload :TestCase, 'action_view/test_case' + + ENCODING_FLAG = '#.*coding[:=]\s*(\S+)[ \t]*' end require 'active_support/i18n' diff --git a/actionpack/lib/action_view/template.rb b/actionpack/lib/action_view/template.rb index a1a970e2d2..5d8ac6b115 100644 --- a/actionpack/lib/action_view/template.rb +++ b/actionpack/lib/action_view/template.rb @@ -1,12 +1,89 @@ -# encoding: utf-8 -# This is so that templates compiled in this file are UTF-8 require 'active_support/core_ext/array/wrap' require 'active_support/core_ext/object/blank' +require 'active_support/core_ext/kernel/singleton_class' module ActionView class Template extend ActiveSupport::Autoload + # === Encodings in ActionView::Template + # + # ActionView::Template is one of a few sources of potential + # encoding issues in Rails. This is because the source for + # templates are usually read from disk, and Ruby (like most + # encoding-aware programming languages) assumes that the + # String retrieved through File IO is encoded in the + # <tt>default_external</tt> encoding. In Rails, the default + # <tt>default_external</tt> encoding is UTF-8. + # + # As a result, if a user saves their template as ISO-8859-1 + # (for instance, using a non-Unicode-aware text editor), + # and uses characters outside of the ASCII range, their + # users will see diamonds with question marks in them in + # the browser. + # + # To mitigate this problem, we use a few strategies: + # 1. If the source is not valid UTF-8, we raise an exception + # when the template is compiled to alert the user + # to the problem. + # 2. The user can specify the encoding using Ruby-style + # encoding comments in any template engine. If such + # a comment is supplied, Rails will apply that encoding + # to the resulting compiled source returned by the + # template handler. + # 3. In all cases, we transcode the resulting String to + # the <tt>default_internal</tt> encoding (which defaults + # to UTF-8). + # + # This means that other parts of Rails can always assume + # that templates are encoded in UTF-8, even if the original + # source of the template was not UTF-8. + # + # From a user's perspective, the easiest thing to do is + # to save your templates as UTF-8. If you do this, you + # do not need to do anything else for things to "just work". + # + # === Instructions for template handlers + # + # The easiest thing for you to do is to simply ignore + # encodings. Rails will hand you the template source + # as the default_internal (generally UTF-8), raising + # an exception for the user before sending the template + # to you if it could not determine the original encoding. + # + # For the greatest simplicity, you can support only + # UTF-8 as the <tt>default_internal</tt>. This means + # that from the perspective of your handler, the + # entire pipeline is just UTF-8. + # + # === Advanced: Handlers with alternate metadata sources + # + # If you want to provide an alternate mechanism for + # specifying encodings (like ERB does via <%# encoding: ... %>), + # you may indicate that you are willing to accept + # BINARY data by implementing <tt>self.accepts_binary?</tt> + # on your handler. + # + # If you do, Rails will not raise an exception if + # the template's encoding could not be determined, + # assuming that you have another mechanism for + # making the determination. + # + # In this case, make sure you return a String from + # your handler encoded in the default_internal. Since + # you are handling out-of-band metadata, you are + # also responsible for alerting the user to any + # problems with converting the user's data to + # the default_internal. + # + # To do so, simply raise the raise WrongEncodingError + # as follows: + # + # raise WrongEncodingError.new( + # problematic_string, + # expected_encoding + # ) + eager_autoload do autoload :Error autoload :Handler @@ -16,20 +93,22 @@ module ActionView extend Template::Handlers - attr_reader :source, :identifier, :handler, :virtual_path, :formats + attr_reader :source, :identifier, :handler, :virtual_path, :formats, + :original_encoding - Finalizer = proc do |method_name| + Finalizer = proc do |method_name, mod| proc do - ActionView::CompiledTemplates.module_eval do + mod.module_eval do remove_possible_method method_name end end end def initialize(source, identifier, handler, details) - @source = source - @identifier = identifier - @handler = handler + @source = source + @identifier = identifier + @handler = handler + @original_encoding = nil @virtual_path = details[:virtual_path] @method_names = {} @@ -42,7 +121,13 @@ module ActionView # Notice that we use a bang in this instrumentation because you don't want to # consume this in production. This is only slow if it's being listened to. ActiveSupport::Notifications.instrument("!render_template.action_view", :virtual_path => @virtual_path) do - method_name = compile(locals, view) + if view.is_a?(ActionView::CompiledTemplates) + mod = ActionView::CompiledTemplates + else + mod = view.singleton_class + end + + method_name = compile(locals, view, mod) view.send(method_name, locals, &block) end rescue Exception => e @@ -50,7 +135,7 @@ module ActionView e.sub_template_of(self) raise e else - raise Template::Error.new(self, view.assigns, e) + raise Template::Error.new(self, view.respond_to?(:assigns) ? view.assigns : {}, e) end end @@ -75,37 +160,97 @@ module ActionView end private - def compile(locals, view) + # Among other things, this method is responsible for properly setting + # the encoding of the source. Until this point, we assume that the + # source is BINARY data. If no additional information is supplied, + # we assume the encoding is the same as Encoding.default_external. + # + # The user can also specify the encoding via a comment on the first + # line of the template (# encoding: NAME-OF-ENCODING). This will work + # with any template engine, as we process out the encoding comment + # before passing the source on to the template engine, leaving a + # blank line in its stead. + # + # Note that after we figure out the correct encoding, we then + # encode the source into Encoding.default_internal. In general, + # this means that templates will be UTF-8 inside of Rails, + # regardless of the original source encoding. + def compile(locals, view, mod) method_name = build_method_name(locals) return method_name if view.respond_to?(method_name) locals_code = locals.keys.map! { |key| "#{key} = local_assigns[:#{key}];" }.join - code = @handler.call(self) - if code.sub!(/\A(#.*coding.*)\n/, '') - encoding_comment = $1 - elsif defined?(Encoding) && Encoding.respond_to?(:default_external) - encoding_comment = "#coding:#{Encoding.default_external}" + if source.encoding_aware? + if source.sub!(/\A#{ENCODING_FLAG}/, '') + encoding = $1 + else + encoding = Encoding.default_external + end + + # Tag the source with the default external encoding + # or the encoding specified in the file + source.force_encoding(encoding) + + # If the original encoding is BINARY, the actual + # encoding is either stored out-of-band (such as + # in ERB <%# %> style magic comments) or missing. + # This is also true if the original encoding is + # something other than BINARY, but it's invalid. + if source.encoding != Encoding::BINARY && source.valid_encoding? + source.encode! + # If the assumed encoding is incorrect, check to + # see whether the handler accepts BINARY. If it + # does, it has another mechanism for determining + # the true encoding of the String. + elsif @handler.respond_to?(:accepts_binary?) && @handler.accepts_binary? + source.force_encoding(Encoding::BINARY) + # If the handler does not accept BINARY, the + # assumed encoding (either the default_external, + # or the explicit encoding specified by the user) + # is incorrect. We raise an exception here. + else + raise WrongEncodingError.new(source, encoding) + end + + # Don't validate the encoding yet -- the handler + # may treat the String as raw bytes and extract + # the encoding some other way end + code = @handler.call(self) + source = <<-end_src def #{method_name}(local_assigns) - _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = output_buffer;#{locals_code};#{code} + _old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = @output_buffer;#{locals_code};#{code} ensure - @_virtual_path, self.output_buffer = _old_virtual_path, _old_output_buffer + @_virtual_path, @output_buffer = _old_virtual_path, _old_output_buffer end end_src - if encoding_comment - source = "#{encoding_comment}\n#{source}" - line = -1 - else - line = 0 + if source.encoding_aware? + # Handlers should return their source Strings in either the + # default_internal or BINARY. If the handler returns a BINARY + # String, we assume its encoding is the one we determined + # earlier, and encode the resulting source in the default_internal. + if source.encoding == Encoding::BINARY + source.force_encoding(Encoding.default_internal) + end + + # In case we get back a String from a handler that is not in + # BINARY or the default_internal, encode it to the default_internal + source.encode! + + # Now, validate that the source we got back from the template + # handler is valid in the default_internal + unless source.valid_encoding? + raise WrongEncodingError.new(@source, Encoding.default_internal) + end end begin - ActionView::CompiledTemplates.module_eval(source, identifier, line) - ObjectSpace.define_finalizer(self, Finalizer[method_name]) + mod.module_eval(source, identifier, 0) + ObjectSpace.define_finalizer(self, Finalizer[method_name, mod]) method_name rescue Exception => e # errors from template code diff --git a/actionpack/lib/action_view/template/error.rb b/actionpack/lib/action_view/template/error.rb index 6866eabf77..d3a53d2147 100644 --- a/actionpack/lib/action_view/template/error.rb +++ b/actionpack/lib/action_view/template/error.rb @@ -4,6 +4,24 @@ module ActionView class ActionViewError < StandardError #:nodoc: end + class EncodingError < StandardError #:nodoc: + end + + class WrongEncodingError < EncodingError #:nodoc: + def initialize(string, encoding) + @string, @encoding = string, encoding + end + + def message + "Your template was not saved as valid #{@encoding}. Please " \ + "either specify #{@encoding} as the encoding for your template " \ + "in your text editor, or mark the template with its " \ + "encoding by inserting the following as the first line " \ + "of the template:\n\n# encoding: <name of correct encoding>.\n\n" \ + "The source of your template was:\n\n#{@string}" + end + end + class MissingTemplate < ActionViewError #:nodoc: attr_reader :path diff --git a/actionpack/lib/action_view/template/handlers/erb.rb b/actionpack/lib/action_view/template/handlers/erb.rb index 237746437a..bbf012ab15 100644 --- a/actionpack/lib/action_view/template/handlers/erb.rb +++ b/actionpack/lib/action_view/template/handlers/erb.rb @@ -1,9 +1,15 @@ require 'active_support/core_ext/class/attribute_accessors' require 'active_support/core_ext/string/output_safety' +require "action_view/template" require 'erubis' module ActionView class OutputBuffer < ActiveSupport::SafeBuffer + def initialize(*) + super + encode! + end + def <<(value) super(value.to_s) end @@ -17,65 +23,106 @@ module ActionView end end - module Template::Handlers - class Erubis < ::Erubis::Eruby - def add_preamble(src) - src << "@output_buffer = ActionView::OutputBuffer.new;" - end + class Template + module Handlers + class Erubis < ::Erubis::Eruby + def add_preamble(src) + src << "@output_buffer = ActionView::OutputBuffer.new;" + end - def add_text(src, text) - return if text.empty? - src << "@output_buffer.safe_concat('" << escape_text(text) << "');" - end + def add_text(src, text) + return if text.empty? + src << "@output_buffer.safe_concat('" << escape_text(text) << "');" + end - BLOCK_EXPR = /\s+(do|\{)(\s*\|[^|]*\|)?\s*\Z/ + BLOCK_EXPR = /\s+(do|\{)(\s*\|[^|]*\|)?\s*\Z/ - def add_expr_literal(src, code) - if code =~ BLOCK_EXPR - src << '@output_buffer.append= ' << code - else - src << '@output_buffer.append= (' << code << ');' + def add_expr_literal(src, code) + if code =~ BLOCK_EXPR + src << '@output_buffer.append= ' << code + else + src << '@output_buffer.append= (' << code << ');' + end end - end - def add_stmt(src, code) - if code =~ BLOCK_EXPR - src << '@output_buffer.append_if_string= ' << code - else - super + def add_stmt(src, code) + if code =~ BLOCK_EXPR + src << '@output_buffer.append_if_string= ' << code + else + super + end end - end - def add_expr_escaped(src, code) - src << '@output_buffer.append= ' << escaped_expr(code) << ';' - end + def add_expr_escaped(src, code) + src << '@output_buffer.append= ' << escaped_expr(code) << ';' + end - def add_postamble(src) - src << '@output_buffer.to_s' + def add_postamble(src) + src << '@output_buffer.to_s' + end end - end - class ERB < Template::Handler - include Compilable + class ERB < Handler + include Compilable + + ## + # :singleton-method: + # Specify trim mode for the ERB compiler. Defaults to '-'. + # See ERb documentation for suitable values. + cattr_accessor :erb_trim_mode + self.erb_trim_mode = '-' + + self.default_format = Mime::HTML + + cattr_accessor :erb_implementation + self.erb_implementation = Erubis - ## - # :singleton-method: - # Specify trim mode for the ERB compiler. Defaults to '-'. - # See ERb documentation for suitable values. - cattr_accessor :erb_trim_mode - self.erb_trim_mode = '-' + ENCODING_TAG = Regexp.new("\\A(<%#{ENCODING_FLAG}-?%>)[ \\t]*") - self.default_format = Mime::HTML + def self.accepts_binary? + true + end + + def compile(template) + if template.source.encoding_aware? + # Even though Rails has given us a String tagged with the + # default_internal encoding (likely UTF-8), it is possible + # that the String is actually encoded using a different + # encoding, specified via an ERB magic comment. If the + # String is not actually UTF-8, the regular expression + # engine will (correctly) raise an exception. For now, + # we'll reset the String to BINARY so we can run regular + # expressions against it + template_source = template.source.dup.force_encoding("BINARY") + + # Erubis does not have direct support for encodings. + # As a result, we will extract the ERB-style magic + # comment, give the String to Erubis as BINARY data, + # and then tag the resulting String with the extracted + # encoding later + erb = template_source.gsub(ENCODING_TAG, '') + encoding = $2 - cattr_accessor :erb_implementation - self.erb_implementation = Erubis + if !encoding && (template.source.encoding == Encoding::BINARY) + raise WrongEncodingError.new(template_source, Encoding.default_external) + end + end - def compile(template) - source = template.source.gsub(/\A(<%(#.*coding[:=]\s*(\S+)\s*)-?%>)\s*\n?/, '') - erb = "<% __in_erb_template=true %>#{source}" - result = self.class.erb_implementation.new(erb, :trim=>(self.class.erb_trim_mode == "-")).src - result = "#{$2}\n#{result}" if $2 - result + result = self.class.erb_implementation.new( + erb, + :trim => (self.class.erb_trim_mode == "-") + ).src + + # If an encoding tag was found, tag the String + # we're returning with that encoding. Otherwise, + # return a BINARY String, which is what ERB + # returns. Note that if a magic comment was + # not specified, we will return the data to + # Rails as BINARY, which will then use its + # own encoding logic to create a UTF-8 String. + result = "\n#{result}".force_encoding(encoding).encode if encoding + result + end end end end diff --git a/actionpack/lib/action_view/template/resolver.rb b/actionpack/lib/action_view/template/resolver.rb index a223b3a55f..ef44925951 100644 --- a/actionpack/lib/action_view/template/resolver.rb +++ b/actionpack/lib/action_view/template/resolver.rb @@ -70,7 +70,10 @@ module ActionView Dir[query].reject { |p| File.directory?(p) }.map do |p| handler, format = extract_handler_and_format(p, formats) - Template.new(File.read(p), File.expand_path(p), handler, + + contents = File.open(p, "rb") {|io| io.read } + + Template.new(contents, File.expand_path(p), handler, :virtual_path => path, :format => format) end end |