From d2ed32d5929f9d837280e2354e9a7e5c99fc445f Mon Sep 17 00:00:00 2001 From: Jeremy Kemper Date: Fri, 18 May 2007 06:24:50 +0000 Subject: Parse url-encoded and multipart requests ourselves instead of delegating to CGI. git-svn-id: http://svn-commit.rubyonrails.org/rails/trunk@6764 5ecf4fe2-1ee6-0310-87b1-e25e094e27de --- actionpack/lib/action_controller/base.rb | 4 +- actionpack/lib/action_controller/cgi_ext.rb | 2 - .../lib/action_controller/cgi_ext/parameters.rb | 208 ------------ .../action_controller/cgi_ext/query_extension.rb | 88 +---- actionpack/lib/action_controller/cgi_process.rb | 16 +- actionpack/lib/action_controller/integration.rb | 2 +- actionpack/lib/action_controller/request.rb | 370 ++++++++++++++++++++- 7 files changed, 372 insertions(+), 318 deletions(-) delete mode 100644 actionpack/lib/action_controller/cgi_ext/parameters.rb (limited to 'actionpack/lib') diff --git a/actionpack/lib/action_controller/base.rb b/actionpack/lib/action_controller/base.rb index 4aa7c05f98..6ba5e141ea 100755 --- a/actionpack/lib/action_controller/base.rb +++ b/actionpack/lib/action_controller/base.rb @@ -271,7 +271,9 @@ module ActionController #:nodoc: # A YAML parser is also available and can be turned on with: # # ActionController::Base.param_parsers[Mime::YAML] = :yaml - @@param_parsers = { Mime::XML => :xml_simple } + @@param_parsers = { Mime::MULTIPART_FORM => :multipart_form, + Mime::URL_ENCODED_FORM => :url_encoded_form, + Mime::XML => :xml_simple } cattr_accessor :param_parsers # Controls the default charset for all renders. diff --git a/actionpack/lib/action_controller/cgi_ext.rb b/actionpack/lib/action_controller/cgi_ext.rb index 1934ee704a..f3b8c08d8f 100644 --- a/actionpack/lib/action_controller/cgi_ext.rb +++ b/actionpack/lib/action_controller/cgi_ext.rb @@ -1,12 +1,10 @@ require 'action_controller/cgi_ext/stdinput' -require 'action_controller/cgi_ext/parameters' require 'action_controller/cgi_ext/query_extension' require 'action_controller/cgi_ext/cookie' require 'action_controller/cgi_ext/session' class CGI #:nodoc: include ActionController::CgiExt::Stdinput - include ActionController::CgiExt::Parameters class << self alias :escapeHTML_fail_on_nil :escapeHTML diff --git a/actionpack/lib/action_controller/cgi_ext/parameters.rb b/actionpack/lib/action_controller/cgi_ext/parameters.rb deleted file mode 100644 index 5eff896d18..0000000000 --- a/actionpack/lib/action_controller/cgi_ext/parameters.rb +++ /dev/null @@ -1,208 +0,0 @@ -require 'cgi' -require 'strscan' - -module ActionController - module CgiExt - module Parameters - def self.included(base) - base.extend ClassMethods - end - - # Merge POST and GET parameters from the request body and query string, - # with GET parameters taking precedence. - def parameters - request_parameters.update(query_parameters) - end - - def query_parameters - self.class.parse_query_parameters(query_string) - end - - def request_parameters - self.class.parse_request_parameters(params, env_table) - end - - module ClassMethods - def parse_query_parameters(query_string) - return {} if query_string.blank? - - pairs = query_string.split('&').collect do |chunk| - next if chunk.empty? - key, value = chunk.split('=', 2) - next if key.empty? - value = value.nil? ? nil : CGI.unescape(value) - [ CGI.unescape(key), value ] - end.compact - - UrlEncodedPairParser.new(pairs).result - end - - def parse_request_parameters(params) - parser = UrlEncodedPairParser.new - - params = params.dup - until params.empty? - for key, value in params - if key.blank? - params.delete key - elsif !key.include?('[') - # much faster to test for the most common case first (GET) - # and avoid the call to build_deep_hash - parser.result[key] = get_typed_value(value[0]) - params.delete key - elsif value.is_a?(Array) - parser.parse(key, get_typed_value(value.shift)) - params.delete key if value.empty? - else - raise TypeError, "Expected array, found #{value.inspect}" - end - end - end - - parser.result - end - - private - def get_typed_value(value) - case value - when String - value - when NilClass - '' - when Array - value.map { |v| get_typed_value(v) } - else - # Uploaded file provides content type and filename. - if value.respond_to?(:content_type) && - !value.content_type.blank? && - !value.original_filename.blank? - unless value.respond_to?(:full_original_filename) - class << value - alias_method :full_original_filename, :original_filename - - # Take the basename of the upload's original filename. - # This handles the full Windows paths given by Internet Explorer - # (and perhaps other broken user agents) without affecting - # those which give the lone filename. - # The Windows regexp is adapted from Perl's File::Basename. - def original_filename - if md = /^(?:.*[:\\\/])?(.*)/m.match(full_original_filename) - md.captures.first - else - File.basename full_original_filename - end - end - end - end - - # Return the same value after overriding original_filename. - value - - # Multipart values may have content type, but no filename. - elsif value.respond_to?(:read) - result = value.read - value.rewind - result - - # Unknown value, neither string nor multipart. - else - raise "Unknown form value: #{value.inspect}" - end - end - end - end - - class UrlEncodedPairParser < StringScanner #:nodoc: - attr_reader :top, :parent, :result - - def initialize(pairs = []) - super('') - @result = {} - pairs.each { |key, value| parse(key, value) } - end - - KEY_REGEXP = %r{([^\[\]=&]+)} - BRACKETED_KEY_REGEXP = %r{\[([^\[\]=&]+)\]} - - # Parse the query string - def parse(key, value) - self.string = key - @top, @parent = result, nil - - # First scan the bare key - key = scan(KEY_REGEXP) or return - key = post_key_check(key) - - # Then scan as many nestings as present - until eos? - r = scan(BRACKETED_KEY_REGEXP) or return - key = self[1] - key = post_key_check(key) - end - - bind(key, value) - end - - private - # After we see a key, we must look ahead to determine our next action. Cases: - # - # [] follows the key. Then the value must be an array. - # = follows the key. (A value comes next) - # & or the end of string follows the key. Then the key is a flag. - # otherwise, a hash follows the key. - def post_key_check(key) - if scan(/\[\]/) # a[b][] indicates that b is an array - container(key, Array) - nil - elsif check(/\[[^\]]/) # a[b] indicates that a is a hash - container(key, Hash) - nil - else # End of key? We do nothing. - key - end - end - - # Add a container to the stack. - def container(key, klass) - type_conflict! klass, top[key] if top.is_a?(Hash) && top.key?(key) && ! top[key].is_a?(klass) - value = bind(key, klass.new) - type_conflict! klass, value unless value.is_a?(klass) - push(value) - end - - # Push a value onto the 'stack', which is actually only the top 2 items. - def push(value) - @parent, @top = @top, value - end - - # Bind a key (which may be nil for items in an array) to the provided value. - def bind(key, value) - if top.is_a? Array - if key - if top[-1].is_a?(Hash) && ! top[-1].key?(key) - top[-1][key] = value - else - top << {key => value}.with_indifferent_access - push top.last - end - else - top << value - end - elsif top.is_a? Hash - key = CGI.unescape(key) - parent << (@top = {}) if top.key?(key) && parent.is_a?(Array) - return top[key] ||= value - else - raise ArgumentError, "Don't know what to do: top is #{top.inspect}" - end - - return value - end - - def type_conflict!(klass, value) - raise TypeError, "Conflicting types for parameter containers. Expected an instance of #{klass} but found an instance of #{value.class}. This can be caused by colliding Array and Hash parameters like qs[]=value&qs[key]=value." - end - end - end - end -end diff --git a/actionpack/lib/action_controller/cgi_ext/query_extension.rb b/actionpack/lib/action_controller/cgi_ext/query_extension.rb index 147530b5ce..9620fd2873 100644 --- a/actionpack/lib/action_controller/cgi_ext/query_extension.rb +++ b/actionpack/lib/action_controller/cgi_ext/query_extension.rb @@ -5,92 +5,18 @@ class CGI #:nodoc: # Remove the old initialize_query method before redefining it. remove_method :initialize_query - # Initialize the data from the query. - # - # Handles multipart forms (in particular, forms that involve file uploads). - # Reads query parameters in the @params field, and cookies into @cookies. + # Neuter CGI parameter parsing. def initialize_query - @cookies = CGI::Cookie::parse(env_table['HTTP_COOKIE'] || env_table['COOKIE']) - # Fix some strange request environments. - if method = env_table['REQUEST_METHOD'] - method = method.to_s.downcase.intern - else - method = :get - end + env_table['REQUEST_METHOD'] ||= 'GET' # POST assumes missing Content-Type is application/x-www-form-urlencoded. - content_type = env_table['CONTENT_TYPE'] - if content_type.blank? && method == :post - content_type = 'application/x-www-form-urlencoded' + if env_table['CONTENT_TYPE'].blank? && env_table['REQUEST_METHOD'] == 'POST' + env_table['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' end - # Force content length to zero if missing. - content_length = env_table['CONTENT_LENGTH'].to_i - - # Set multipart to false by default. - @multipart = false - - # POST and PUT may have params in entity body. If content type is missing - # or non-urlencoded, don't read the body or parse parameters: assume it's - # binary data. - if method == :post || method == :put - if boundary = extract_multipart_form_boundary(content_type) - @multipart = true - @params = read_multipart(boundary, content_length) - elsif content_type.blank? || content_type !~ %r{application/x-www-form-urlencoded}i - @params = {} - end - end - - @params ||= CGI.parse(read_params(method, content_length)) + @cookies = CGI::Cookie::parse(env_table['HTTP_COOKIE'] || env_table['COOKIE']) + @params = {} end - - private - unless defined?(MULTIPART_FORM_BOUNDARY_RE) - MULTIPART_FORM_BOUNDARY_RE = %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n #" - end - - def extract_multipart_form_boundary(content_type) - MULTIPART_FORM_BOUNDARY_RE.match(content_type).to_a.pop - end - - if defined? MOD_RUBY - def read_query - Apache::request.args || '' - end - else - def read_query - # fixes CGI querystring parsing for lighttpd - env_qs = env_table['QUERY_STRING'] - if env_qs.blank? && !(uri = env_table['REQUEST_URI']).blank? - uri.split('?', 2)[1] || '' - else - env_qs || '' - end - end - end - - def read_body(content_length) - stdinput.binmode if stdinput.respond_to?(:binmode) - content = stdinput.read(content_length) || '' - # Fix for Safari Ajax postings that always append \000 - content.chop! if content[-1] == 0 - content.gsub!(/&_=$/, '') - env_table['RAW_POST_DATA'] = content.freeze - end - - def read_params(method, content_length) - case method - when :get - read_query - when :post, :put - read_body(content_length) - when :cmd - read_from_cmdline - else # :head, :delete, :options, :trace, :connect - read_query - end - end - end # module QueryExtension + end end diff --git a/actionpack/lib/action_controller/cgi_process.rb b/actionpack/lib/action_controller/cgi_process.rb index 40a533ad6f..dcfa39fc03 100644 --- a/actionpack/lib/action_controller/cgi_process.rb +++ b/actionpack/lib/action_controller/cgi_process.rb @@ -47,12 +47,11 @@ module ActionController #:nodoc: end def query_string - if (qs = @cgi.query_string) && !qs.empty? + qs = @cgi.query_string + if !qs.blank? qs elsif uri = @env['REQUEST_URI'] - parts = uri.split('?') - parts.shift - parts.join('?') + uri.split('?', 2).last else @env['QUERY_STRING'] || '' end @@ -69,16 +68,11 @@ module ActionController #:nodoc: end def query_parameters - @query_parameters ||= CGI.parse_query_parameters(query_string) + @query_parameters ||= self.class.parse_query_parameters(query_string) end def request_parameters - @request_parameters ||= - if ActionController::Base.param_parsers.has_key?(content_type) - self.class.parse_formatted_request_parameters(content_type, body.read) - else - CGI.parse_request_parameters(@cgi.params) - end + @request_parameters ||= self.class.parse_formatted_request_parameters(body, content_type_with_parameters, content_length, env) end def cookies diff --git a/actionpack/lib/action_controller/integration.rb b/actionpack/lib/action_controller/integration.rb index 97e7427ea7..b78cae6c5b 100644 --- a/actionpack/lib/action_controller/integration.rb +++ b/actionpack/lib/action_controller/integration.rb @@ -306,7 +306,7 @@ module ActionController "REQUEST_URI" => "/", "HTTP_HOST" => host, "SERVER_PORT" => https? ? "443" : "80", - "HTTPS" => https? ? "on" : "off") + "HTTPS" => https? ? "on" : "off") ActionController::UrlRewriter.new(ActionController::CgiRequest.new(cgi), {}) end diff --git a/actionpack/lib/action_controller/request.rb b/actionpack/lib/action_controller/request.rb index 6a9b74d426..f5dfcbf457 100755 --- a/actionpack/lib/action_controller/request.rb +++ b/actionpack/lib/action_controller/request.rb @@ -1,3 +1,7 @@ +require 'tempfile' +require 'stringio' +require 'strscan' + module ActionController # CgiRequest and TestRequest provide concrete implementations. class AbstractRequest @@ -55,6 +59,14 @@ module ActionController @env end + def content_length + @content_length ||= env['CONTENT_LENGTH'].to_i + end + + def content_type_with_parameters + @content_type_with_parameters ||= env['CONTENT_TYPE'].to_s + end + # Determine whether the body of a HTTP call is URL-encoded (default) # or matches one of the registered param_parsers. # @@ -64,7 +76,7 @@ module ActionController @content_type ||= begin # Receive header sans any charset information. - content_type = @env['CONTENT_TYPE'].to_s.sub(/\s*\;.*$/, '').strip.downcase + content_type = content_type_with_parameters.sub(/\s*\;.*$/, '').strip.downcase if x_post_format = @env['HTTP_X_POST_DATA_FORMAT'] case x_post_format.to_s.downcase @@ -297,20 +309,350 @@ module ActionController end - def self.parse_formatted_request_parameters(mime_type, body) - case strategy = ActionController::Base.param_parsers[mime_type] - when Proc - strategy.call(body) - when :xml_simple, :xml_node - body.blank? ? {} : Hash.from_xml(body).with_indifferent_access - when :yaml - YAML.load(body) - else - {} + class << self + def parse_formatted_request_parameters(body, content_type, content_length, env = {}) + content_length = content_length.to_i + return {} if content_length.zero? + + content_type, boundary = extract_multipart_boundary(content_type.to_s) + return {} if content_type.blank? + + mime_type = Mime::Type.lookup(content_type) + strategy = ActionController::Base.param_parsers[mime_type] + + raise [content_type, content_length, mime_type, ActionController::Base.param_parsers].inspect unless strategy + + # Only multipart form parsing expects a stream. + if strategy && strategy != :multipart_form + body = body.read(content_length) + end + + case strategy + when Proc + strategy.call(body) + when :url_encoded_form + clean_up_ajax_request_body! body + parse_query_parameters(body) + when :multipart_form + parse_multipart_form_parameters(body, boundary, content_length, env) + when :xml_simple, :xml_node + body.blank? ? {} : Hash.from_xml(body).with_indifferent_access + when :yaml + YAML.load(body) + else + {} + end + rescue Exception => e # YAML, XML or Ruby code block errors + raise + { "body" => body, + "content_type" => content_type, + "content_length" => content_length, + "exception" => "#{e.message} (#{e.class})", + "backtrace" => e.backtrace } + end + + def parse_query_parameters(query_string) + return {} if query_string.blank? + + pairs = query_string.split('&').collect do |chunk| + next if chunk.empty? + key, value = chunk.split('=', 2) + next if key.empty? + value = value.nil? ? nil : CGI.unescape(value) + [ CGI.unescape(key), value ] + end.compact + + UrlEncodedPairParser.new(pairs).result + end + + def parse_request_parameters(params) + parser = UrlEncodedPairParser.new + + params = params.dup + until params.empty? + for key, value in params + if key.blank? + params.delete key + elsif !key.include?('[') + # much faster to test for the most common case first (GET) + # and avoid the call to build_deep_hash + parser.result[key] = get_typed_value(value[0]) + params.delete key + elsif value.is_a?(Array) + parser.parse(key, get_typed_value(value.shift)) + params.delete key if value.empty? + else + raise TypeError, "Expected array, found #{value.inspect}" + end + end + end + + parser.result + end + + def parse_multipart_form_parameters(body, boundary, content_length, env) + parse_request_parameters(read_multipart(body, boundary, content_length, env)) end - rescue Exception => e # YAML, XML or Ruby code block errors - { "exception" => "#{e.message} (#{e.class})", "backtrace" => e.backtrace, - "body" => body, "format" => mime_type } + + private + def get_typed_value(value) + case value + when String + value + when NilClass + '' + when Array + value.map { |v| get_typed_value(v) } + else + # Uploaded file provides content type and filename. + if value.respond_to?(:content_type) && + !value.content_type.blank? && + !value.original_filename.blank? + unless value.respond_to?(:full_original_filename) + class << value + alias_method :full_original_filename, :original_filename + + # Take the basename of the upload's original filename. + # This handles the full Windows paths given by Internet Explorer + # (and perhaps other broken user agents) without affecting + # those which give the lone filename. + # The Windows regexp is adapted from Perl's File::Basename. + def original_filename + if md = /^(?:.*[:\\\/])?(.*)/m.match(full_original_filename) + md.captures.first + else + File.basename full_original_filename + end + end + end + end + + # Return the same value after overriding original_filename. + value + + # Multipart values may have content type, but no filename. + elsif value.respond_to?(:read) + result = value.read + value.rewind + result + + # Unknown value, neither string nor multipart. + else + raise "Unknown form value: #{value.inspect}" + end + end + end + + + MULTIPART_BOUNDARY = %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n + + def extract_multipart_boundary(content_type) + if content_type =~ MULTIPART_BOUNDARY + ['multipart/form-data', $1.dup] + else + content_type + end + end + + def clean_up_ajax_request_body!(body) + body.chop! if body[-1] == 0 + body.gsub!(/&_=$/, '') + end + + + EOL = "\015\012" + + def read_multipart(body, boundary, content_length, env) + params = Hash.new([]) + boundary = "--" + boundary + quoted_boundary = Regexp.quote(boundary, "n") + buf = "" + bufsize = 10 * 1024 + boundary_end="" + + # start multipart/form-data + body.binmode if defined? body.binmode + boundary_size = boundary.size + EOL.size + content_length -= boundary_size + status = body.read(boundary_size) + if nil == status + raise EOFError, "no content body" + elsif boundary + EOL != status + raise EOFError, "bad content body" + end + + loop do + head = nil + content = + if 10240 < content_length + Tempfile.new("CGI") + else + StringIO.new + end + content.binmode if defined? content.binmode + + until head and /#{quoted_boundary}(?:#{EOL}|--)/n.match(buf) + + if (not head) and /#{EOL}#{EOL}/n.match(buf) + buf = buf.sub(/\A((?:.|\n)*?#{EOL})#{EOL}/n) do + head = $1.dup + "" + end + next + end + + if head and ( (EOL + boundary + EOL).size < buf.size ) + content.print buf[0 ... (buf.size - (EOL + boundary + EOL).size)] + buf[0 ... (buf.size - (EOL + boundary + EOL).size)] = "" + end + + c = if bufsize < content_length + body.read(bufsize) + else + body.read(content_length) + end + if c.nil? || c.empty? + raise EOFError, "bad content body" + end + buf.concat(c) + content_length -= c.size + end + + buf = buf.sub(/\A((?:.|\n)*?)(?:[\r\n]{1,2})?#{quoted_boundary}([\r\n]{1,2}|--)/n) do + content.print $1 + if "--" == $2 + content_length = -1 + end + boundary_end = $2.dup + "" + end + + content.rewind + + /Content-Disposition:.* filename=(?:"((?:\\.|[^\"])*)"|([^;]*))/ni.match(head) + filename = ($1 or $2 or "") + if /Mac/ni.match(env['HTTP_USER_AGENT']) and + /Mozilla/ni.match(env['HTTP_USER_AGENT']) and + (not /MSIE/ni.match(env['HTTP_USER_AGENT'])) + filename = CGI.unescape(filename) + end + + /Content-Type: (.*)/ni.match(head) + content_type = ($1 or "") + + (class << content; self; end).class_eval do + alias local_path path + define_method(:original_filename) {filename.dup.taint} + define_method(:content_type) {content_type.dup.taint} + end + + /Content-Disposition:.* name="?([^\";]*)"?/ni.match(head) + name = $1.dup + + if params.has_key?(name) + params[name].push(content) + else + params[name] = [content] + end + break if buf.size == 0 + break if content_length == -1 + end + raise EOFError, "bad boundary end of body part" unless boundary_end=~/--/ + + params + end + end + end + + class UrlEncodedPairParser < StringScanner #:nodoc: + attr_reader :top, :parent, :result + + def initialize(pairs = []) + super('') + @result = {} + pairs.each { |key, value| parse(key, value) } end + + KEY_REGEXP = %r{([^\[\]=&]+)} + BRACKETED_KEY_REGEXP = %r{\[([^\[\]=&]+)\]} + + # Parse the query string + def parse(key, value) + self.string = key + @top, @parent = result, nil + + # First scan the bare key + key = scan(KEY_REGEXP) or return + key = post_key_check(key) + + # Then scan as many nestings as present + until eos? + r = scan(BRACKETED_KEY_REGEXP) or return + key = self[1] + key = post_key_check(key) + end + + bind(key, value) + end + + private + # After we see a key, we must look ahead to determine our next action. Cases: + # + # [] follows the key. Then the value must be an array. + # = follows the key. (A value comes next) + # & or the end of string follows the key. Then the key is a flag. + # otherwise, a hash follows the key. + def post_key_check(key) + if scan(/\[\]/) # a[b][] indicates that b is an array + container(key, Array) + nil + elsif check(/\[[^\]]/) # a[b] indicates that a is a hash + container(key, Hash) + nil + else # End of key? We do nothing. + key + end + end + + # Add a container to the stack. + def container(key, klass) + type_conflict! klass, top[key] if top.is_a?(Hash) && top.key?(key) && ! top[key].is_a?(klass) + value = bind(key, klass.new) + type_conflict! klass, value unless value.is_a?(klass) + push(value) + end + + # Push a value onto the 'stack', which is actually only the top 2 items. + def push(value) + @parent, @top = @top, value + end + + # Bind a key (which may be nil for items in an array) to the provided value. + def bind(key, value) + if top.is_a? Array + if key + if top[-1].is_a?(Hash) && ! top[-1].key?(key) + top[-1][key] = value + else + top << {key => value}.with_indifferent_access + push top.last + end + else + top << value + end + elsif top.is_a? Hash + key = CGI.unescape(key) + parent << (@top = {}) if top.key?(key) && parent.is_a?(Array) + return top[key] ||= value + else + raise ArgumentError, "Don't know what to do: top is #{top.inspect}" + end + + return value + end + + def type_conflict!(klass, value) + raise TypeError, "Conflicting types for parameter containers. Expected an instance of #{klass} but found an instance of #{value.class}. This can be caused by colliding Array and Hash parameters like qs[]=value&qs[key]=value." + end end end -- cgit v1.2.3