Optimize URI escaping

The URI::Parser#escape method is a general-purpose method that has to deal with a variety of input. However, our use of it is limited in scope, so we can increase performance by implementing our specific needs directly within ActionDispatch::Journey::Router::Utils. If no encoding is required, there is no change in performance or in the number of objects allocated, but for each character that needs to be encoded we save five object allocations and gain a performance boost. The boost varies from 20% when one character is encoded to over 50% when ten characters are encoded.
author: Andrew White <andyw@pixeltrix.co.uk> 2014-04-19 18:37:41 +0100
committer: Andrew White <andyw@pixeltrix.co.uk> 2014-04-20 10:11:38 +0100
commit: a61792574d9c8904590895f7a2f56803e02a6c52 (patch)
tree: 9b3e1d9bf7809137f4ffe46defa8f47fcfa4406b /actionpack/lib/action_dispatch/journey/router/utils.rb
parent: e2ef83f8387679ce540d745659a79dd13164f9b5 (diff)
download: rails-a61792574d9c8904590895f7a2f56803e02a6c52.tar.gz
rails-a61792574d9c8904590895f7a2f56803e02a6c52.tar.bz2
rails-a61792574d9c8904590895f7a2f56803e02a6c52.zip
1 files changed, 42 insertions, 17 deletions
diff --git a/actionpack/lib/action_dispatch/journey/router/utils.rb b/actionpack/lib/action_dispatch/journey/router/utils.rb
index 371de21f68..246d91da01 100644
--- a/actionpack/lib/action_dispatch/journey/router/utils.rb
+++ b/actionpack/lib/action_dispatch/journey/router/utils.rb
@@ -1,5 +1,3 @@
-require 'uri'
-
 module ActionDispatch
   module Journey # :nodoc:
     class Router # :nodoc:
@@ -25,31 +23,58 @@ module ActionDispatch
 
         # URI path and fragment escaping
         # http://tools.ietf.org/html/rfc3986
-        module UriEscape # :nodoc:
-          # Symbol captures can generate multiple path segments, so include /.
-          reserved_segment  = '/'
-          reserved_fragment = '/?'
-          reserved_pchar    = ':@&=+$,;'
-
-          safe_pchar    = "#{URI::REGEXP::PATTERN::UNRESERVED}#{reserved_pchar}"
-          safe_segment  = "#{safe_pchar}#{reserved_segment}"
-          safe_fragment = "#{safe_pchar}#{reserved_fragment}"
-          UNSAFE_SEGMENT  = Regexp.new("[^#{safe_segment}]", false).freeze
-          UNSAFE_FRAGMENT = Regexp.new("[^#{safe_fragment}]", false).freeze
+        class UriEncoder # :nodoc:
+          ENCODE   = "%%%02X".freeze
+          ENCODING = Encoding::US_ASCII
+          EMPTY    = "".force_encoding(ENCODING).freeze
+          DEC2HEX  = (0..255).to_a.map{ |i| ENCODE % i }.map{ |s| s.force_encoding(ENCODING) }
+
+          ALPHA = "a-zA-Z".freeze
+          DIGIT = "0-9".freeze
+          UNRESERVED = "#{ALPHA}#{DIGIT}\\-\\._~".freeze
+          SUB_DELIMS = "!\\$&'\\(\\)\\*\\+,;=".freeze
+
+          ESCAPED  = /%[a-zA-Z0-9]{2}/.freeze
+
+          FRAGMENT = /[^#{UNRESERVED}#{SUB_DELIMS}:@\/\?]/.freeze
+          PATH     = /[^#{UNRESERVED}#{SUB_DELIMS}:@\/]/.freeze
+
+          def escape_fragment(fragment)
+            escape(fragment, FRAGMENT)
+          end
+
+          def escape_path(path)
+            escape(path, PATH)
+          end
+
+          def unescape_uri(uri)
+            uri.gsub(ESCAPED) { [$&[1, 2].hex].pack('C') }.force_encoding(uri.encoding)
+          end
+
+          protected
+            def escape(component, pattern)
+              component.gsub(pattern){ |unsafe| percent_encode(unsafe) }.force_encoding(ENCODING)
+            end
+
+            def percent_encode(unsafe)
+              safe = EMPTY.dup
+              unsafe.each_byte { |b| safe << DEC2HEX[b] }
+              safe
+            end
         end
 
-        Parser = URI::Parser.new
+        ENCODER = UriEncoder.new
 
         def self.escape_path(path)
-          Parser.escape(path.to_s, UriEscape::UNSAFE_SEGMENT)
+          ENCODER.escape_path(path.to_s)
         end
 
         def self.escape_fragment(fragment)
-          Parser.escape(fragment.to_s, UriEscape::UNSAFE_FRAGMENT)
+          ENCODER.escape_fragment(fragment.to_s)
         end
 
         def self.unescape_uri(uri)
-          Parser.unescape(uri)
+          ENCODER.unescape_uri(uri)
         end
       end
     end
author	Andrew White <andyw@pixeltrix.co.uk>	2014-04-19 18:37:41 +0100
committer	Andrew White <andyw@pixeltrix.co.uk>	2014-04-20 10:11:38 +0100
commit	a61792574d9c8904590895f7a2f56803e02a6c52 (patch)
tree	9b3e1d9bf7809137f4ffe46defa8f47fcfa4406b /actionpack/lib/action_dispatch/journey/router/utils.rb
parent	e2ef83f8387679ce540d745659a79dd13164f9b5 (diff)
download	rails-a61792574d9c8904590895f7a2f56803e02a6c52.tar.gz rails-a61792574d9c8904590895f7a2f56803e02a6c52.tar.bz2 rails-a61792574d9c8904590895f7a2f56803e02a6c52.zip