From 85243e56613b9c656860f9f4b6b7535b6d46960a Mon Sep 17 00:00:00 2001 From: Jan Tvrdik Date: Thu, 18 Dec 2014 16:48:26 +0100 Subject: [PATCH 1/2] Url::unescape() optimized for performance --- src/Http/Url.php | 16 ++++++++-------- tests/Http/Url.unescape.phpt | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) create mode 100644 tests/Http/Url.unescape.phpt diff --git a/src/Http/Url.php b/src/Http/Url.php index 3d58e975..9787fc30 100644 --- a/src/Http/Url.php +++ b/src/Http/Url.php @@ -451,7 +451,7 @@ public function __toString() /** - * Similar to rawurldecode, but preserve reserved chars encoded. + * Similar to rawurldecode, but preserves reserved chars encoded. * @param string to decode * @param string reserved characters * @return string @@ -461,14 +461,14 @@ public static function unescape($s, $reserved = '%;/?:@&=+$,') // reserved (@see RFC 2396) = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," // within a path segment, the characters "/", ";", "=", "?" are reserved // within a query component, the characters ";", "/", "?", ":", "@", "&", "=", "+", ",", "$" are reserved. - preg_match_all('#(?<=%)[a-f0-9][a-f0-9]#i', $s, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER); - foreach (array_reverse($matches) as $match) { - $ch = chr(hexdec($match[0][0])); - if (strpos($reserved, $ch) === FALSE) { - $s = substr_replace($s, $ch, $match[0][1] - 1, 3); - } + if ($reserved !== '') { + $s = preg_replace( + '#%(' . substr(chunk_split(bin2hex($reserved), 2, '|'), 0, -1) . ')#i', + '%25$1', + $s + ); } - return $s; + return rawurldecode($s); } } diff --git a/tests/Http/Url.unescape.phpt b/tests/Http/Url.unescape.phpt new file mode 100644 index 00000000..0f8f7af3 --- /dev/null +++ b/tests/Http/Url.unescape.phpt @@ -0,0 +1,21 @@ + Date: Sun, 21 Dec 2014 01:36:11 +0100 Subject: [PATCH 2/2] Url::unescape() unifies %aa to %AA --- src/Http/Url.php | 4 ++-- tests/Http/Url.unescape.phpt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Http/Url.php b/src/Http/Url.php index 9787fc30..039d1724 100644 --- a/src/Http/Url.php +++ b/src/Http/Url.php @@ -462,9 +462,9 @@ public static function unescape($s, $reserved = '%;/?:@&=+$,') // within a path segment, the characters "/", ";", "=", "?" are reserved // within a query component, the characters ";", "/", "?", ":", "@", "&", "=", "+", ",", "$" are reserved. if ($reserved !== '') { - $s = preg_replace( + $s = preg_replace_callback( '#%(' . substr(chunk_split(bin2hex($reserved), 2, '|'), 0, -1) . ')#i', - '%25$1', + function($m) { return '%25' . strtoupper($m[1]); }, $s ); } diff --git a/tests/Http/Url.unescape.phpt b/tests/Http/Url.unescape.phpt index 0f8f7af3..789f29ee 100644 --- a/tests/Http/Url.unescape.phpt +++ b/tests/Http/Url.unescape.phpt @@ -17,5 +17,5 @@ Assert::same( 'foo', Url::unescape('%66%6F%6F', '') ); Assert::same( 'f%6F%6F', Url::unescape('%66%6F%6F', 'o') ); Assert::same( '%66oo', Url::unescape('%66%6F%6F', 'f') ); Assert::same( '%66%6F%6F', Url::unescape('%66%6F%6F', 'fo') ); -Assert::same( '%66%6f%6f', Url::unescape('%66%6f%6f', 'fo') ); +Assert::same( '%66%6F%6F', Url::unescape('%66%6f%6f', 'fo') ); Assert::same( "%00\x01%02", Url::unescape('%00%01%02', "\x00\x02") );