File | /usr/local/lib/perl5/site_perl/5.10.1/URI/Escape.pm |
Statements Executed | 6592 |
Statement Execution Time | 14.9ms |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
582 | 2 | 1 | 3.24ms | 14.1ms | uri_escape_utf8 | URI::Escape::
582 | 1 | 1 | 3.16ms | 10.5ms | uri_escape | URI::Escape::
1471 | 1 | 2 | 1.30ms | 1.30ms | CORE:substcont (opcode) | URI::Escape::
123 | 1 | 1 | 744µs | 796µs | uri_unescape | URI::Escape::
706 | 3 | 3 | 577µs | 577µs | CORE:subst (opcode) | URI::Escape::
1 | 1 | 1 | 56µs | 63µs | BEGIN@2 | URI::Escape::
1 | 1 | 1 | 8µs | 65µs | BEGIN@141 | URI::Escape::
1 | 1 | 1 | 7µs | 22µs | BEGIN@142 | URI::Escape::
1 | 1 | 1 | 4µs | 4µs | BEGIN@150 | URI::Escape::
0 | 0 | 0 | 0s | 0s | _fail_hi | URI::Escape::
0 | 0 | 0 | 0s | 0s | escape_char | URI::Escape::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | package URI::Escape; | ||||
2 | 3 | 64µs | 2 | 70µs | # spent 63µs (56+7) within URI::Escape::BEGIN@2 which was called
# once (56µs+7µs) by URI::BEGIN@22 at line 2 # spent 63µs making 1 call to URI::Escape::BEGIN@2
# spent 7µs making 1 call to strict::import |
3 | |||||
4 | =head1 NAME | ||||
5 | |||||
6 | URI::Escape - Escape and unescape unsafe characters | ||||
7 | |||||
8 | =head1 SYNOPSIS | ||||
9 | |||||
10 | use URI::Escape; | ||||
11 | $safe = uri_escape("10% is enough\n"); | ||||
12 | $verysafe = uri_escape("foo", "\0-\377"); | ||||
13 | $str = uri_unescape($safe); | ||||
14 | |||||
15 | =head1 DESCRIPTION | ||||
16 | |||||
17 | This module provides functions to escape and unescape URI strings as | ||||
18 | defined by RFC 2396 (and updated by RFC 2732). | ||||
19 | A URI consists of a restricted set of characters, | ||||
20 | denoted as C<uric> in RFC 2396. The restricted set of characters | ||||
21 | consists of digits, letters, and a few graphic symbols chosen from | ||||
22 | those common to most of the character encodings and input facilities | ||||
23 | available to Internet users: | ||||
24 | |||||
25 | "A" .. "Z", "a" .. "z", "0" .. "9", | ||||
26 | ";", "/", "?", ":", "@", "&", "=", "+", "$", ",", "[", "]", # reserved | ||||
27 | "-", "_", ".", "!", "~", "*", "'", "(", ")" | ||||
28 | |||||
29 | In addition, any byte (octet) can be represented in a URI by an escape | ||||
30 | sequence: a triplet consisting of the character "%" followed by two | ||||
31 | hexadecimal digits. A byte can also be represented directly by a | ||||
32 | character, using the US-ASCII character for that octet (iff the | ||||
33 | character is part of C<uric>). | ||||
34 | |||||
35 | Some of the C<uric> characters are I<reserved> for use as delimiters | ||||
36 | or as part of certain URI components. These must be escaped if they are | ||||
37 | to be treated as ordinary data. Read RFC 2396 for further details. | ||||
38 | |||||
39 | The functions provided (and exported by default) from this module are: | ||||
40 | |||||
41 | =over 4 | ||||
42 | |||||
43 | =item uri_escape( $string ) | ||||
44 | |||||
45 | =item uri_escape( $string, $unsafe ) | ||||
46 | |||||
47 | Replaces each unsafe character in the $string with the corresponding | ||||
48 | escape sequence and returns the result. The $string argument should | ||||
49 | be a string of bytes. The uri_escape() function will croak if given | ||||
50 | characters with code above 255. Use uri_escape_utf8() if you know you | ||||
51 | have such chars and/or want chars in the 128 .. 255 range treated as | ||||
52 | UTF-8. | ||||
53 | |||||
54 | The uri_escape() function takes an optional second argument that | ||||
55 | overrides the set of characters that are to be escaped. The set is | ||||
56 | specified as a string that can be used in a regular expression | ||||
57 | character class (between [ ]). E.g.: | ||||
58 | |||||
59 | "\x00-\x1f\x7f-\xff" # all control and hi-bit characters | ||||
60 | "a-z" # all lower case characters | ||||
61 | "^A-Za-z" # everything not a letter | ||||
62 | |||||
63 | The default set of characters to be escaped is all those which are | ||||
64 | I<not> part of the C<uric> character class shown above as well as the | ||||
65 | reserved characters. I.e. the default is: | ||||
66 | |||||
67 | "^A-Za-z0-9\-_.!~*'()" | ||||
68 | |||||
69 | =item uri_escape_utf8( $string ) | ||||
70 | |||||
71 | =item uri_escape_utf8( $string, $unsafe ) | ||||
72 | |||||
73 | Works like uri_escape(), but will encode chars as UTF-8 before | ||||
74 | escaping them. This makes this function able to deal with characters | ||||
75 | with code above 255 in $string. Note that chars in the 128 .. 255 | ||||
76 | range will be escaped differently by this function compared to what | ||||
77 | uri_escape() would. For chars in the 0 .. 127 range there is no | ||||
78 | difference. | ||||
79 | |||||
80 | The call: | ||||
81 | |||||
82 | $uri = uri_escape_utf8($string); | ||||
83 | |||||
84 | will be the same as: | ||||
85 | |||||
86 | use Encode qw(encode); | ||||
87 | $uri = uri_escape(encode("UTF-8", $string)); | ||||
88 | |||||
89 | but will even work for perl-5.6 for chars in the 128 .. 255 range. | ||||
90 | |||||
91 | Note: Javascript has a function called escape() that produces the | ||||
92 | sequence "%uXXXX" for chars in the 256 .. 65535 range. This function | ||||
93 | has really nothing to do with URI escaping but some folks got confused | ||||
94 | since it "does the right thing" in the 0 .. 255 range. Because of | ||||
95 | this you sometimes see "URIs" with these kinds of escapes. The | ||||
96 | JavaScript encodeURIComponent() function is similar to uri_escape_utf8(). | ||||
97 | |||||
98 | =item uri_unescape($string,...) | ||||
99 | |||||
100 | Returns a string with each %XX sequence replaced with the actual byte | ||||
101 | (octet). | ||||
102 | |||||
103 | This does the same as: | ||||
104 | |||||
105 | $string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
106 | |||||
107 | but does not modify the string in-place as this RE would. Using the | ||||
108 | uri_unescape() function instead of the RE might make the code look | ||||
109 | cleaner and is a few characters less to type. | ||||
110 | |||||
111 | In a simple benchmark test I did, | ||||
112 | calling the function (instead of the inline RE above) if a few chars | ||||
113 | were unescaped was something like 40% slower, and something like 700% slower if none were. If | ||||
114 | you are going to unescape a lot of times it might be a good idea to | ||||
115 | inline the RE. | ||||
116 | |||||
117 | If the uri_unescape() function is passed multiple strings, then each | ||||
118 | one is returned unescaped. | ||||
119 | |||||
120 | =back | ||||
121 | |||||
122 | The module can also export the C<%escapes> hash, which contains the | ||||
123 | mapping from all 256 bytes to the corresponding escape codes. Lookup | ||||
124 | in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))> | ||||
125 | each time. | ||||
126 | |||||
127 | =head1 SEE ALSO | ||||
128 | |||||
129 | L<URI> | ||||
130 | |||||
131 | |||||
132 | =head1 COPYRIGHT | ||||
133 | |||||
134 | Copyright 1995-2004 Gisle Aas. | ||||
135 | |||||
136 | This program is free software; you can redistribute it and/or modify | ||||
137 | it under the same terms as Perl itself. | ||||
138 | |||||
139 | =cut | ||||
140 | |||||
141 | 3 | 26µs | 2 | 122µs | # spent 65µs (8+57) within URI::Escape::BEGIN@141 which was called
# once (8µs+57µs) by URI::BEGIN@22 at line 141 # spent 65µs making 1 call to URI::Escape::BEGIN@141
# spent 57µs making 1 call to vars::import |
142 | 3 | 42µs | 2 | 38µs | # spent 22µs (7+15) within URI::Escape::BEGIN@142 which was called
# once (7µs+15µs) by URI::BEGIN@22 at line 142 # spent 22µs making 1 call to URI::Escape::BEGIN@142
# spent 15µs making 1 call to vars::import |
143 | |||||
144 | 1 | 800ns | require Exporter; | ||
145 | 1 | 8µs | @ISA = qw(Exporter); | ||
146 | 1 | 800ns | @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); | ||
147 | 1 | 300ns | @EXPORT_OK = qw(%escapes); | ||
148 | 1 | 300ns | $VERSION = "3.29"; | ||
149 | |||||
150 | 3 | 368µs | 1 | 4µs | # spent 4µs within URI::Escape::BEGIN@150 which was called
# once (4µs+0s) by URI::BEGIN@22 at line 150 # spent 4µs making 1 call to URI::Escape::BEGIN@150 |
151 | |||||
152 | # Build a char->hex map | ||||
153 | 1 | 2µs | for (0..255) { | ||
154 | 256 | 342µs | $escapes{chr($_)} = sprintf("%%%02X", $_); | ||
155 | } | ||||
156 | |||||
157 | 1 | 200ns | my %subst; # compiled patternes | ||
158 | |||||
159 | sub uri_escape | ||||
160 | # spent 10.5ms (3.16+7.30) within URI::Escape::uri_escape which was called 582 times, avg 18µs/call:
# 582 times (3.16ms+7.30ms) by URI::Escape::uri_escape_utf8 at line 193, avg 18µs/call | ||||
161 | 3495 | 2.66ms | my($text, $patn) = @_; | ||
162 | return undef unless defined $text; | ||||
163 | if (defined $patn){ | ||||
164 | unless (exists $subst{$patn}) { | ||||
165 | # Because we can't compile the regex we fake it with a cached sub | ||||
166 | (my $tmp = $patn) =~ s,/,\\/,g; # spent 800ns making 1 call to URI::Escape::CORE:subst | ||||
167 | 1 | 7.41ms | 2053 | 1.83ms | eval "\$subst{\$patn} = sub {\$_[0] =~ s/([$tmp])/\$escapes{\$1} || _fail_hi(\$1)/ge; }"; # spent 1.30ms making 1471 calls to URI::Escape::CORE:substcont, avg 887ns/call
# spent 523µs making 582 calls to URI::Escape::CORE:subst, avg 899ns/call |
168 | Carp::croak("uri_escape: $@") if $@; | ||||
169 | } | ||||
170 | &{$subst{$patn}}($text); # spent 7.30ms making 582 calls to URI::Escape::__ANON__[(eval 0)[URI/Escape.pm:167]:1], avg 13µs/call | ||||
171 | } else { | ||||
172 | # Default unsafe characters. RFC 2732 ^(uric - reserved) | ||||
173 | $text =~ s/([^A-Za-z0-9\-_.!~*'()])/$escapes{$1} || _fail_hi($1)/ge; | ||||
174 | } | ||||
175 | $text; | ||||
176 | } | ||||
177 | |||||
178 | sub _fail_hi { | ||||
179 | my $chr = shift; | ||||
180 | Carp::croak(sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($chr)); | ||||
181 | } | ||||
182 | |||||
183 | sub uri_escape_utf8 | ||||
184 | # spent 14.1ms (3.24+10.9) within URI::Escape::uri_escape_utf8 which was called 582 times, avg 24µs/call:
# 541 times (3.00ms+9.82ms) by SimpleDB::Client::construct_request at line 172 of SimpleDB/Client.pm, avg 24µs/call
# 41 times (237µs+1.03ms) by SimpleDB::Client::construct_request at line 179 of SimpleDB/Client.pm, avg 31µs/call | ||||
185 | 2328 | 3.21ms | my $text = shift; | ||
186 | if ($] < 5.008) { | ||||
187 | $text =~ s/([^\0-\x7F])/do {my $o = ord($1); sprintf("%c%c", 0xc0 | ($o >> 6), 0x80 | ($o & 0x3f)) }/ge; | ||||
188 | } | ||||
189 | else { | ||||
190 | utf8::encode($text); # spent 391µs making 582 calls to utf8::encode, avg 671ns/call | ||||
191 | } | ||||
192 | |||||
193 | return uri_escape($text, @_); # spent 10.5ms making 582 calls to URI::Escape::uri_escape, avg 18µs/call | ||||
194 | } | ||||
195 | |||||
196 | sub uri_unescape | ||||
197 | # spent 796µs (744+53) within URI::Escape::uri_unescape which was called 123 times, avg 6µs/call:
# 123 times (744µs+53µs) by URI::_server::host at line 90 of URI/_server.pm, avg 6µs/call | ||||
198 | # Note from RFC1630: "Sequences which start with a percent sign | ||||
199 | # but are not followed by two hexadecimal characters are reserved | ||||
200 | # for future extension" | ||||
201 | 492 | 739µs | my $str = shift; | ||
202 | if (@_ && wantarray) { | ||||
203 | # not executed for the common case of a single argument | ||||
204 | my @str = ($str, @_); # need to copy | ||||
205 | foreach (@str) { | ||||
206 | s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; | ||||
207 | } | ||||
208 | return @str; | ||||
209 | } | ||||
210 | $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; # spent 53µs making 123 calls to URI::Escape::CORE:subst, avg 428ns/call | ||||
211 | $str; | ||||
212 | } | ||||
213 | |||||
214 | sub escape_char { | ||||
215 | return join '', @URI::Escape::escapes{$_[0] =~ /(\C)/g}; | ||||
216 | } | ||||
217 | |||||
218 | 1 | 12µs | 1; | ||
# spent 577µs within URI::Escape::CORE:subst which was called 706 times, avg 817ns/call:
# 582 times (523µs+0s) by URI::Escape::__ANON__[(eval 0)[/usr/local/lib/perl5/site_perl/5.10.1/URI/Escape.pm:167]:1] at line 1 of (eval 0)[URI/Escape.pm:167] at line 167 of URI/Escape.pm, avg 899ns/call
# 123 times (53µs+0s) by URI::Escape::uri_unescape at line 210 of URI/Escape.pm, avg 428ns/call
# once (800ns+0s) by URI::Escape::uri_escape at line 166 of URI/Escape.pm | |||||
# spent 1.30ms within URI::Escape::CORE:substcont which was called 1471 times, avg 887ns/call:
# 1471 times (1.30ms+0s) by URI::Escape::__ANON__[(eval 0)[/usr/local/lib/perl5/site_perl/5.10.1/URI/Escape.pm:167]:1] at line 1 of (eval 0)[URI/Escape.pm:167] at line 167 of URI/Escape.pm, avg 887ns/call |