File | /usr/local/lib/perl5/site_perl/5.10.1/URI.pm |
Statements Executed | 223 |
Statement Execution Time | 14.6ms |
Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
---|---|---|---|---|---|
4 | 1 | 1 | 3.28ms | 3.51ms | implementor | URI::
1 | 1 | 1 | 882µs | 976µs | BEGIN@22 | URI::
4 | 1 | 1 | 131µs | 2.37ms | _init | URI::
4 | 2 | 2 | 130µs | 6.06ms | new | URI::
9 | 2 | 1 | 75µs | 123µs | _scheme | URI::
5 | 5 | 2 | 74µs | 74µs | CORE:regcomp (opcode) | URI::
27 | 6 | 2 | 63µs | 63µs | CORE:match (opcode) | URI::
3 | 1 | 1 | 53µs | 107µs | canonical | URI::
6 | 2 | 1 | 41µs | 116µs | scheme | URI::
4 | 1 | 1 | 27µs | 39µs | _uric_escape | URI::
23 | 8 | 2 | 26µs | 26µs | CORE:subst (opcode) | URI::
1 | 1 | 1 | 16µs | 67µs | BEGIN@24 | URI::
1 | 1 | 1 | 14µs | 17µs | BEGIN@3 | URI::
3 | 1 | 1 | 12µs | 12µs | as_string | URI::
3 | 1 | 1 | 9µs | 9µs | __ANON__[:24] | URI::
1 | 1 | 1 | 9µs | 27µs | BEGIN@127 | URI::
1 | 1 | 1 | 6µs | 23µs | BEGIN@4 | URI::
1 | 1 | 1 | 6µs | 39µs | BEGIN@7 | URI::
1 | 1 | 1 | 6µs | 55µs | BEGIN@13 | URI::
1 | 1 | 1 | 3µs | 3µs | BEGIN@21 | URI::
1 | 1 | 1 | 3µs | 3µs | _init_implementor | URI::
0 | 0 | 0 | 0s | 0s | STORABLE_freeze | URI::
0 | 0 | 0 | 0s | 0s | STORABLE_thaw | URI::
0 | 0 | 0 | 0s | 0s | __ANON__[:25] | URI::
0 | 0 | 0 | 0s | 0s | __ANON__[:26] | URI::
0 | 0 | 0 | 0s | 0s | _no_scheme_ok | URI::
0 | 0 | 0 | 0s | 0s | _obj_eq | URI::
0 | 0 | 0 | 0s | 0s | abs | URI::
0 | 0 | 0 | 0s | 0s | as_iri | URI::
0 | 0 | 0 | 0s | 0s | clone | URI::
0 | 0 | 0 | 0s | 0s | eq | URI::
0 | 0 | 0 | 0s | 0s | fragment | URI::
0 | 0 | 0 | 0s | 0s | new_abs | URI::
0 | 0 | 0 | 0s | 0s | opaque | URI::
0 | 0 | 0 | 0s | 0s | rel | URI::
Line | Statements | Time on line | Calls | Time in subs | Code |
---|---|---|---|---|---|
1 | package URI; | ||||
2 | |||||
3 | 3 | 24µs | 2 | 20µs | # spent 17µs (14+3) within URI::BEGIN@3 which was called
# once (14µs+3µs) by LWP::UserAgent::BEGIN@10 at line 3 # spent 17µs making 1 call to URI::BEGIN@3
# spent 3µs making 1 call to strict::import |
4 | 3 | 28µs | 2 | 41µs | # spent 23µs (6+17) within URI::BEGIN@4 which was called
# once (6µs+17µs) by LWP::UserAgent::BEGIN@10 at line 4 # spent 23µs making 1 call to URI::BEGIN@4
# spent 17µs making 1 call to vars::import |
5 | 1 | 1µs | $VERSION = "1.51"; | ||
6 | |||||
7 | 3 | 27µs | 2 | 73µs | # spent 39µs (6+33) within URI::BEGIN@7 which was called
# once (6µs+33µs) by LWP::UserAgent::BEGIN@10 at line 7 # spent 39µs making 1 call to URI::BEGIN@7
# spent 33µs making 1 call to vars::import |
8 | |||||
9 | 1 | 200ns | my %implements; # mapping from scheme to implementor class | ||
10 | |||||
11 | # Some "official" character classes | ||||
12 | |||||
13 | 3 | 49µs | 2 | 104µs | # spent 55µs (6+49) within URI::BEGIN@13 which was called
# once (6µs+49µs) by LWP::UserAgent::BEGIN@10 at line 13 # spent 55µs making 1 call to URI::BEGIN@13
# spent 49µs making 1 call to vars::import |
14 | 1 | 400ns | $reserved = q(;/?:@&=+$,[]); | ||
15 | 1 | 500ns | $mark = q(-_.!~*'()); #'; emacs | ||
16 | 1 | 3µs | $unreserved = "A-Za-z0-9\Q$mark\E"; | ||
17 | 1 | 2µs | $uric = quotemeta($reserved) . $unreserved . "%"; | ||
18 | |||||
19 | 1 | 300ns | $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*'; | ||
20 | |||||
21 | 3 | 17µs | 1 | 3µs | # spent 3µs within URI::BEGIN@21 which was called
# once (3µs+0s) by LWP::UserAgent::BEGIN@10 at line 21 # spent 3µs making 1 call to URI::BEGIN@21 |
22 | 3 | 182µs | 1 | 976µs | # spent 976µs (882+94) within URI::BEGIN@22 which was called
# once (882µs+94µs) by LWP::UserAgent::BEGIN@10 at line 22 # spent 976µs making 1 call to URI::BEGIN@22 |
23 | |||||
24 | 3 | 13µs | # spent 67µs (16+51) within URI::BEGIN@24 which was called
# once (16µs+51µs) by LWP::UserAgent::BEGIN@10 at line 28
# spent 9µs within URI::__ANON__[/usr/local/lib/perl5/site_perl/5.10.1/URI.pm:24] which was called 3 times, avg 3µs/call:
# 3 times (9µs+0s) by LWP::UserAgent::prepare_request at line 210 of LWP/UserAgent.pm, avg 3µs/call | ||
25 | '==' => sub { _obj_eq(@_) }, | ||||
26 | '!=' => sub { !_obj_eq(@_) }, | ||||
27 | 1 | 19µs | 1 | 51µs | fallback => 1, # spent 51µs making 1 call to overload::import |
28 | 2 | 402µs | 1 | 67µs | ); # spent 67µs making 1 call to URI::BEGIN@24 |
29 | |||||
30 | # Check if two objects are the same object | ||||
31 | sub _obj_eq { | ||||
32 | return overload::StrVal($_[0]) eq overload::StrVal($_[1]); | ||||
33 | } | ||||
34 | |||||
35 | sub new | ||||
36 | # spent 6.06ms (130µs+5.93) within URI::new which was called 4 times, avg 1.52ms/call:
# 3 times (80µs+431µs) by HTTP::Request::uri at line 80 of HTTP/Request.pm, avg 170µs/call
# once (50µs+5.50ms) by SimpleDB::Client::__ANON__[../lib/SimpleDB/Client.pm:120] at line 120 of ../lib/SimpleDB/Client.pm | ||||
37 | 4 | 5µs | my($class, $uri, $scheme) = @_; | ||
38 | |||||
39 | 4 | 5µs | $uri = defined ($uri) ? "$uri" : ""; # stringify | ||
40 | # Get rid of potential wrapping | ||||
41 | 4 | 20µs | 4 | 4µs | $uri =~ s/^<(?:URL:)?(.*)>$/$1/; # # spent 4µs making 4 calls to URI::CORE:subst, avg 975ns/call |
42 | 4 | 11µs | 4 | 2µs | $uri =~ s/^"(.*)"$/$1/; # spent 2µs making 4 calls to URI::CORE:subst, avg 500ns/call |
43 | 4 | 17µs | 4 | 7µs | $uri =~ s/^\s+//; # spent 7µs making 4 calls to URI::CORE:subst, avg 2µs/call |
44 | 4 | 16µs | 4 | 6µs | $uri =~ s/\s+$//; # spent 6µs making 4 calls to URI::CORE:subst, avg 1µs/call |
45 | |||||
46 | 4 | 700ns | my $impclass; | ||
47 | 4 | 65µs | 5 | 39µs | if ($uri =~ m/^($scheme_re):/so) { # spent 27µs making 1 call to URI::CORE:regcomp
# spent 12µs making 4 calls to URI::CORE:match, avg 3µs/call |
48 | $scheme = $1; | ||||
49 | } | ||||
50 | else { | ||||
51 | if (($impclass = ref($scheme))) { | ||||
52 | $scheme = $scheme->scheme; | ||||
53 | } | ||||
54 | elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) { | ||||
55 | $scheme = $1; | ||||
56 | } | ||||
57 | } | ||||
58 | $impclass ||= implementor($scheme) || | ||||
59 | 4 | 15µs | 4 | 3.51ms | do { # spent 3.51ms making 4 calls to URI::implementor, avg 877µs/call |
60 | require URI::_foreign; | ||||
61 | $impclass = 'URI::_foreign'; | ||||
62 | }; | ||||
63 | |||||
64 | 4 | 29µs | 4 | 2.37ms | return $impclass->_init($uri, $scheme); # spent 2.37ms making 4 calls to URI::_init, avg 592µs/call |
65 | } | ||||
66 | |||||
67 | |||||
68 | sub new_abs | ||||
69 | { | ||||
70 | my($class, $uri, $base) = @_; | ||||
71 | $uri = $class->new($uri, $base); | ||||
72 | $uri->abs($base); | ||||
73 | } | ||||
74 | |||||
75 | |||||
76 | sub _init | ||||
77 | # spent 2.37ms (131µs+2.24) within URI::_init which was called 4 times, avg 592µs/call:
# 4 times (131µs+2.24ms) by URI::new at line 64, avg 592µs/call | ||||
78 | 4 | 2µs | my $class = shift; | ||
79 | 4 | 5µs | my($str, $scheme) = @_; | ||
80 | # find all funny characters and encode the bytes. | ||||
81 | 4 | 12µs | 4 | 2.22ms | $str = $class->_uric_escape($str); # spent 2.22ms making 4 calls to URI::_server::_uric_escape, avg 554µs/call |
82 | 4 | 31µs | 5 | 19µs | $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o || # spent 12µs making 1 call to URI::CORE:regcomp
# spent 7µs making 4 calls to URI::CORE:match, avg 2µs/call |
83 | $class->_no_scheme_ok; | ||||
84 | 4 | 83µs | my $self = bless \$str, $class; | ||
85 | 4 | 13µs | $self; | ||
86 | } | ||||
87 | |||||
88 | |||||
89 | sub _uric_escape | ||||
90 | # spent 39µs (27+12) within URI::_uric_escape which was called 4 times, avg 10µs/call:
# 4 times (27µs+12µs) by URI::_server::_uric_escape at line 18 of URI/_server.pm, avg 10µs/call | ||||
91 | 4 | 5µs | my($class, $str) = @_; | ||
92 | 4 | 24µs | 5 | 12µs | $str =~ s*([^$uric\#])* URI::Escape::escape_char($1) *ego; # spent 7µs making 1 call to URI::CORE:regcomp
# spent 5µs making 4 calls to URI::CORE:subst, avg 1µs/call |
93 | 4 | 12µs | return $str; | ||
94 | } | ||||
95 | |||||
96 | |||||
97 | sub implementor | ||||
98 | # spent 3.51ms (3.28+229µs) within URI::implementor which was called 4 times, avg 877µs/call:
# 4 times (3.28ms+229µs) by URI::new at line 59, avg 877µs/call | ||||
99 | 4 | 5µs | my($scheme, $impclass) = @_; | ||
100 | 4 | 36µs | 5 | 19µs | if (!$scheme || $scheme !~ /\A$scheme_re\z/o) { # spent 12µs making 1 call to URI::CORE:regcomp
# spent 7µs making 4 calls to URI::CORE:match, avg 2µs/call |
101 | require URI::_generic; | ||||
102 | return "URI::_generic"; | ||||
103 | } | ||||
104 | |||||
105 | 4 | 4µs | $scheme = lc($scheme); | ||
106 | |||||
107 | 4 | 1µs | if ($impclass) { | ||
108 | # Set the implementor class for a given scheme | ||||
109 | my $old = $implements{$scheme}; | ||||
110 | $impclass->_init_implementor($scheme); | ||||
111 | $implements{$scheme} = $impclass; | ||||
112 | return $old; | ||||
113 | } | ||||
114 | |||||
115 | 4 | 4µs | my $ic = $implements{$scheme}; | ||
116 | 4 | 10µs | return $ic if $ic; | ||
117 | |||||
118 | # scheme not yet known, look for internal or | ||||
119 | # preloaded (with 'use') implementation | ||||
120 | 1 | 900ns | $ic = "URI::$scheme"; # default location | ||
121 | |||||
122 | # turn scheme into a valid perl identifier by a simple transformation... | ||||
123 | 1 | 5µs | 1 | 900ns | $ic =~ s/\+/_P/g; # spent 900ns making 1 call to URI::CORE:subst |
124 | 1 | 4µs | 1 | 800ns | $ic =~ s/\./_O/g; # spent 800ns making 1 call to URI::CORE:subst |
125 | 1 | 5µs | 1 | 600ns | $ic =~ s/\-/_/g; # spent 600ns making 1 call to URI::CORE:subst |
126 | |||||
127 | 3 | 12.9ms | 2 | 46µs | # spent 27µs (9+18) within URI::BEGIN@127 which was called
# once (9µs+18µs) by LWP::UserAgent::BEGIN@10 at line 127 # spent 27µs making 1 call to URI::BEGIN@127
# spent 19µs making 1 call to strict::unimport |
128 | # check we actually have one for the scheme: | ||||
129 | 1 | 9µs | unless (@{"${ic}::ISA"}) { | ||
130 | # Try to load it | ||||
131 | 1 | 175µs | eval "require $ic"; | ||
132 | 1 | 400ns | die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/; | ||
133 | 1 | 2µs | return unless @{"${ic}::ISA"}; | ||
134 | } | ||||
135 | |||||
136 | 1 | 6µs | 1 | 3µs | $ic->_init_implementor($scheme); # spent 3µs making 1 call to URI::_init_implementor |
137 | 1 | 1µs | $implements{$scheme} = $ic; | ||
138 | 1 | 3µs | $ic; | ||
139 | } | ||||
140 | |||||
141 | |||||
142 | sub _init_implementor | ||||
143 | # spent 3µs within URI::_init_implementor which was called
# once (3µs+0s) by URI::implementor at line 136 | ||||
144 | 1 | 5µs | my($class, $scheme) = @_; | ||
145 | # Remember that one implementor class may actually | ||||
146 | # serve to implement several URI schemes. | ||||
147 | } | ||||
148 | |||||
149 | |||||
150 | sub clone | ||||
151 | { | ||||
152 | my $self = shift; | ||||
153 | my $other = $$self; | ||||
154 | bless \$other, ref $self; | ||||
155 | } | ||||
156 | |||||
157 | |||||
158 | sub _no_scheme_ok { 0 } | ||||
159 | |||||
160 | sub _scheme | ||||
161 | { | ||||
162 | 9 | 4µs | my $self = shift; | ||
163 | |||||
164 | 9 | 4µs | unless (@_) { | ||
165 | 9 | 84µs | 10 | 48µs | return unless $$self =~ /^($scheme_re):/o; # spent 32µs making 9 calls to URI::CORE:match, avg 4µs/call
# spent 16µs making 1 call to URI::CORE:regcomp |
166 | 9 | 40µs | return $1; | ||
167 | } | ||||
168 | |||||
169 | my $old; | ||||
170 | my $new = shift; | ||||
171 | if (defined($new) && length($new)) { | ||||
172 | Carp::croak("Bad scheme '$new'") unless $new =~ /^$scheme_re$/o; | ||||
173 | $old = $1 if $$self =~ s/^($scheme_re)://o; | ||||
174 | my $newself = URI->new("$new:$$self"); | ||||
175 | $$self = $$newself; | ||||
176 | bless $self, ref($newself); | ||||
177 | } | ||||
178 | else { | ||||
179 | if ($self->_no_scheme_ok) { | ||||
180 | $old = $1 if $$self =~ s/^($scheme_re)://o; | ||||
181 | Carp::carp("Oops, opaque part now look like scheme") | ||||
182 | if $^W && $$self =~ m/^$scheme_re:/o | ||||
183 | } | ||||
184 | else { | ||||
185 | $old = $1 if $$self =~ m/^($scheme_re):/o; | ||||
186 | } | ||||
187 | } | ||||
188 | |||||
189 | return $old; | ||||
190 | } | ||||
191 | |||||
192 | sub scheme | ||||
193 | # spent 116µs (41+74) within URI::scheme which was called 6 times, avg 19µs/call:
# 3 times (27µs+49µs) by LWP::UserAgent::prepare_request at line 211 of LWP/UserAgent.pm, avg 25µs/call
# 3 times (14µs+25µs) by LWP::UserAgent::send_request at line 120 of LWP/UserAgent.pm, avg 13µs/call | ||||
194 | 6 | 16µs | 6 | 74µs | my $scheme = shift->_scheme(@_); # spent 74µs making 6 calls to URI::_scheme, avg 12µs/call |
195 | 6 | 2µs | return unless defined $scheme; | ||
196 | 6 | 27µs | lc($scheme); | ||
197 | } | ||||
198 | |||||
199 | |||||
200 | sub opaque | ||||
201 | { | ||||
202 | my $self = shift; | ||||
203 | |||||
204 | unless (@_) { | ||||
205 | $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die; | ||||
206 | return $1; | ||||
207 | } | ||||
208 | |||||
209 | $$self =~ /^($scheme_re:)? # optional scheme | ||||
210 | ([^\#]*) # opaque | ||||
211 | (\#.*)? # optional fragment | ||||
212 | $/sx or die; | ||||
213 | |||||
214 | my $old_scheme = $1; | ||||
215 | my $old_opaque = $2; | ||||
216 | my $old_frag = $3; | ||||
217 | |||||
218 | my $new_opaque = shift; | ||||
219 | $new_opaque = "" unless defined $new_opaque; | ||||
220 | $new_opaque =~ s/([^$uric])/ URI::Escape::escape_char($1)/ego; | ||||
221 | |||||
222 | $$self = defined($old_scheme) ? $old_scheme : ""; | ||||
223 | $$self .= $new_opaque; | ||||
224 | $$self .= $old_frag if defined $old_frag; | ||||
225 | |||||
226 | $old_opaque; | ||||
227 | } | ||||
228 | |||||
229 | 1 | 2µs | *path = \&opaque; # alias | ||
230 | |||||
231 | |||||
232 | sub fragment | ||||
233 | { | ||||
234 | my $self = shift; | ||||
235 | unless (@_) { | ||||
236 | return unless $$self =~ /\#(.*)/s; | ||||
237 | return $1; | ||||
238 | } | ||||
239 | |||||
240 | my $old; | ||||
241 | $old = $1 if $$self =~ s/\#(.*)//s; | ||||
242 | |||||
243 | my $new_frag = shift; | ||||
244 | if (defined $new_frag) { | ||||
245 | $new_frag =~ s/([^$uric])/ URI::Escape::escape_char($1) /ego; | ||||
246 | $$self .= "#$new_frag"; | ||||
247 | } | ||||
248 | $old; | ||||
249 | } | ||||
250 | |||||
251 | |||||
252 | sub as_string | ||||
253 | # spent 12µs within URI::as_string which was called 3 times, avg 4µs/call:
# 3 times (12µs+0s) by SimpleDB::Client::construct_request at line 176 of ../lib/SimpleDB/Client.pm, avg 4µs/call | ||||
254 | 3 | 2µs | my $self = shift; | ||
255 | 3 | 12µs | $$self; | ||
256 | } | ||||
257 | |||||
258 | |||||
259 | sub as_iri | ||||
260 | { | ||||
261 | my $self = shift; | ||||
262 | my $str = $$self; | ||||
263 | if ($str =~ /\bxn--/ && $self->can("ihost")) { | ||||
264 | my $ihost = $self->ihost; | ||||
265 | if ($ihost) { | ||||
266 | my $u = $self->clone; | ||||
267 | $u->host("%%host%%"); | ||||
268 | $str = $u->as_string; | ||||
269 | $str =~ s/%%host%%/$ihost/; | ||||
270 | } | ||||
271 | } | ||||
272 | if ($str =~ s/%([89A-F][0-9A-F])/chr(hex($1))/eg) { | ||||
273 | # All this crap because the more obvious: | ||||
274 | # | ||||
275 | # Encode::decode("UTF-8", $str, sub { sprintf "%%%02X", shift }) | ||||
276 | # | ||||
277 | # doesn't work. Apparently passing a sub as CHECK only works | ||||
278 | # for 'ascii' and similar direct encodings. | ||||
279 | |||||
280 | require Encode; | ||||
281 | my $enc = Encode::find_encoding("UTF-8"); | ||||
282 | my $u = ""; | ||||
283 | while (length $str) { | ||||
284 | $u .= $enc->decode($str, Encode::FB_QUIET()); | ||||
285 | if (length $str) { | ||||
286 | # escape next char | ||||
287 | $u .= URI::Escape::escape_char(substr($str, 0, 1, "")); | ||||
288 | } | ||||
289 | } | ||||
290 | $str = $u; | ||||
291 | } | ||||
292 | return $str; | ||||
293 | } | ||||
294 | |||||
295 | |||||
296 | sub canonical | ||||
297 | # spent 107µs (53+53) within URI::canonical which was called 3 times, avg 36µs/call:
# 3 times (53µs+53µs) by URI::_server::canonical at line 145 of URI/_server.pm, avg 36µs/call | ||||
298 | # Make sure scheme is lowercased, that we don't escape unreserved chars, | ||||
299 | # and that we use upcase escape sequences. | ||||
300 | |||||
301 | 3 | 2µs | my $self = shift; | ||
302 | 3 | 16µs | 3 | 49µs | my $scheme = $self->_scheme || ""; # spent 49µs making 3 calls to URI::_scheme, avg 16µs/call |
303 | 3 | 12µs | 3 | 2µs | my $uc_scheme = $scheme =~ /[A-Z]/; # spent 2µs making 3 calls to URI::CORE:match, avg 767ns/call |
304 | 3 | 11µs | 3 | 2µs | my $esc = $$self =~ /%[a-fA-F0-9]{2}/; # spent 2µs making 3 calls to URI::CORE:match, avg 767ns/call |
305 | 3 | 14µs | return $self unless $uc_scheme || $esc; | ||
306 | |||||
307 | my $other = $self->clone; | ||||
308 | if ($uc_scheme) { | ||||
309 | $other->_scheme(lc $scheme); | ||||
310 | } | ||||
311 | if ($esc) { | ||||
312 | $$other =~ s{%([0-9a-fA-F]{2})} | ||||
313 | { my $a = chr(hex($1)); | ||||
314 | $a =~ /^[$unreserved]\z/o ? $a : "%\U$1" | ||||
315 | }ge; | ||||
316 | } | ||||
317 | return $other; | ||||
318 | } | ||||
319 | |||||
320 | # Compare two URIs, subclasses will provide a more correct implementation | ||||
321 | sub eq { | ||||
322 | my($self, $other) = @_; | ||||
323 | $self = URI->new($self, $other) unless ref $self; | ||||
324 | $other = URI->new($other, $self) unless ref $other; | ||||
325 | ref($self) eq ref($other) && # same class | ||||
326 | $self->canonical->as_string eq $other->canonical->as_string; | ||||
327 | } | ||||
328 | |||||
329 | # generic-URI transformation methods | ||||
330 | sub abs { $_[0]; } | ||||
331 | sub rel { $_[0]; } | ||||
332 | |||||
333 | # help out Storable | ||||
334 | sub STORABLE_freeze { | ||||
335 | my($self, $cloning) = @_; | ||||
336 | return $$self; | ||||
337 | } | ||||
338 | |||||
339 | sub STORABLE_thaw { | ||||
340 | my($self, $cloning, $str) = @_; | ||||
341 | $$self = $str; | ||||
342 | } | ||||
343 | |||||
344 | 1 | 16µs | 1; | ||
345 | |||||
346 | __END__ | ||||
347 | |||||
348 | =head1 NAME | ||||
349 | |||||
350 | URI - Uniform Resource Identifiers (absolute and relative) | ||||
351 | |||||
352 | =head1 SYNOPSIS | ||||
353 | |||||
354 | $u1 = URI->new("http://www.perl.com"); | ||||
355 | $u2 = URI->new("foo", "http"); | ||||
356 | $u3 = $u2->abs($u1); | ||||
357 | $u4 = $u3->clone; | ||||
358 | $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical; | ||||
359 | |||||
360 | $str = $u->as_string; | ||||
361 | $str = "$u"; | ||||
362 | |||||
363 | $scheme = $u->scheme; | ||||
364 | $opaque = $u->opaque; | ||||
365 | $path = $u->path; | ||||
366 | $frag = $u->fragment; | ||||
367 | |||||
368 | $u->scheme("ftp"); | ||||
369 | $u->host("ftp.perl.com"); | ||||
370 | $u->path("cpan/"); | ||||
371 | |||||
372 | =head1 DESCRIPTION | ||||
373 | |||||
374 | This module implements the C<URI> class. Objects of this class | ||||
375 | represent "Uniform Resource Identifier references" as specified in RFC | ||||
376 | 2396 (and updated by RFC 2732). | ||||
377 | |||||
378 | A Uniform Resource Identifier is a compact string of characters that | ||||
379 | identifies an abstract or physical resource. A Uniform Resource | ||||
380 | Identifier can be further classified as either a Uniform Resource Locator | ||||
381 | (URL) or a Uniform Resource Name (URN). The distinction between URL | ||||
382 | and URN does not matter to the C<URI> class interface. A | ||||
383 | "URI-reference" is a URI that may have additional information attached | ||||
384 | in the form of a fragment identifier. | ||||
385 | |||||
386 | An absolute URI reference consists of three parts: a I<scheme>, a | ||||
387 | I<scheme-specific part> and a I<fragment> identifier. A subset of URI | ||||
388 | references share a common syntax for hierarchical namespaces. For | ||||
389 | these, the scheme-specific part is further broken down into | ||||
390 | I<authority>, I<path> and I<query> components. These URIs can also | ||||
391 | take the form of relative URI references, where the scheme (and | ||||
392 | usually also the authority) component is missing, but implied by the | ||||
393 | context of the URI reference. The three forms of URI reference | ||||
394 | syntax are summarized as follows: | ||||
395 | |||||
396 | <scheme>:<scheme-specific-part>#<fragment> | ||||
397 | <scheme>://<authority><path>?<query>#<fragment> | ||||
398 | <path>?<query>#<fragment> | ||||
399 | |||||
400 | The components into which a URI reference can be divided depend on the | ||||
401 | I<scheme>. The C<URI> class provides methods to get and set the | ||||
402 | individual components. The methods available for a specific | ||||
403 | C<URI> object depend on the scheme. | ||||
404 | |||||
405 | =head1 CONSTRUCTORS | ||||
406 | |||||
407 | The following methods construct new C<URI> objects: | ||||
408 | |||||
409 | =over 4 | ||||
410 | |||||
411 | =item $uri = URI->new( $str ) | ||||
412 | |||||
413 | =item $uri = URI->new( $str, $scheme ) | ||||
414 | |||||
415 | Constructs a new URI object. The string | ||||
416 | representation of a URI is given as argument, together with an optional | ||||
417 | scheme specification. Common URI wrappers like "" and <>, as well as | ||||
418 | leading and trailing white space, are automatically removed from | ||||
419 | the $str argument before it is processed further. | ||||
420 | |||||
421 | The constructor determines the scheme, maps this to an appropriate | ||||
422 | URI subclass, constructs a new object of that class and returns it. | ||||
423 | |||||
424 | The $scheme argument is only used when $str is a | ||||
425 | relative URI. It can be either a simple string that | ||||
426 | denotes the scheme, a string containing an absolute URI reference, or | ||||
427 | an absolute C<URI> object. If no $scheme is specified for a relative | ||||
428 | URI $str, then $str is simply treated as a generic URI (no scheme-specific | ||||
429 | methods available). | ||||
430 | |||||
431 | The set of characters available for building URI references is | ||||
432 | restricted (see L<URI::Escape>). Characters outside this set are | ||||
433 | automatically escaped by the URI constructor. | ||||
434 | |||||
435 | =item $uri = URI->new_abs( $str, $base_uri ) | ||||
436 | |||||
437 | Constructs a new absolute URI object. The $str argument can | ||||
438 | denote a relative or absolute URI. If relative, then it is | ||||
439 | absolutized using $base_uri as base. The $base_uri must be an absolute | ||||
440 | URI. | ||||
441 | |||||
442 | =item $uri = URI::file->new( $filename ) | ||||
443 | |||||
444 | =item $uri = URI::file->new( $filename, $os ) | ||||
445 | |||||
446 | Constructs a new I<file> URI from a file name. See L<URI::file>. | ||||
447 | |||||
448 | =item $uri = URI::file->new_abs( $filename ) | ||||
449 | |||||
450 | =item $uri = URI::file->new_abs( $filename, $os ) | ||||
451 | |||||
452 | Constructs a new absolute I<file> URI from a file name. See | ||||
453 | L<URI::file>. | ||||
454 | |||||
455 | =item $uri = URI::file->cwd | ||||
456 | |||||
457 | Returns the current working directory as a I<file> URI. See | ||||
458 | L<URI::file>. | ||||
459 | |||||
460 | =item $uri->clone | ||||
461 | |||||
462 | Returns a copy of the $uri. | ||||
463 | |||||
464 | =back | ||||
465 | |||||
466 | =head1 COMMON METHODS | ||||
467 | |||||
468 | The methods described in this section are available for all C<URI> | ||||
469 | objects. | ||||
470 | |||||
471 | Methods that give access to components of a URI always return the | ||||
472 | old value of the component. The value returned is C<undef> if the | ||||
473 | component was not present. There is generally a difference between a | ||||
474 | component that is empty (represented as C<"">) and a component that is | ||||
475 | missing (represented as C<undef>). If an accessor method is given an | ||||
476 | argument, it updates the corresponding component in addition to | ||||
477 | returning the old value of the component. Passing an undefined | ||||
478 | argument removes the component (if possible). The description of | ||||
479 | each accessor method indicates whether the component is passed as | ||||
480 | an escaped or an unescaped string. A component that can be further | ||||
481 | divided into sub-parts is usually passed escaped, as unescaping might | ||||
482 | change its semantics. | ||||
483 | |||||
484 | The common methods available for all URI are: | ||||
485 | |||||
486 | =over 4 | ||||
487 | |||||
488 | =item $uri->scheme | ||||
489 | |||||
490 | =item $uri->scheme( $new_scheme ) | ||||
491 | |||||
492 | Sets and returns the scheme part of the $uri. If the $uri is | ||||
493 | relative, then $uri->scheme returns C<undef>. If called with an | ||||
494 | argument, it updates the scheme of $uri, possibly changing the | ||||
495 | class of $uri, and returns the old scheme value. The method croaks | ||||
496 | if the new scheme name is illegal; a scheme name must begin with a | ||||
497 | letter and must consist of only US-ASCII letters, numbers, and a few | ||||
498 | special marks: ".", "+", "-". This restriction effectively means | ||||
499 | that the scheme must be passed unescaped. Passing an undefined | ||||
500 | argument to the scheme method makes the URI relative (if possible). | ||||
501 | |||||
502 | Letter case does not matter for scheme names. The string | ||||
503 | returned by $uri->scheme is always lowercase. If you want the scheme | ||||
504 | just as it was written in the URI in its original case, | ||||
505 | you can use the $uri->_scheme method instead. | ||||
506 | |||||
507 | =item $uri->opaque | ||||
508 | |||||
509 | =item $uri->opaque( $new_opaque ) | ||||
510 | |||||
511 | Sets and returns the scheme-specific part of the $uri | ||||
512 | (everything between the scheme and the fragment) | ||||
513 | as an escaped string. | ||||
514 | |||||
515 | =item $uri->path | ||||
516 | |||||
517 | =item $uri->path( $new_path ) | ||||
518 | |||||
519 | Sets and returns the same value as $uri->opaque unless the URI | ||||
520 | supports the generic syntax for hierarchical namespaces. | ||||
521 | In that case the generic method is overridden to set and return | ||||
522 | the part of the URI between the I<host name> and the I<fragment>. | ||||
523 | |||||
524 | =item $uri->fragment | ||||
525 | |||||
526 | =item $uri->fragment( $new_frag ) | ||||
527 | |||||
528 | Sets and returns the fragment identifier of a URI reference | ||||
529 | as an escaped string. | ||||
530 | |||||
531 | =item $uri->as_string | ||||
532 | |||||
533 | Converts a URI object to a plain ASCII string. URI objects are | ||||
534 | also converted to plain strings automatically by overloading. This | ||||
535 | means that $uri objects can be used as plain strings in most Perl | ||||
536 | constructs. | ||||
537 | |||||
538 | =item $uri->as_iri | ||||
539 | |||||
540 | Returns a Unicode string representing the URI. Escaped UTF-8 sequences | ||||
541 | representing non-ASCII characters are turned into their corresponding Unicode | ||||
542 | code points. | ||||
543 | |||||
544 | =item $uri->canonical | ||||
545 | |||||
546 | Returns a normalized version of the URI. The rules | ||||
547 | for normalization are scheme-dependent. They usually involve | ||||
548 | lowercasing the scheme and Internet host name components, | ||||
549 | removing the explicit port specification if it matches the default port, | ||||
550 | uppercasing all escape sequences, and unescaping octets that can be | ||||
551 | better represented as plain characters. | ||||
552 | |||||
553 | For efficiency reasons, if the $uri is already in normalized form, | ||||
554 | then a reference to it is returned instead of a copy. | ||||
555 | |||||
556 | =item $uri->eq( $other_uri ) | ||||
557 | |||||
558 | =item URI::eq( $first_uri, $other_uri ) | ||||
559 | |||||
560 | Tests whether two URI references are equal. URI references | ||||
561 | that normalize to the same string are considered equal. The method | ||||
562 | can also be used as a plain function which can also test two string | ||||
563 | arguments. | ||||
564 | |||||
565 | If you need to test whether two C<URI> object references denote the | ||||
566 | same object, use the '==' operator. | ||||
567 | |||||
568 | =item $uri->abs( $base_uri ) | ||||
569 | |||||
570 | Returns an absolute URI reference. If $uri is already | ||||
571 | absolute, then a reference to it is simply returned. If the $uri | ||||
572 | is relative, then a new absolute URI is constructed by combining the | ||||
573 | $uri and the $base_uri, and returned. | ||||
574 | |||||
575 | =item $uri->rel( $base_uri ) | ||||
576 | |||||
577 | Returns a relative URI reference if it is possible to | ||||
578 | make one that denotes the same resource relative to $base_uri. | ||||
579 | If not, then $uri is simply returned. | ||||
580 | |||||
581 | =back | ||||
582 | |||||
583 | =head1 GENERIC METHODS | ||||
584 | |||||
585 | The following methods are available to schemes that use the | ||||
586 | common/generic syntax for hierarchical namespaces. The descriptions of | ||||
587 | schemes below indicate which these are. Unknown schemes are | ||||
588 | assumed to support the generic syntax, and therefore the following | ||||
589 | methods: | ||||
590 | |||||
591 | =over 4 | ||||
592 | |||||
593 | =item $uri->authority | ||||
594 | |||||
595 | =item $uri->authority( $new_authority ) | ||||
596 | |||||
597 | Sets and returns the escaped authority component | ||||
598 | of the $uri. | ||||
599 | |||||
600 | =item $uri->path | ||||
601 | |||||
602 | =item $uri->path( $new_path ) | ||||
603 | |||||
604 | Sets and returns the escaped path component of | ||||
605 | the $uri (the part between the host name and the query or fragment). | ||||
606 | The path can never be undefined, but it can be the empty string. | ||||
607 | |||||
608 | =item $uri->path_query | ||||
609 | |||||
610 | =item $uri->path_query( $new_path_query ) | ||||
611 | |||||
612 | Sets and returns the escaped path and query | ||||
613 | components as a single entity. The path and the query are | ||||
614 | separated by a "?" character, but the query can itself contain "?". | ||||
615 | |||||
616 | =item $uri->path_segments | ||||
617 | |||||
618 | =item $uri->path_segments( $segment, ... ) | ||||
619 | |||||
620 | Sets and returns the path. In a scalar context, it returns | ||||
621 | the same value as $uri->path. In a list context, it returns the | ||||
622 | unescaped path segments that make up the path. Path segments that | ||||
623 | have parameters are returned as an anonymous array. The first element | ||||
624 | is the unescaped path segment proper; subsequent elements are escaped | ||||
625 | parameter strings. Such an anonymous array uses overloading so it can | ||||
626 | be treated as a string too, but this string does not include the | ||||
627 | parameters. | ||||
628 | |||||
629 | Note that absolute paths have the empty string as their first | ||||
630 | I<path_segment>, i.e. the I<path> C</foo/bar> has 3 | ||||
631 | I<path_segments>: "", "foo" and "bar". | ||||
632 | |||||
633 | =item $uri->query | ||||
634 | |||||
635 | =item $uri->query( $new_query ) | ||||
636 | |||||
637 | Sets and returns the escaped query component of | ||||
638 | the $uri. | ||||
639 | |||||
640 | =item $uri->query_form | ||||
641 | |||||
642 | =item $uri->query_form( $key1 => $val1, $key2 => $val2, ... ) | ||||
643 | |||||
644 | =item $uri->query_form( $key1 => $val1, $key2 => $val2, ..., $delim ) | ||||
645 | |||||
646 | =item $uri->query_form( \@key_value_pairs ) | ||||
647 | |||||
648 | =item $uri->query_form( \@key_value_pairs, $delim ) | ||||
649 | |||||
650 | =item $uri->query_form( \%hash ) | ||||
651 | |||||
652 | =item $uri->query_form( \%hash, $delim ) | ||||
653 | |||||
654 | Sets and returns query components that use the | ||||
655 | I<application/x-www-form-urlencoded> format. Key/value pairs are | ||||
656 | separated by "&", and the key is separated from the value by a "=" | ||||
657 | character. | ||||
658 | |||||
659 | The form can be set either by passing separate key/value pairs, or via | ||||
660 | an array or hash reference. Passing an empty array or an empty hash | ||||
661 | removes the query component, whereas passing no arguments at all leaves | ||||
662 | the component unchanged. The order of keys is undefined if a hash | ||||
663 | reference is passed. The old value is always returned as a list of | ||||
664 | separate key/value pairs. Assigning this list to a hash is unwise as | ||||
665 | the keys returned might repeat. | ||||
666 | |||||
667 | The values passed when setting the form can be plain strings or | ||||
668 | references to arrays of strings. Passing an array of values has the | ||||
669 | same effect as passing the key repeatedly with one value at a time. | ||||
670 | All the following statements have the same effect: | ||||
671 | |||||
672 | $uri->query_form(foo => 1, foo => 2); | ||||
673 | $uri->query_form(foo => [1, 2]); | ||||
674 | $uri->query_form([ foo => 1, foo => 2 ]); | ||||
675 | $uri->query_form([ foo => [1, 2] ]); | ||||
676 | $uri->query_form({ foo => [1, 2] }); | ||||
677 | |||||
678 | The $delim parameter can be passed as ";" to force the key/value pairs | ||||
679 | to be delimited by ";" instead of "&" in the query string. This | ||||
680 | practice is often recommended for URLs embedded in HTML or XML | ||||
681 | documents as this avoids the trouble of escaping the "&" character. | ||||
682 | You might also set the $URI::DEFAULT_QUERY_FORM_DELIMITER variable to | ||||
683 | ";" for the same global effect. | ||||
684 | |||||
685 | The C<URI::QueryParam> module can be loaded to add further methods to | ||||
686 | manipulate the form of a URI. See L<URI::QueryParam> for details. | ||||
687 | |||||
688 | =item $uri->query_keywords | ||||
689 | |||||
690 | =item $uri->query_keywords( $keywords, ... ) | ||||
691 | |||||
692 | =item $uri->query_keywords( \@keywords ) | ||||
693 | |||||
694 | Sets and returns query components that use the | ||||
695 | keywords separated by "+" format. | ||||
696 | |||||
697 | The keywords can be set either by passing separate keywords directly | ||||
698 | or by passing a reference to an array of keywords. Passing an empty | ||||
699 | array removes the query component, whereas passing no arguments at | ||||
700 | all leaves the component unchanged. The old value is always returned | ||||
701 | as a list of separate words. | ||||
702 | |||||
703 | =back | ||||
704 | |||||
705 | =head1 SERVER METHODS | ||||
706 | |||||
707 | For schemes where the I<authority> component denotes an Internet host, | ||||
708 | the following methods are available in addition to the generic | ||||
709 | methods. | ||||
710 | |||||
711 | =over 4 | ||||
712 | |||||
713 | =item $uri->userinfo | ||||
714 | |||||
715 | =item $uri->userinfo( $new_userinfo ) | ||||
716 | |||||
717 | Sets and returns the escaped userinfo part of the | ||||
718 | authority component. | ||||
719 | |||||
720 | For some schemes this is a user name and a password separated by | ||||
721 | a colon. This practice is not recommended. Embedding passwords in | ||||
722 | clear text (such as URI) has proven to be a security risk in almost | ||||
723 | every case where it has been used. | ||||
724 | |||||
725 | =item $uri->host | ||||
726 | |||||
727 | =item $uri->host( $new_host ) | ||||
728 | |||||
729 | Sets and returns the unescaped hostname. | ||||
730 | |||||
731 | If the $new_host string ends with a colon and a number, then this | ||||
732 | number also sets the port. | ||||
733 | |||||
734 | For IPv6 addresses the brackets around the raw address are removed in the return | ||||
735 | value from $uri->host. When setting the host attribute to an IPv6 address you | ||||
736 | can use a raw address or one enclosed in brackets. The address needs to be | ||||
737 | enclosed in brackets if you want to pass in a new port value as well. | ||||
738 | |||||
739 | =item $uri->ihost | ||||
740 | |||||
741 | Returns the host in Unicode form. Any IDNA A-labels are turned into U-labels. | ||||
742 | |||||
743 | =item $uri->port | ||||
744 | |||||
745 | =item $uri->port( $new_port ) | ||||
746 | |||||
747 | Sets and returns the port. The port is a simple integer | ||||
748 | that should be greater than 0. | ||||
749 | |||||
750 | If a port is not specified explicitly in the URI, then the URI scheme's default port | ||||
751 | is returned. If you don't want the default port | ||||
752 | substituted, then you can use the $uri->_port method instead. | ||||
753 | |||||
754 | =item $uri->host_port | ||||
755 | |||||
756 | =item $uri->host_port( $new_host_port ) | ||||
757 | |||||
758 | Sets and returns the host and port as a single | ||||
759 | unit. The returned value includes a port, even if it matches the | ||||
760 | default port. The host part and the port part are separated by a | ||||
761 | colon: ":". | ||||
762 | |||||
763 | For IPv6 addresses the bracketing is preserved; thus | ||||
764 | URI->new("http://[::1]/")->host_port returns "[::1]:80". Contrast this with | ||||
765 | $uri->host which will remove the brackets. | ||||
766 | |||||
767 | =item $uri->default_port | ||||
768 | |||||
769 | Returns the default port of the URI scheme to which $uri | ||||
770 | belongs. For I<http> this is the number 80, for I<ftp> this | ||||
771 | is the number 21, etc. The default port for a scheme can not be | ||||
772 | changed. | ||||
773 | |||||
774 | =back | ||||
775 | |||||
776 | =head1 SCHEME-SPECIFIC SUPPORT | ||||
777 | |||||
778 | Scheme-specific support is provided for the following URI schemes. For C<URI> | ||||
779 | objects that do not belong to one of these, you can only use the common and | ||||
780 | generic methods. | ||||
781 | |||||
782 | =over 4 | ||||
783 | |||||
784 | =item B<data>: | ||||
785 | |||||
786 | The I<data> URI scheme is specified in RFC 2397. It allows inclusion | ||||
787 | of small data items as "immediate" data, as if it had been included | ||||
788 | externally. | ||||
789 | |||||
790 | C<URI> objects belonging to the data scheme support the common methods | ||||
791 | and two new methods to access their scheme-specific components: | ||||
792 | $uri->media_type and $uri->data. See L<URI::data> for details. | ||||
793 | |||||
794 | =item B<file>: | ||||
795 | |||||
796 | An old specification of the I<file> URI scheme is found in RFC 1738. | ||||
797 | A new RFC 2396 based specification is not available yet, but file URI | ||||
798 | references are in common use. | ||||
799 | |||||
800 | C<URI> objects belonging to the file scheme support the common and | ||||
801 | generic methods. In addition, they provide two methods for mapping file URIs | ||||
802 | back to local file names; $uri->file and $uri->dir. See L<URI::file> | ||||
803 | for details. | ||||
804 | |||||
805 | =item B<ftp>: | ||||
806 | |||||
807 | An old specification of the I<ftp> URI scheme is found in RFC 1738. A | ||||
808 | new RFC 2396 based specification is not available yet, but ftp URI | ||||
809 | references are in common use. | ||||
810 | |||||
811 | C<URI> objects belonging to the ftp scheme support the common, | ||||
812 | generic and server methods. In addition, they provide two methods for | ||||
813 | accessing the userinfo sub-components: $uri->user and $uri->password. | ||||
814 | |||||
815 | =item B<gopher>: | ||||
816 | |||||
817 | The I<gopher> URI scheme is specified in | ||||
818 | <draft-murali-url-gopher-1996-12-04> and will hopefully be available | ||||
819 | as an RFC 2396 based specification. | ||||
820 | |||||
821 | C<URI> objects belonging to the gopher scheme support the common, | ||||
822 | generic and server methods. In addition, they support some methods for | ||||
823 | accessing gopher-specific path components: $uri->gopher_type, | ||||
824 | $uri->selector, $uri->search, $uri->string. | ||||
825 | |||||
826 | =item B<http>: | ||||
827 | |||||
828 | The I<http> URI scheme is specified in RFC 2616. | ||||
829 | The scheme is used to reference resources hosted by HTTP servers. | ||||
830 | |||||
831 | C<URI> objects belonging to the http scheme support the common, | ||||
832 | generic and server methods. | ||||
833 | |||||
834 | =item B<https>: | ||||
835 | |||||
836 | The I<https> URI scheme is a Netscape invention which is commonly | ||||
837 | implemented. The scheme is used to reference HTTP servers through SSL | ||||
838 | connections. Its syntax is the same as http, but the default | ||||
839 | port is different. | ||||
840 | |||||
841 | =item B<ldap>: | ||||
842 | |||||
843 | The I<ldap> URI scheme is specified in RFC 2255. LDAP is the | ||||
844 | Lightweight Directory Access Protocol. An ldap URI describes an LDAP | ||||
845 | search operation to perform to retrieve information from an LDAP | ||||
846 | directory. | ||||
847 | |||||
848 | C<URI> objects belonging to the ldap scheme support the common, | ||||
849 | generic and server methods as well as ldap-specific methods: $uri->dn, | ||||
850 | $uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See | ||||
851 | L<URI::ldap> for details. | ||||
852 | |||||
853 | =item B<ldapi>: | ||||
854 | |||||
855 | Like the I<ldap> URI scheme, but uses a UNIX domain socket. The | ||||
856 | server methods are not supported, and the local socket path is | ||||
857 | available as $uri->un_path. The I<ldapi> scheme is used by the | ||||
858 | OpenLDAP package. There is no real specification for it, but it is | ||||
859 | mentioned in various OpenLDAP manual pages. | ||||
860 | |||||
861 | =item B<ldaps>: | ||||
862 | |||||
863 | Like the I<ldap> URI scheme, but uses an SSL connection. This | ||||
864 | scheme is deprecated, as the preferred way is to use the I<start_tls> | ||||
865 | mechanism. | ||||
866 | |||||
867 | =item B<mailto>: | ||||
868 | |||||
869 | The I<mailto> URI scheme is specified in RFC 2368. The scheme was | ||||
870 | originally used to designate the Internet mailing address of an | ||||
871 | individual or service. It has (in RFC 2368) been extended to allow | ||||
872 | setting of other mail header fields and the message body. | ||||
873 | |||||
874 | C<URI> objects belonging to the mailto scheme support the common | ||||
875 | methods and the generic query methods. In addition, they support the | ||||
876 | following mailto-specific methods: $uri->to, $uri->headers. | ||||
877 | |||||
878 | Note that the "foo@example.com" part of a mailto is I<not> the | ||||
879 | C<userinfo> and C<host> but instead the C<path>. This allowed a | ||||
880 | mailto to contain multiple comma-separated email addresses. | ||||
881 | |||||
882 | =item B<mms>: | ||||
883 | |||||
884 | The I<mms> URL specification can be found at L<http://sdp.ppona.com/>. | ||||
885 | C<URI> objects belonging to the mms scheme support the common, | ||||
886 | generic, and server methods, with the exception of userinfo and | ||||
887 | query-related sub-components. | ||||
888 | |||||
889 | =item B<news>: | ||||
890 | |||||
891 | The I<news>, I<nntp> and I<snews> URI schemes are specified in | ||||
892 | <draft-gilman-news-url-01> and will hopefully be available as an RFC | ||||
893 | 2396 based specification soon. | ||||
894 | |||||
895 | C<URI> objects belonging to the news scheme support the common, | ||||
896 | generic and server methods. In addition, they provide some methods to | ||||
897 | access the path: $uri->group and $uri->message. | ||||
898 | |||||
899 | =item B<nntp>: | ||||
900 | |||||
901 | See I<news> scheme. | ||||
902 | |||||
903 | =item B<pop>: | ||||
904 | |||||
905 | The I<pop> URI scheme is specified in RFC 2384. The scheme is used to | ||||
906 | reference a POP3 mailbox. | ||||
907 | |||||
908 | C<URI> objects belonging to the pop scheme support the common, generic | ||||
909 | and server methods. In addition, they provide two methods to access the | ||||
910 | userinfo components: $uri->user and $uri->auth. | ||||
911 | |||||
912 | =item B<rlogin>: | ||||
913 | |||||
914 | An old specification of the I<rlogin> URI scheme is found in RFC | ||||
915 | 1738. C<URI> objects belonging to the rlogin scheme support the | ||||
916 | common, generic and server methods. | ||||
917 | |||||
918 | =item B<rtsp>: | ||||
919 | |||||
920 | The I<rtsp> URL specification can be found in section 3.2 of RFC 2326. | ||||
921 | C<URI> objects belonging to the rtsp scheme support the common, | ||||
922 | generic, and server methods, with the exception of userinfo and | ||||
923 | query-related sub-components. | ||||
924 | |||||
925 | =item B<rtspu>: | ||||
926 | |||||
927 | The I<rtspu> URI scheme is used to talk to RTSP servers over UDP | ||||
928 | instead of TCP. The syntax is the same as rtsp. | ||||
929 | |||||
930 | =item B<rsync>: | ||||
931 | |||||
932 | Information about rsync is available from http://rsync.samba.org. | ||||
933 | C<URI> objects belonging to the rsync scheme support the common, | ||||
934 | generic and server methods. In addition, they provide methods to | ||||
935 | access the userinfo sub-components: $uri->user and $uri->password. | ||||
936 | |||||
937 | =item B<sip>: | ||||
938 | |||||
939 | The I<sip> URI specification is described in sections 19.1 and 25 | ||||
940 | of RFC 3261. C<URI> objects belonging to the sip scheme support the | ||||
941 | common, generic, and server methods with the exception of path related | ||||
942 | sub-components. In addition, they provide two methods to get and set | ||||
943 | I<sip> parameters: $uri->params_form and $uri->params. | ||||
944 | |||||
945 | =item B<sips>: | ||||
946 | |||||
947 | See I<sip> scheme. Its syntax is the same as sip, but the default | ||||
948 | port is different. | ||||
949 | |||||
950 | =item B<snews>: | ||||
951 | |||||
952 | See I<news> scheme. Its syntax is the same as news, but the default | ||||
953 | port is different. | ||||
954 | |||||
955 | =item B<telnet>: | ||||
956 | |||||
957 | An old specification of the I<telnet> URI scheme is found in RFC | ||||
958 | 1738. C<URI> objects belonging to the telnet scheme support the | ||||
959 | common, generic and server methods. | ||||
960 | |||||
961 | =item B<tn3270>: | ||||
962 | |||||
963 | These URIs are used like I<telnet> URIs but for connections to IBM | ||||
964 | mainframes. C<URI> objects belonging to the tn3270 scheme support the | ||||
965 | common, generic and server methods. | ||||
966 | |||||
967 | =item B<ssh>: | ||||
968 | |||||
969 | Information about ssh is available at http://www.openssh.com/. | ||||
970 | C<URI> objects belonging to the ssh scheme support the common, | ||||
971 | generic and server methods. In addition, they provide methods to | ||||
972 | access the userinfo sub-components: $uri->user and $uri->password. | ||||
973 | |||||
974 | =item B<urn>: | ||||
975 | |||||
976 | The syntax of Uniform Resource Names is specified in RFC 2141. C<URI> | ||||
977 | objects belonging to the urn scheme provide the common methods, and also the | ||||
978 | methods $uri->nid and $uri->nss, which return the Namespace Identifier | ||||
979 | and the Namespace-Specific String respectively. | ||||
980 | |||||
981 | The Namespace Identifier basically works like the Scheme identifier of | ||||
982 | URIs, and further divides the URN namespace. Namespace Identifier | ||||
983 | assignments are maintained at | ||||
984 | <http://www.iana.org/assignments/urn-namespaces>. | ||||
985 | |||||
986 | Letter case is not significant for the Namespace Identifier. It is | ||||
987 | always returned in lower case by the $uri->nid method. The $uri->_nid | ||||
988 | method can be used if you want it in its original case. | ||||
989 | |||||
990 | =item B<urn>:B<isbn>: | ||||
991 | |||||
992 | The C<urn:isbn:> namespace contains International Standard Book | ||||
993 | Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging | ||||
994 | to this namespace has the following extra methods (if the | ||||
995 | Business::ISBN module is available): $uri->isbn, | ||||
996 | $uri->isbn_publisher_code, $uri->isbn_group_code (formerly isbn_country_code, | ||||
997 | which is still supported but issues a deprecation warning), $uri->isbn_as_ean. | ||||
998 | |||||
999 | =item B<urn>:B<oid>: | ||||
1000 | |||||
1001 | The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is | ||||
1002 | described in RFC 3061. An object identifier consists of sequences of digits | ||||
1003 | separated by dots. A C<URI> object belonging to this namespace has an | ||||
1004 | additional method called $uri->oid that can be used to get/set the oid | ||||
1005 | value. In a list context, oid numbers are returned as separate elements. | ||||
1006 | |||||
1007 | =back | ||||
1008 | |||||
1009 | =head1 CONFIGURATION VARIABLES | ||||
1010 | |||||
1011 | The following configuration variables influence how the class and its | ||||
1012 | methods behave: | ||||
1013 | |||||
1014 | =over 4 | ||||
1015 | |||||
1016 | =item $URI::ABS_ALLOW_RELATIVE_SCHEME | ||||
1017 | |||||
1018 | Some older parsers used to allow the scheme name to be present in the | ||||
1019 | relative URL if it was the same as the base URL scheme. RFC 2396 says | ||||
1020 | that this should be avoided, but you can enable this old behaviour by | ||||
1021 | setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value. | ||||
1022 | The difference is demonstrated by the following examples: | ||||
1023 | |||||
1024 | URI->new("http:foo")->abs("http://host/a/b") | ||||
1025 | ==> "http:foo" | ||||
1026 | |||||
1027 | local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1; | ||||
1028 | URI->new("http:foo")->abs("http://host/a/b") | ||||
1029 | ==> "http://host/a/foo" | ||||
1030 | |||||
1031 | |||||
1032 | =item $URI::ABS_REMOTE_LEADING_DOTS | ||||
1033 | |||||
1034 | You can also have the abs() method ignore excess ".." | ||||
1035 | segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS | ||||
1036 | to a TRUE value. The difference is demonstrated by the following | ||||
1037 | examples: | ||||
1038 | |||||
1039 | URI->new("../../../foo")->abs("http://host/a/b") | ||||
1040 | ==> "http://host/../../foo" | ||||
1041 | |||||
1042 | local $URI::ABS_REMOTE_LEADING_DOTS = 1; | ||||
1043 | URI->new("../../../foo")->abs("http://host/a/b") | ||||
1044 | ==> "http://host/foo" | ||||
1045 | |||||
1046 | =item $URI::DEFAULT_QUERY_FORM_DELIMITER | ||||
1047 | |||||
1048 | This value can be set to ";" to have the query form C<key=value> pairs | ||||
1049 | delimited by ";" instead of "&" which is the default. | ||||
1050 | |||||
1051 | =back | ||||
1052 | |||||
1053 | =head1 BUGS | ||||
1054 | |||||
1055 | Using regexp variables like $1 directly as arguments to the URI methods | ||||
1056 | does not work too well with current perl implementations. I would argue | ||||
1057 | that this is actually a bug in perl. The workaround is to quote | ||||
1058 | them. Example: | ||||
1059 | |||||
1060 | /(...)/ || die; | ||||
1061 | $u->query("$1"); | ||||
1062 | |||||
1063 | =head1 PARSING URIs WITH REGEXP | ||||
1064 | |||||
1065 | As an alternative to this module, the following (official) regular | ||||
1066 | expression can be used to decode a URI: | ||||
1067 | |||||
1068 | my($scheme, $authority, $path, $query, $fragment) = | ||||
1069 | $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; | ||||
1070 | |||||
1071 | The C<URI::Split> module provides the function uri_split() as a | ||||
1072 | readable alternative. | ||||
1073 | |||||
1074 | =head1 SEE ALSO | ||||
1075 | |||||
1076 | L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>, | ||||
1077 | L<URI::Split>, L<URI::Heuristic> | ||||
1078 | |||||
1079 | RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax", | ||||
1080 | Berners-Lee, Fielding, Masinter, August 1998. | ||||
1081 | |||||
1082 | http://www.iana.org/assignments/uri-schemes | ||||
1083 | |||||
1084 | http://www.iana.org/assignments/urn-namespaces | ||||
1085 | |||||
1086 | http://www.w3.org/Addressing/ | ||||
1087 | |||||
1088 | =head1 COPYRIGHT | ||||
1089 | |||||
1090 | Copyright 1995-2009 Gisle Aas. | ||||
1091 | |||||
1092 | Copyright 1995 Martijn Koster. | ||||
1093 | |||||
1094 | This program is free software; you can redistribute it and/or modify | ||||
1095 | it under the same terms as Perl itself. | ||||
1096 | |||||
1097 | =head1 AUTHORS / ACKNOWLEDGMENTS | ||||
1098 | |||||
1099 | This module is based on the C<URI::URL> module, which in turn was | ||||
1100 | (distantly) based on the C<wwwurl.pl> code in the libwww-perl for | ||||
1101 | perl4 developed by Roy Fielding, as part of the Arcadia project at the | ||||
1102 | University of California, Irvine, with contributions from Brooks | ||||
1103 | Cutter. | ||||
1104 | |||||
1105 | C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and | ||||
1106 | Martijn Koster with input from other people on the libwww-perl mailing | ||||
1107 | list. | ||||
1108 | |||||
1109 | C<URI> and related subclasses were developed by Gisle Aas. | ||||
1110 | |||||
1111 | =cut | ||||
# spent 63µs within URI::CORE:match which was called 27 times, avg 2µs/call:
# 9 times (32µs+0s) by URI::_scheme at line 165 of URI.pm, avg 4µs/call
# 4 times (12µs+0s) by URI::new at line 47 of URI.pm, avg 3µs/call
# 4 times (7µs+0s) by URI::implementor at line 100 of URI.pm, avg 2µs/call
# 4 times (7µs+0s) by URI::_init at line 82 of URI.pm, avg 2µs/call
# 3 times (2µs+0s) by URI::canonical at line 304 of URI.pm, avg 767ns/call
# 3 times (2µs+0s) by URI::canonical at line 303 of URI.pm, avg 767ns/call | |||||
# spent 74µs within URI::CORE:regcomp which was called 5 times, avg 15µs/call:
# once (27µs+0s) by URI::new at line 47 of URI.pm
# once (16µs+0s) by URI::_scheme at line 165 of URI.pm
# once (12µs+0s) by URI::_init at line 82 of URI.pm
# once (12µs+0s) by URI::implementor at line 100 of URI.pm
# once (7µs+0s) by URI::_uric_escape at line 92 of URI.pm | |||||
# spent 26µs within URI::CORE:subst which was called 23 times, avg 1µs/call:
# 4 times (7µs+0s) by URI::new at line 43 of URI.pm, avg 2µs/call
# 4 times (6µs+0s) by URI::new at line 44 of URI.pm, avg 1µs/call
# 4 times (5µs+0s) by URI::_uric_escape at line 92 of URI.pm, avg 1µs/call
# 4 times (4µs+0s) by URI::new at line 41 of URI.pm, avg 975ns/call
# 4 times (2µs+0s) by URI::new at line 42 of URI.pm, avg 500ns/call
# once (900ns+0s) by URI::implementor at line 123 of URI.pm
# once (800ns+0s) by URI::implementor at line 124 of URI.pm
# once (600ns+0s) by URI::implementor at line 125 of URI.pm |