← Index
NYTProf Performance Profile   « block view • line view • sub view »
For ddd2.pl
  Run on Tue May 25 16:52:24 2010
Reported on Tue May 25 16:56:45 2010

File /project/perl/lib/URI.pm
Statements Executed 20791
Statement Execution Time 278ms
Subroutines — ordered by exclusive time
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
4611161.0ms151msURI::::newURI::new
13831152.0ms64.6msURI::::_schemeURI::_scheme
13833150.0ms115msURI::::schemeURI::scheme
4611130.6ms38.2msURI::::_initURI::_init
4611130.5ms34.3msURI::::implementorURI::implementor
27664223.6ms23.6msURI::::CORE:matchURI::CORE:match (opcode)
23088216.7ms16.7msURI::::CORE:substURI::CORE:subst (opcode)
552138µs138µsURI::::CORE:regcompURI::CORE:regcomp (opcode)
11116µs16µsURI::::_init_implementorURI::_init_implementor
0000s0sURI::::BEGINURI::BEGIN
0000s0sURI::::STORABLE_freezeURI::STORABLE_freeze
0000s0sURI::::STORABLE_thawURI::STORABLE_thaw
0000s0sURI::::__ANON__[:24]URI::__ANON__[:24]
0000s0sURI::::__ANON__[:27]URI::__ANON__[:27]
0000s0sURI::::_no_scheme_okURI::_no_scheme_ok
0000s0sURI::::absURI::abs
0000s0sURI::::as_stringURI::as_string
0000s0sURI::::canonicalURI::canonical
0000s0sURI::::cloneURI::clone
0000s0sURI::::eqURI::eq
0000s0sURI::::fragmentURI::fragment
0000s0sURI::::new_absURI::new_abs
0000s0sURI::::opaqueURI::opaque
0000s0sURI::::relURI::rel
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package URI;
2
33105µs127µsuse strict;
# spent 27µs making 1 call to strict::import
43117µs1143µsuse vars qw($VERSION);
# spent 143µs making 1 call to vars::import
517µs$VERSION = "1.35"; # $Date: 2004/11/05 14:17:33 $
6
73116µs1225µsuse vars qw($ABS_REMOTE_LEADING_DOTS $ABS_ALLOW_RELATIVE_SCHEME);
# spent 225µs making 1 call to vars::import
8
915µsmy %implements; # mapping from scheme to implementor class
10
11# Some "official" character classes
# $reserved and $mark are plain lists of literal characters.
# $unreserved and $uric are bodies of regex character classes (the
# punctuation is quoted via \Q..\E / quotemeta) and are interpolated
# inside [...] by the subs below, e.g. [^$uric] and [$unreserved].
# $scheme_re is a regex source string matching a scheme name: one ASCII
# letter followed by any number of letters, digits, ".", "+" or "-".
12
133176µs1413µsuse vars qw($reserved $mark $unreserved $uric $scheme_re);
# spent 413µs making 1 call to vars::import
1417µs$reserved = q(;/?:@&=+$,[]);
1515µs$mark = q(-_.!~*'()); #'; emacs
1618µs$unreserved = "A-Za-z0-9\Q$mark\E";
17110µs$uric = quotemeta($reserved) . $unreserved . "%";
18
1915µs$scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*';
20
21350µsuse Carp ();
2236.60msuse URI::Escape ();
23
# Operator overloading for URI objects:
#   ""  returns the string the object's scalar reference points to;
#   ==  compares overload::StrVal() of both operands with eq, so it is
#       an identity test on the references rather than a comparison of
#       URI text;
#   fallback => 1 is passed so operators not listed here are derived by
#       the overload pragma's fallback rules.
249227.26msuse overload ('""' => sub { ${$_[0]} },
25 '==' => sub { overload::StrVal($_[0]) eq
26 overload::StrVal($_[1])
27 },
28 fallback => 1,
# spent 212µs making 1 call to overload::import
293985µs );
30
# new($class, $uri, $scheme) -- generic constructor.
#
# $uri    : URI text; undef becomes "" and anything else is stringified.
# $scheme : optional; only consulted when $uri carries no scheme of its
#           own.  May be a reference (its class becomes the implementor
#           and its ->scheme method is called) or a plain string that
#           starts with a scheme name.
#
# Returns whatever $impclass->_init($uri, $scheme) returns.  $class is
# unpacked but not otherwise used here: the implementor class is chosen
# from the scheme, not from the invocant.
31sub new
32
# spent 151ms (61.0+89.8) within URI::new which was called 461 times, avg 327µs/call: # 461 times (61.0ms+89.8ms) by HTTP::Request::uri at line 82 of HTTP/Request.pm, avg 327µs/call
{
33461077.1ms my($class, $uri, $scheme) = @_;
34
35 $uri = defined ($uri) ? "$uri" : ""; # stringify
36 # Get rid of potential wrapping
# The four substitutions strip, in order: a <...> or <URL:...> wrapper,
# surrounding double quotes, leading whitespace, trailing whitespace.
37 $uri =~ s/^<(?:URL:)?(.*)>$/$1/; #
# spent 3.45ms making 461 calls to URI::CORE:subst, avg 7µs/call
38 $uri =~ s/^"(.*)"$/$1/;
# spent 2.32ms making 461 calls to URI::CORE:subst, avg 5µs/call
39 $uri =~ s/^\s+//;
# spent 3.72ms making 461 calls to URI::CORE:subst, avg 8µs/call
40 $uri =~ s/\s+$//;
# spent 2.76ms making 461 calls to URI::CORE:subst, avg 6µs/call
41
42 my $impclass;
# A scheme found in the URI text itself overrides the $scheme argument.
43 if ($uri =~ m/^($scheme_re):/so) {
# spent 4.97ms making 461 calls to URI::CORE:match, avg 11µs/call # spent 41µs making 1 call to URI::CORE:regcomp
44 $scheme = $1;
45 }
46 else {
47 if (($impclass = ref($scheme))) {
48 $scheme = $scheme->scheme;
49 }
50 elsif ($scheme && $scheme =~ m/^($scheme_re)(?::|$)/o) {
51 $scheme = $1;
52 }
53 }
# Keep an implementor class taken from a reference $scheme; otherwise ask
# implementor(), and fall back to URI::_foreign when it returns nothing.
54 $impclass ||= implementor($scheme) ||
55 do {
# spent 34.3ms making 461 calls to URI::implementor, avg 74µs/call
56 require URI::_foreign;
57 $impclass = 'URI::_foreign';
58 };
59
60 return $impclass->_init($uri, $scheme);
# spent 38.2ms making 461 calls to URI::_init, avg 83µs/call
61}
62
63
# new_abs($class, $uri, $base) -- builds an object with
# $class->new($uri, $base) (so $base doubles as the scheme hint) and
# returns the result of calling ->abs($base) on it.
64sub new_abs
65{
66 my($class, $uri, $base) = @_;
67 $uri = $class->new($uri, $base);
68 $uri->abs($base);
69}
70
71
# _init($class, $str, $scheme) -- turns a cleaned-up URI string into an
# object.  Returns a scalar reference to the final string, blessed into
# $class.
72sub _init
73
# spent 38.2ms (30.6+7.56) within URI::_init which was called 461 times, avg 83µs/call: # 461 times (30.6ms+7.56ms) by URI::new at line 60, avg 83µs/call
{
74276638.8ms my $class = shift;
75 my($str, $scheme) = @_;
# Replace every character that is neither in $uric nor "#" with its entry
# from %URI::Escape::escapes.
76 $str =~ s/([^$uric\#])/$URI::Escape::escapes{$1}/go;
# spent 4.40ms making 461 calls to URI::CORE:subst, avg 10µs/call # spent 22µs making 1 call to URI::CORE:regcomp
# Prefix "$scheme:" unless the string already starts with a scheme or the
# class reports (via _no_scheme_ok) that a missing scheme is acceptable.
77 $str = "$scheme:$str" unless $str =~ /^$scheme_re:/o ||
# spent 3.11ms making 461 calls to URI::CORE:match, avg 7µs/call # spent 23µs making 1 call to URI::CORE:regcomp
78 $class->_no_scheme_ok;
79 my $self = bless \$str, $class;
80 $self;
81}
82
83
# implementor($scheme [, $impclass]) -- look up or register the class
# that implements a scheme.  Called as a plain function by new().
#
# * No scheme, or one that is not a well-formed scheme name: loads
#   URI::_generic and returns "URI::_generic".
# * With $impclass: registers it for the lower-cased scheme (after
#   calling its _init_implementor hook) and returns the previously
#   registered class, if any.
# * Otherwise: returns the class cached in %implements, or derives
#   "URI::<scheme>" (with "+", "." and "-" mapped to "_P", "_O" and "_"),
#   tries to load it, caches it and returns it.  Returns nothing when no
#   such class with a non-empty @ISA can be found.
84sub implementor
85
# spent 34.3ms (30.5+3.84) within URI::implementor which was called 461 times, avg 74µs/call: # 461 times (30.5ms+3.84ms) by URI::new at line 55, avg 74µs/call
{
86277726.5ms my($scheme, $impclass) = @_;
87 if (!$scheme || $scheme !~ /\A$scheme_re\z/o) {
# spent 2.93ms making 461 calls to URI::CORE:match, avg 6µs/call # spent 21µs making 1 call to URI::CORE:regcomp
88 require URI::_generic;
89 return "URI::_generic";
90 }
91
92 $scheme = lc($scheme);
93
94 if ($impclass) {
95 # Set the implementor class for a given scheme
96 my $old = $implements{$scheme};
97 $impclass->_init_implementor($scheme);
98 $implements{$scheme} = $impclass;
99 return $old;
100 }
101
102 my $ic = $implements{$scheme};
103 return $ic if $ic;
104
105 # scheme not yet known, look for internal or
106 # preloaded (with 'use') implementation
107 $ic = "URI::$scheme"; # default location
108
109 # turn scheme into a valid perl identifier by a simple transformation...
110 $ic =~ s/\+/_P/g;
# spent 5µs making 1 call to URI::CORE:subst
111 $ic =~ s/\./_O/g;
# spent 6µs making 1 call to URI::CORE:subst
112 $ic =~ s/\-/_/g;
# spent 4µs making 1 call to URI::CORE:subst
113
# Symbolic references are needed below to inspect @{"<class>::ISA"}.
11432.41ms1101µs no strict 'refs';
# spent 101µs making 1 call to strict::unimport
115 # check we actually have one for the scheme:
116 unless (@{"${ic}::ISA"}) {
117 # Try to load it
1181265µs eval "require $ic";
# A "Can't locate ... in @INC" failure just means there is no such module;
# any other load error is re-thrown.
119 die $@ if $@ && $@ !~ /Can\'t locate.*in \@INC/;
120 return unless @{"${ic}::ISA"};
121 }
122
123 $ic->_init_implementor($scheme);
# spent 16µs making 1 call to URI::_init_implementor
124 $implements{$scheme} = $ic;
125 $ic;
126}
127
128
# _init_implementor($class, $scheme) -- hook that implementor() calls on a
# class just before registering it for $scheme.  This base version only
# unpacks its arguments and does nothing else.
129sub _init_implementor
130
# spent 16µs within URI::_init_implementor which was called # once (16µs+0s) by URI::implementor at line 123
{
131120µs my($class, $scheme) = @_;
132 # Remember that one implementor class may actually
133 # serve to implement several URI schemes.
134}
135
136
# clone($self) -- copies the underlying string into a fresh scalar and
# returns a reference to it blessed into the same class as $self.
137sub clone
138{
139 my $self = shift;
140 my $other = $$self;
141 bless \$other, ref $self;
142}
143
144
# Predicate consulted by _init() and _scheme(): true means the URI string
# may be stored without a scheme.  This base version always returns 0.
145sub _no_scheme_ok { 0 }
146
# _scheme($self [, $new]) -- raw scheme accessor (no case folding).
#
# Without arguments: returns the scheme exactly as written in the URI, or
# an empty return when the string does not start with "<scheme>:".
#
# With an argument: returns the old scheme (undef if there was none) and
#   * for a non-empty $new: croaks with "Bad scheme" if it is not a valid
#     scheme name, otherwise rebuilds the URI through URI->new and
#     re-blesses $self into the class of the rebuilt object;
#   * for an undef or empty $new: removes the scheme only when
#     _no_scheme_ok is true; otherwise the URI is left unchanged.
147sub _scheme
148
# spent 64.6ms (52.0+12.6) within URI::_scheme which was called 1383 times, avg 47µs/call: # 1383 times (52.0ms+12.6ms) by URI::scheme at line 181, avg 47µs/call
{
149553267.3ms my $self = shift;
150
151 unless (@_) {
152 return unless $$self =~ /^($scheme_re):/o;
# spent 12.5ms making 1383 calls to URI::CORE:match, avg 9µs/call # spent 31µs making 1 call to URI::CORE:regcomp
153 return $1;
154 }
155
156 my $old;
157 my $new = shift;
158 if (defined($new) && length($new)) {
159 Carp::croak("Bad scheme '$new'") unless $new =~ /^$scheme_re$/o;
160 $old = $1 if $$self =~ s/^($scheme_re)://o;
# Re-parse with the new scheme so the object ends up holding the string
# and the class that URI->new produces for that scheme.
161 my $newself = URI->new("$new:$$self");
162 $$self = $$newself;
163 bless $self, ref($newself);
164 }
165 else {
166 if ($self->_no_scheme_ok) {
167 $old = $1 if $$self =~ s/^($scheme_re)://o;
# With warnings enabled ($^W), complain if what remains still begins with
# something that parses as "<scheme>:".
168 Carp::carp("Oops, opaque part now look like scheme")
169 if $^W && $$self =~ m/^$scheme_re:/o
170 }
171 else {
# Scheme cannot be dropped for this class: only report the current one.
172 $old = $1 if $$self =~ m/^($scheme_re):/o;
173 }
174 }
175
176 return $old;
177}
178
# scheme($self [, $new]) -- public scheme accessor.  Passes all arguments
# through to _scheme() and returns its result lower-cased, or an empty
# return when _scheme() yields undef.
179sub scheme
180
# spent 115ms (50.0+64.6) within URI::scheme which was called 1383 times, avg 83µs/call: # 461 times (18.0ms+24.3ms) by LWP::UserAgent::send_request at line 146 of LWP/UserAgent.pm, avg 92µs/call # 461 times (16.0ms+20.4ms) by LWP::UserAgent::_need_proxy at line 777 of LWP/UserAgent.pm, avg 79µs/call # 461 times (16.0ms+19.9ms) by LWP::UserAgent::send_request at line 158 of LWP/UserAgent.pm, avg 78µs/call
{
181414950.5ms138364.6ms my $scheme = shift->_scheme(@_);
# spent 64.6ms making 1383 calls to URI::_scheme, avg 47µs/call
182 return unless defined $scheme;
183 lc($scheme);
184}
185
186
# opaque($self [, $new_opaque]) -- accessor for the part of the URI
# between the optional "<scheme>:" prefix and the optional "#fragment".
#
# Without arguments: returns that part.
# With an argument: replaces it (undef is treated as "", and characters
# outside $uric are escaped via %URI::Escape::escapes), keeps the old
# scheme prefix and fragment, and returns the previous opaque part.
187sub opaque
188{
189 my $self = shift;
190
191 unless (@_) {
192 $$self =~ /^(?:$scheme_re:)?([^\#]*)/o or die;
193 return $1;
194 }
195
# Split the current string into scheme prefix (colon included), opaque
# part and fragment ("#" included).
196 $$self =~ /^($scheme_re:)? # optional scheme
197 ([^\#]*) # opaque
198 (\#.*)? # optional fragment
199 $/sx or die;
200
201 my $old_scheme = $1;
202 my $old_opaque = $2;
203 my $old_frag = $3;
204
205 my $new_opaque = shift;
206 $new_opaque = "" unless defined $new_opaque;
207 $new_opaque =~ s/([^$uric])/$URI::Escape::escapes{$1}/go;
208
# Reassemble: old scheme prefix + new opaque part + old fragment.
209 $$self = defined($old_scheme) ? $old_scheme : "";
210 $$self .= $new_opaque;
211 $$self .= $old_frag if defined $old_frag;
212
213 $old_opaque;
214}
215
216110µs*path = \&opaque; # alias
217
218
# fragment($self [, $new_frag]) -- accessor for the text after the first
# "#".
#
# Without arguments: returns the fragment, or an empty return when the
# URI contains no "#".
# With an argument: removes the current fragment, appends "#$new_frag"
# when $new_frag is defined (characters outside $uric are escaped first),
# and returns the old fragment (undef if there was none).
219sub fragment
220{
221 my $self = shift;
222 unless (@_) {
223 return unless $$self =~ /\#(.*)/s;
224 return $1;
225 }
226
227 my $old;
228 $old = $1 if $$self =~ s/\#(.*)//s;
229
230 my $new_frag = shift;
231 if (defined $new_frag) {
232 $new_frag =~ s/([^$uric])/$URI::Escape::escapes{$1}/go;
233 $$self .= "#$new_frag";
234 }
235 $old;
236}
237
238
# as_string($self) -- returns the string the object's scalar reference
# points to.
239sub as_string
240{
241 my $self = shift;
242 $$self;
243}
244
245
# canonical($self) -- returns a normalized URI object.
#
# When the scheme has no upper-case letter and the string contains no
# %XX escape, $self itself is returned (not a copy).  Otherwise a clone
# is returned with the scheme lower-cased and each %XX escape either
# decoded (when the decoded character is in $unreserved) or rewritten
# with upper-case hex digits.
246sub canonical
247{
248 # Make sure scheme is lowercased, that we don't escape unreserved chars,
249 # and that we use upcase escape sequences.
250
251 my $self = shift;
252 my $scheme = $self->_scheme || "";
253 my $uc_scheme = $scheme =~ /[A-Z]/;
254 my $esc = $$self =~ /%[a-fA-F0-9]{2}/;
255 return $self unless $uc_scheme || $esc;
256
257 my $other = $self->clone;
258 if ($uc_scheme) {
259 $other->_scheme(lc $scheme);
260 }
261 if ($esc) {
262 $$other =~ s{%([0-9a-fA-F]{2})}
263 { my $a = chr(hex($1));
264 $a =~ /^[$unreserved]\z/o ? $a : "%\U$1"
265 }ge;
266 }
267 return $other;
268}
269
270# Compare two URIs, subclasses will provide a more correct implementation
# Usable as a method or as a plain function.  An argument that is not a
# reference is first turned into an object with URI->new, passing the
# other argument as the scheme hint.  The result is true when both
# objects have the same class and their canonical forms stringify to the
# same text.
271sub eq {
272 my($self, $other) = @_;
273 $self = URI->new($self, $other) unless ref $self;
274 $other = URI->new($other, $self) unless ref $other;
275 ref($self) eq ref($other) && # same class
276 $self->canonical->as_string eq $other->canonical->as_string;
277}
278
279# generic-URI transformation methods
# In this base class both methods ignore any base argument and return
# the invocant unchanged.
280sub abs { $_[0]; }
281sub rel { $_[0]; }
282
283# help out Storable
# STORABLE_freeze returns the underlying URI string; STORABLE_thaw
# assigns the string it is handed back into the object's scalar.
# $cloning is accepted by both hooks but not used.
284sub STORABLE_freeze {
285 my($self, $cloning) = @_;
286 return $$self;
287}
288
289sub STORABLE_thaw {
290 my($self, $cloning, $str) = @_;
291 $$self = $str;
292}
293
294120µs1;
295
296__END__
297
298=head1 NAME
299
300URI - Uniform Resource Identifiers (absolute and relative)
301
302=head1 SYNOPSIS
303
304 $u1 = URI->new("http://www.perl.com");
305 $u2 = URI->new("foo", "http");
306 $u3 = $u2->abs($u1);
307 $u4 = $u3->clone;
308 $u5 = URI->new("HTTP://WWW.perl.com:80")->canonical;
309
310 $str = $u->as_string;
311 $str = "$u";
312
313 $scheme = $u->scheme;
314 $opaque = $u->opaque;
315 $path = $u->path;
316 $frag = $u->fragment;
317
318 $u->scheme("ftp");
319 $u->host("ftp.perl.com");
320 $u->path("cpan/");
321
322=head1 DESCRIPTION
323
324This module implements the C<URI> class. Objects of this class
325represent "Uniform Resource Identifier references" as specified in RFC
3262396 (and updated by RFC 2732).
327
328A Uniform Resource Identifier is a compact string of characters that
329identifies an abstract or physical resource. A Uniform Resource
330Identifier can be further classified as either a Uniform Resource Locator
331(URL) or a Uniform Resource Name (URN). The distinction between URL
332and URN does not matter to the C<URI> class interface. A
333"URI-reference" is a URI that may have additional information attached
334in the form of a fragment identifier.
335
336An absolute URI reference consists of three parts: a I<scheme>, a
337I<scheme-specific part> and a I<fragment> identifier. A subset of URI
338references share a common syntax for hierarchical namespaces. For
339these, the scheme-specific part is further broken down into
340I<authority>, I<path> and I<query> components. These URIs can also
341take the form of relative URI references, where the scheme (and
342usually also the authority) component is missing, but implied by the
343context of the URI reference. The three forms of URI reference
344syntax are summarized as follows:
345
346 <scheme>:<scheme-specific-part>#<fragment>
347 <scheme>://<authority><path>?<query>#<fragment>
348 <path>?<query>#<fragment>
349
350The components into which a URI reference can be divided depend on the
351I<scheme>. The C<URI> class provides methods to get and set the
352individual components. The methods available for a specific
353C<URI> object depend on the scheme.
354
355=head1 CONSTRUCTORS
356
357The following methods construct new C<URI> objects:
358
359=over 4
360
361=item $uri = URI->new( $str )
362
363=item $uri = URI->new( $str, $scheme )
364
365Constructs a new URI object. The string
366representation of a URI is given as argument, together with an optional
367scheme specification. Common URI wrappers like "" and <>, as well as
368leading and trailing white space, are automatically removed from
369the $str argument before it is processed further.
370
371The constructor determines the scheme, maps this to an appropriate
372URI subclass, constructs a new object of that class and returns it.
373
374The $scheme argument is only used when $str is a
375relative URI. It can be either a simple string that
376denotes the scheme, a string containing an absolute URI reference, or
377an absolute C<URI> object. If no $scheme is specified for a relative
378URI $str, then $str is simply treated as a generic URI (no scheme-specific
379methods available).
380
381The set of characters available for building URI references is
382restricted (see L<URI::Escape>). Characters outside this set are
383automatically escaped by the URI constructor.
384
385=item $uri = URI->new_abs( $str, $base_uri )
386
387Constructs a new absolute URI object. The $str argument can
388denote a relative or absolute URI. If relative, then it is
389absolutized using $base_uri as base. The $base_uri must be an absolute
390URI.
391
392=item $uri = URI::file->new( $filename )
393
394=item $uri = URI::file->new( $filename, $os )
395
396Constructs a new I<file> URI from a file name. See L<URI::file>.
397
398=item $uri = URI::file->new_abs( $filename )
399
400=item $uri = URI::file->new_abs( $filename, $os )
401
402Constructs a new absolute I<file> URI from a file name. See
403L<URI::file>.
404
405=item $uri = URI::file->cwd
406
407Returns the current working directory as a I<file> URI. See
408L<URI::file>.
409
410=item $uri->clone
411
412Returns a copy of the $uri.
413
414=back
415
416=head1 COMMON METHODS
417
418The methods described in this section are available for all C<URI>
419objects.
420
421Methods that give access to components of a URI always return the
422old value of the component. The value returned is C<undef> if the
423component was not present. There is generally a difference between a
424component that is empty (represented as C<"">) and a component that is
425missing (represented as C<undef>). If an accessor method is given an
426argument, it updates the corresponding component in addition to
427returning the old value of the component. Passing an undefined
428argument removes the component (if possible). The description of
429each accessor method indicates whether the component is passed as
430an escaped or an unescaped string. A component that can be further
431divided into sub-parts is usually passed escaped, as unescaping might
432change its semantics.
433
434The common methods available for all URIs are:
435
436=over 4
437
438=item $uri->scheme
439
440=item $uri->scheme( $new_scheme )
441
442Sets and returns the scheme part of the $uri. If the $uri is
443relative, then $uri->scheme returns C<undef>. If called with an
444argument, it updates the scheme of $uri, possibly changing the
445class of $uri, and returns the old scheme value. The method croaks
446if the new scheme name is illegal; a scheme name must begin with a
447letter and must consist of only US-ASCII letters, numbers, and a few
448special marks: ".", "+", "-". This restriction effectively means
449that the scheme must be passed unescaped. Passing an undefined
450argument to the scheme method makes the URI relative (if possible).
451
452Letter case does not matter for scheme names. The string
453returned by $uri->scheme is always lowercase. If you want the scheme
454just as it was written in the URI in its original case,
455you can use the $uri->_scheme method instead.
456
457=item $uri->opaque
458
459=item $uri->opaque( $new_opaque )
460
461Sets and returns the scheme-specific part of the $uri
462(everything between the scheme and the fragment)
463as an escaped string.
464
465=item $uri->path
466
467=item $uri->path( $new_path )
468
469Sets and returns the same value as $uri->opaque unless the URI
470supports the generic syntax for hierarchical namespaces.
471In that case the generic method is overridden to set and return
472the part of the URI between the I<host name> and the I<fragment>.
473
474=item $uri->fragment
475
476=item $uri->fragment( $new_frag )
477
478Returns the fragment identifier of a URI reference
479as an escaped string.
480
481=item $uri->as_string
482
483Returns the URI object as a plain string. URI objects are
484also converted to plain strings automatically by overloading. This
485means that $uri objects can be used as plain strings in most Perl
486constructs.
487
488=item $uri->canonical
489
490Returns a normalized version of the URI. The rules
491for normalization are scheme-dependent. They usually involve
492lowercasing the scheme and Internet host name components,
493removing the explicit port specification if it matches the default port,
494uppercasing all escape sequences, and unescaping octets that can be
495better represented as plain characters.
496
497For efficiency reasons, if the $uri is already in normalized form,
498then a reference to it is returned instead of a copy.
499
500=item $uri->eq( $other_uri )
501
502=item URI::eq( $first_uri, $other_uri )
503
504Tests whether two URI references are equal. URI references
505that normalize to the same string are considered equal. The method
506can also be used as a plain function which can also test two string
507arguments.
508
509If you need to test whether two C<URI> object references denote the
510same object, use the '==' operator.
511
512=item $uri->abs( $base_uri )
513
514Returns an absolute URI reference. If $uri is already
515absolute, then a reference to it is simply returned. If the $uri
516is relative, then a new absolute URI is constructed by combining the
517$uri and the $base_uri, and returned.
518
519=item $uri->rel( $base_uri )
520
521Returns a relative URI reference if it is possible to
522make one that denotes the same resource relative to $base_uri.
523If not, then $uri is simply returned.
524
525=back
526
527=head1 GENERIC METHODS
528
529The following methods are available to schemes that use the
530common/generic syntax for hierarchical namespaces. The descriptions of
531schemes below indicate which these are. Unknown schemes are
532assumed to support the generic syntax, and therefore the following
533methods:
534
535=over 4
536
537=item $uri->authority
538
539=item $uri->authority( $new_authority )
540
541Sets and returns the escaped authority component
542of the $uri.
543
544=item $uri->path
545
546=item $uri->path( $new_path )
547
548Sets and returns the escaped path component of
549the $uri (the part between the host name and the query or fragment).
550The path can never be undefined, but it can be the empty string.
551
552=item $uri->path_query
553
554=item $uri->path_query( $new_path_query )
555
556Sets and returns the escaped path and query
557components as a single entity. The path and the query are
558separated by a "?" character, but the query can itself contain "?".
559
560=item $uri->path_segments
561
562=item $uri->path_segments( $segment, ... )
563
564Sets and returns the path. In a scalar context, it returns
565the same value as $uri->path. In a list context, it returns the
566unescaped path segments that make up the path. Path segments that
567have parameters are returned as an anonymous array. The first element
568is the unescaped path segment proper; subsequent elements are escaped
569parameter strings. Such an anonymous array uses overloading so it can
570be treated as a string too, but this string does not include the
571parameters.
572
573Note that absolute paths have the empty string as their first
574I<path_segment>, i.e. the I<path> C</foo/bar> has 3
575I<path_segments>; "", "foo" and "bar".
576
577=item $uri->query
578
579=item $uri->query( $new_query )
580
581Sets and returns the escaped query component of
582the $uri.
583
584=item $uri->query_form
585
586=item $uri->query_form( $key1 => $val1, $key2 => $val2, ... )
587
588=item $uri->query_form( \@key_value_pairs )
589
590=item $uri->query_form( \%hash )
591
592Sets and returns query components that use the
593I<application/x-www-form-urlencoded> format. Key/value pairs are
594separated by "&", and the key is separated from the value by a "="
595character.
596
597The form can be set either by passing separate key/value pairs, or via
598an array or hash reference. Passing an empty array or an empty hash
599removes the query component, whereas passing no arguments at all leaves
600the component unchanged. The order of keys is undefined if a hash
601reference is passed. The old value is always returned as a list of
602separate key/value pairs. Assigning this list to a hash is unwise as
603the keys returned might repeat.
604
605The values passed when setting the form can be plain strings or
606references to arrays of strings. Passing an array of values has the
607same effect as passing the key repeatedly with one value at a time.
608All the following statements have the same effect:
609
610 $uri->query_form(foo => 1, foo => 2);
611 $uri->query_form(foo => [1, 2]);
612 $uri->query_form([ foo => 1, foo => 2 ]);
613 $uri->query_form([ foo => [1, 2] ]);
614 $uri->query_form({ foo => [1, 2] });
615
616The C<URI::QueryParam> module can be loaded to add further methods to
617manipulate the form of a URI. See L<URI::QueryParam> for details.
618
619=item $uri->query_keywords
620
621=item $uri->query_keywords( $keywords, ... )
622
623=item $uri->query_keywords( \@keywords )
624
625Sets and returns query components that use the
626keywords separated by "+" format.
627
628The keywords can be set either by passing separate keywords directly
629or by passing a reference to an array of keywords. Passing an empty
630array removes the query component, whereas passing no arguments at
631all leaves the component unchanged. The old value is always returned
632as a list of separate words.
633
634=back
635
636=head1 SERVER METHODS
637
638For schemes where the I<authority> component denotes an Internet host,
639the following methods are available in addition to the generic
640methods.
641
642=over 4
643
644=item $uri->userinfo
645
646=item $uri->userinfo( $new_userinfo )
647
648Sets and returns the escaped userinfo part of the
649authority component.
650
651For some schemes this is a user name and a password separated by
652a colon. This practice is not recommended. Embedding passwords in
653clear text (such as URI) has proven to be a security risk in almost
654every case where it has been used.
655
656=item $uri->host
657
658=item $uri->host( $new_host )
659
660Sets and returns the unescaped hostname.
661
662If the $new_host string ends with a colon and a number, then this
663number also sets the port.
664
665=item $uri->port
666
667=item $uri->port( $new_port )
668
669Sets and returns the port. The port is a simple integer
670that should be greater than 0.
671
672If a port is not specified explicitly in the URI, then the URI scheme's default port
673is returned. If you don't want the default port
674substituted, then you can use the $uri->_port method instead.
675
676=item $uri->host_port
677
678=item $uri->host_port( $new_host_port )
679
680Sets and returns the host and port as a single
681unit. The returned value includes a port, even if it matches the
682default port. The host part and the port part are separated by a
683colon: ":".
684
685=item $uri->default_port
686
687Returns the default port of the URI scheme to which $uri
688belongs. For I<http> this is the number 80, for I<ftp> this
689is the number 21, etc. The default port for a scheme can not be
690changed.
691
692=back
693
694=head1 SCHEME-SPECIFIC SUPPORT
695
696Scheme-specific support is provided for the following URI schemes. For C<URI>
697objects that do not belong to one of these, you can only use the common and
698generic methods.
699
700=over 4
701
702=item B<data>:
703
704The I<data> URI scheme is specified in RFC 2397. It allows inclusion
705of small data items as "immediate" data, as if it had been included
706externally.
707
708C<URI> objects belonging to the data scheme support the common methods
709and two new methods to access their scheme-specific components:
710$uri->media_type and $uri->data. See L<URI::data> for details.
711
712=item B<file>:
713
714An old specification of the I<file> URI scheme is found in RFC 1738.
715A new RFC 2396 based specification is not available yet, but file URI
716references are in common use.
717
718C<URI> objects belonging to the file scheme support the common and
719generic methods. In addition, they provide two methods for mapping file URIs
720back to local file names; $uri->file and $uri->dir. See L<URI::file>
721for details.
722
723=item B<ftp>:
724
725An old specification of the I<ftp> URI scheme is found in RFC 1738. A
726new RFC 2396 based specification is not available yet, but ftp URI
727references are in common use.
728
729C<URI> objects belonging to the ftp scheme support the common,
730generic and server methods. In addition, they provide two methods for
731accessing the userinfo sub-components: $uri->user and $uri->password.
732
733=item B<gopher>:
734
735The I<gopher> URI scheme is specified in
736<draft-murali-url-gopher-1996-12-04> and will hopefully be available
737as a RFC 2396 based specification.
738
739C<URI> objects belonging to the gopher scheme support the common,
740generic and server methods. In addition, they support some methods for
741accessing gopher-specific path components: $uri->gopher_type,
742$uri->selector, $uri->search, $uri->string.
743
744=item B<http>:
745
746The I<http> URI scheme is specified in RFC 2616.
747The scheme is used to reference resources hosted by HTTP servers.
748
749C<URI> objects belonging to the http scheme support the common,
750generic and server methods.
751
752=item B<https>:
753
754The I<https> URI scheme is a Netscape invention which is commonly
755implemented. The scheme is used to reference HTTP servers through SSL
756connections. Its syntax is the same as http, but the default
757port is different.
758
759=item B<ldap>:
760
761The I<ldap> URI scheme is specified in RFC 2255. LDAP is the
762Lightweight Directory Access Protocol. An ldap URI describes an LDAP
763search operation to perform to retrieve information from an LDAP
764directory.
765
766C<URI> objects belonging to the ldap scheme support the common,
767generic and server methods as well as ldap-specific methods: $uri->dn,
768$uri->attributes, $uri->scope, $uri->filter, $uri->extensions. See
769L<URI::ldap> for details.
770
771=item B<ldapi>:
772
773Like the I<ldap> URI scheme, but uses a UNIX domain socket. The
774server methods are not supported, and the local socket path is
775available as $uri->un_path. The I<ldapi> scheme is used by the
776OpenLDAP package. There is no real specification for it, but it is
777mentioned in various OpenLDAP manual pages.
778
779=item B<ldaps>:
780
781Like the I<ldap> URI scheme, but uses an SSL connection. This
782scheme is deprecated, as the preferred way is to use the I<start_tls>
783mechanism.
784
785=item B<mailto>:
786
787The I<mailto> URI scheme is specified in RFC 2368. The scheme was
788originally used to designate the Internet mailing address of an
789individual or service. It has (in RFC 2368) been extended to allow
790setting of other mail header fields and the message body.
791
792C<URI> objects belonging to the mailto scheme support the common
793methods and the generic query methods. In addition, they support the
794following mailto-specific methods: $uri->to, $uri->headers.
795
796=item B<mms>:
797
798The I<mms> URL specification can be found at L<http://sdp.ppona.com/>.
799C<URI> objects belonging to the mms scheme support the common,
800generic, and server methods, with the exception of userinfo and
801query-related sub-components.
802
803=item B<news>:
804
805The I<news>, I<nntp> and I<snews> URI schemes are specified in
806<draft-gilman-news-url-01> and will hopefully be available as an RFC
8072396 based specification soon.
808
809C<URI> objects belonging to the news scheme support the common,
810generic and server methods. In addition, they provide some methods to
811access the path: $uri->group and $uri->message.
812
813=item B<nntp>:
814
815See I<news> scheme.
816
817=item B<pop>:
818
819The I<pop> URI scheme is specified in RFC 2384. The scheme is used to
820reference a POP3 mailbox.
821
822C<URI> objects belonging to the pop scheme support the common, generic
823and server methods. In addition, they provide two methods to access the
824userinfo components: $uri->user and $uri->auth.
825
826=item B<rlogin>:
827
828An old specification of the I<rlogin> URI scheme is found in RFC
8291738. C<URI> objects belonging to the rlogin scheme support the
830common, generic and server methods.
831
832=item B<rtsp>:
833
834The I<rtsp> URL specification can be found in section 3.2 of RFC 2326.
835C<URI> objects belonging to the rtsp scheme support the common,
836generic, and server methods, with the exception of userinfo and
837query-related sub-components.
838
839=item B<rtspu>:
840
841The I<rtspu> URI scheme is used to talk to RTSP servers over UDP
842instead of TCP. The syntax is the same as rtsp.
843
844=item B<rsync>:
845
846Information about rsync is available from http://rsync.samba.org.
847C<URI> objects belonging to the rsync scheme support the common,
848generic and server methods. In addition, they provide methods to
849access the userinfo sub-components: $uri->user and $uri->password.
850
851=item B<sip>:
852
853The I<sip> URI specification is described in sections 19.1 and 25
854of RFC 3261. C<URI> objects belonging to the sip scheme support the
855common, generic, and server methods with the exception of path related
856sub-components. In addition, they provide two methods to get and set
857I<sip> parameters: $uri->params_form and $uri->params.
858
859=item B<sips>:
860
861See I<sip> scheme. Its syntax is the same as sip, but the default
862port is different.
863
864=item B<snews>:
865
866See I<news> scheme. Its syntax is the same as news, but the default
867port is different.
868
869=item B<telnet>:
870
871An old specification of the I<telnet> URI scheme is found in RFC
8721738. C<URI> objects belonging to the telnet scheme support the
873common, generic and server methods.
874
875=item B<tn3270>:
876
877These URIs are used like I<telnet> URIs but for connections to IBM
878mainframes. C<URI> objects belonging to the tn3270 scheme support the
879common, generic and server methods.
880
881=item B<ssh>:
882
883Information about ssh is available at http://www.openssh.com/.
884C<URI> objects belonging to the ssh scheme support the common,
885generic and server methods. In addition, they provide methods to
886access the userinfo sub-components: $uri->user and $uri->password.
887
888=item B<urn>:
889
890The syntax of Uniform Resource Names is specified in RFC 2141. C<URI>
891objects belonging to the urn scheme provide the common methods, and also the
892methods $uri->nid and $uri->nss, which return the Namespace Identifier
893and the Namespace-Specific String respectively.
894
895The Namespace Identifier basically works like the Scheme identifier of
896URIs, and further divides the URN namespace. Namespace Identifier
897assignments are maintained at
898<http://www.iana.org/assignments/urn-namespaces>.
899
900Letter case is not significant for the Namespace Identifier. It is
901always returned in lower case by the $uri->nid method. The $uri->_nid
902method can be used if you want it in its original case.
903
904=item B<urn>:B<isbn>:
905
906The C<urn:isbn:> namespace contains International Standard Book
907Numbers (ISBNs) and is described in RFC 3187. A C<URI> object belonging
908to this namespace has the following extra methods (if the
909Business::ISBN module is available): $uri->isbn,
910$uri->isbn_publisher_code, $uri->isbn_country_code, $uri->isbn_as_ean.
911
912=item B<urn>:B<oid>:
913
914The C<urn:oid:> namespace contains Object Identifiers (OIDs) and is
915described in RFC 3061. An object identifier consists of sequences of digits
916separated by dots. A C<URI> object belonging to this namespace has an
917additional method called $uri->oid that can be used to get/set the oid
918value. In a list context, oid numbers are returned as separate elements.
919
920=back
921
922=head1 CONFIGURATION VARIABLES
923
924The following configuration variables influence how the class and its
925methods behave:
926
927=over 4
928
929=item $URI::ABS_ALLOW_RELATIVE_SCHEME
930
931Some older parsers used to allow the scheme name to be present in the
932relative URL if it was the same as the base URL scheme. RFC 2396 says
933that this should be avoided, but you can enable this old behaviour by
934setting the $URI::ABS_ALLOW_RELATIVE_SCHEME variable to a TRUE value.
935The difference is demonstrated by the following examples:
936
937 URI->new("http:foo")->abs("http://host/a/b")
938 ==> "http:foo"
939
940 local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
941 URI->new("http:foo")->abs("http://host/a/b")
942 ==> "http://host/a/foo"
943
944
945=item $URI::ABS_REMOTE_LEADING_DOTS
946
947You can also have the abs() method ignore excess ".."
948segments in the relative URI by setting $URI::ABS_REMOTE_LEADING_DOTS
949to a TRUE value. The difference is demonstrated by the following
950examples:
951
952 URI->new("../../../foo")->abs("http://host/a/b")
953 ==> "http://host/../../foo"
954
955 local $URI::ABS_REMOTE_LEADING_DOTS = 1;
956 URI->new("../../../foo")->abs("http://host/a/b")
957 ==> "http://host/foo"
958
959=back
960
961=head1 BUGS
962
963Using regexp variables like $1 directly as arguments to the URI methods
964does not work too well with current perl implementations. I would argue
965that this is actually a bug in perl. The workaround is to quote
966them. Example:
967
968 /(...)/ || die;
969 $u->query("$1");
970
971=head1 PARSING URIs WITH REGEXP
972
973As an alternative to this module, the following (official) regular
974expression can be used to decode a URI:
975
976 my($scheme, $authority, $path, $query, $fragment) =
977 $uri =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
978
979The C<URI::Split> module provides the function uri_split() as a
980readable alternative.
981
982=head1 SEE ALSO
983
984L<URI::file>, L<URI::WithBase>, L<URI::QueryParam>, L<URI::Escape>,
985L<URI::Split>, L<URI::Heuristic>
986
987RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax",
988Berners-Lee, Fielding, Masinter, August 1998.
989
990http://www.iana.org/assignments/uri-schemes
991
992http://www.iana.org/assignments/urn-namespaces
993
994http://www.w3.org/Addressing/
995
996=head1 COPYRIGHT
997
998Copyright 1995-2003 Gisle Aas.
999
1000Copyright 1995 Martijn Koster.
1001
1002This program is free software; you can redistribute it and/or modify
1003it under the same terms as Perl itself.
1004
1005=head1 AUTHORS / ACKNOWLEDGMENTS
1006
1007This module is based on the C<URI::URL> module, which in turn was
1008(distantly) based on the C<wwwurl.pl> code in the libwww-perl for
1009perl4 developed by Roy Fielding, as part of the Arcadia project at the
1010University of California, Irvine, with contributions from Brooks
1011Cutter.
1012
1013C<URI::URL> was developed by Gisle Aas, Tim Bunce, Roy Fielding and
1014Martijn Koster with input from other people on the libwww-perl mailing
1015list.
1016
1017C<URI> and related subclasses were developed by Gisle Aas.
1018
1019=cut
# spent 23.6ms within URI::CORE:match which was called 2766 times, avg 9µs/call: # 1383 times (12.5ms+0s) by URI::_scheme at line 152 of URI.pm, avg 9µs/call # 461 times (4.97ms+0s) by URI::new at line 43 of URI.pm, avg 11µs/call # 461 times (3.11ms+0s) by URI::_init at line 77 of URI.pm, avg 7µs/call # 461 times (2.93ms+0s) by URI::implementor at line 87 of URI.pm, avg 6µs/call
sub URI::CORE:match; # xsub
# spent 138µs within URI::CORE:regcomp which was called 5 times, avg 28µs/call: # once (41µs+0s) by URI::new at line 43 of URI.pm # once (31µs+0s) by URI::_scheme at line 152 of URI.pm # once (23µs+0s) by URI::_init at line 77 of URI.pm # once (22µs+0s) by URI::_init at line 76 of URI.pm # once (21µs+0s) by URI::implementor at line 87 of URI.pm
sub URI::CORE:regcomp; # xsub
# spent 16.7ms within URI::CORE:subst which was called 2308 times, avg 7µs/call: # 461 times (4.40ms+0s) by URI::_init at line 76 of URI.pm, avg 10µs/call # 461 times (3.72ms+0s) by URI::new at line 39 of URI.pm, avg 8µs/call # 461 times (3.45ms+0s) by URI::new at line 37 of URI.pm, avg 7µs/call # 461 times (2.76ms+0s) by URI::new at line 40 of URI.pm, avg 6µs/call # 461 times (2.32ms+0s) by URI::new at line 38 of URI.pm, avg 5µs/call # once (6µs+0s) by URI::implementor at line 111 of URI.pm # once (5µs+0s) by URI::implementor at line 110 of URI.pm # once (4µs+0s) by URI::implementor at line 112 of URI.pm
sub URI::CORE:subst; # xsub