| File | /project/perl/lib/WWW/Google/PageRank.pm |
| Statements Executed | 368262 |
| Statement Execution Time | 4.39s |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 5532 | 2 | 1 | 1.95s | 3.00s | WWW::Google::PageRank::_mix |
| 108796 | 10 | 1 | 1.16s | 1.16s | WWW::Google::PageRank::_wsub |
| 922 | 2 | 1 | 637ms | 3.89s | WWW::Google::PageRank::_compute_ch |
| 461 | 1 | 1 | 314ms | 4.31s | WWW::Google::PageRank::_compute_ch_new |
| 20650 | 12 | 1 | 236ms | 236ms | WWW::Google::PageRank::_wadd |
| 461 | 1 | 1 | 106ms | 153s | WWW::Google::PageRank::get |
| 922 | 1 | 2 | 13.4ms | 13.4ms | WWW::Google::PageRank::CORE:unpack (opcode) |
| 922 | 2 | 2 | 8.45ms | 8.45ms | WWW::Google::PageRank::CORE:match (opcode) |
| 461 | 1 | 2 | 4.29ms | 4.29ms | WWW::Google::PageRank::CORE:pack (opcode) |
| 1 | 1 | 1 | 100µs | 455µs | WWW::Google::PageRank::new |
| 0 | 0 | 0 | 0s | 0s | WWW::Google::PageRank::BEGIN |
| Line | Statements | Time on line | Calls | Time in subs | Code |
|---|---|---|---|---|---|
| 1 | package WWW::Google::PageRank; | ||||
| 2 | |||||
| 3 | # -*- perl -*- | ||||
| 4 | |||||
| 5 | 3 | 95µs | 1 | 28µs | use strict; # spent 28µs making 1 call to strict::import |
| 6 | 3 | 133µs | 1 | 123µs | use warnings; # spent 123µs making 1 call to warnings::import |
| 7 | |||||
| 8 | 3 | 94µs | 1 | 149µs | use vars qw($VERSION); # spent 149µs making 1 call to vars::import |
| 9 | |||||
| 10 | 3 | 7.71ms | 1 | 144µs | use LWP::UserAgent; # spent 144µs making 1 call to Exporter::import |
| 11 | 3 | 1.60ms | 1 | 212µs | use URI::Escape; # spent 212µs making 1 call to Exporter::import |
| 12 | |||||
| 13 | 1 | 7µs | $VERSION = '0.15'; | ||
| 14 | |||||
| 15 | # spent 455µs (100+355) within WWW::Google::PageRank::new which was called
# once (100µs+355µs) by main::RUNTIME at line 6 of ddd2.pl | ||||
| 16 | 8 | 96µs | my $class = shift; | ||
| 17 | my %par = @_; | ||||
| 18 | my $self; | ||||
| 19 | $self->{ua} = LWP::UserAgent->new(agent => $par{agent} || # spent 355µs making 1 call to LWP::UserAgent::new | ||||
| 20 | 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)') | ||||
| 21 | or return; | ||||
| 22 | $self->{ua}->proxy('http', $par{proxy}) if $par{proxy}; | ||||
| 23 | $self->{ua}->timeout($par{timeout}) if $par{timeout}; | ||||
| 24 | $self->{host} = $par{host} || 'toolbarqueries.google.com'; | ||||
| 25 | bless($self, $class); | ||||
| 26 | } | ||||
| 27 | |||||
| 28 | # spent 153s (106ms+152) within WWW::Google::PageRank::get which was called 461 times, avg 331ms/call:
# 461 times (106ms+152s) by main::RUNTIME at line 9 of ddd2.pl, avg 331ms/call | ||||
| 29 | 2766 | 68.5ms | my ($self, $url) = @_; | ||
| 30 | return unless defined $url and $url =~ m[^https?://]i; # spent 5.82ms making 461 calls to WWW::Google::PageRank::CORE:match, avg 13µs/call | ||||
| 31 | |||||
| 32 | my $ch = '6' . _compute_ch_new('info:' . $url); # spent 4.31s making 461 calls to WWW::Google::PageRank::_compute_ch_new, avg 9.34ms/call | ||||
| 33 | my $query = 'http://' . $self->{host} . '/search?client=navclient-auto&ch=' . $ch . # spent 120ms making 461 calls to URI::Escape::uri_escape, avg 261µs/call | ||||
| 34 | '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url); | ||||
| 35 | |||||
| 36 | my $resp = $self->{ua}->get($query); # spent 148s making 461 calls to LWP::UserAgent::get, avg 321ms/call | ||||
| 37 | 461 | 2.20ms | 1383 | 43.5ms | if ($resp->is_success && $resp->content =~ /Rank_\d+:\d+:(\d+)/) { # spent 20.5ms making 461 calls to HTTP::Message::content, avg 44µs/call
# spent 20.3ms making 461 calls to HTTP::Response::is_success, avg 44µs/call
# spent 2.63ms making 461 calls to WWW::Google::PageRank::CORE:match, avg 6µs/call |
| 38 | if (wantarray) { | ||||
| 39 | return ($1, $resp); | ||||
| 40 | } else { | ||||
| 41 | return $1; | ||||
| 42 | } | ||||
| 43 | } else { | ||||
| 44 | 454 | 6.71ms | if (wantarray) { | ||
| 45 | return (undef, $resp); | ||||
| 46 | } else { | ||||
| 47 | return; | ||||
| 48 | } | ||||
| 49 | } | ||||
| 50 | } | ||||
| 51 | |||||
| 52 | # spent 4.31s (314ms+3.99) within WWW::Google::PageRank::_compute_ch_new which was called 461 times, avg 9.34ms/call:
# 461 times (314ms+3.99s) by WWW::Google::PageRank::get at line 32, avg 9.34ms/call | ||||
| 53 | 1844 | 75.7ms | my $url = shift; | ||
| 54 | |||||
| 55 | my $ch = _compute_ch($url); # spent 1.62s making 461 calls to WWW::Google::PageRank::_compute_ch, avg 3.51ms/call | ||||
| 56 | $ch = (($ch % 0x0d) & 7) | (($ch / 7) << 2); | ||||
| 57 | |||||
| 58 | 27660 | 223ms | 10142 | 2.38s | return _compute_ch(pack("V20", map {my $t = $ch; _wsub($t, $_*9); $t} 0..19)); # spent 2.27s making 461 calls to WWW::Google::PageRank::_compute_ch, avg 4.93ms/call
# spent 101ms making 9220 calls to WWW::Google::PageRank::_wsub, avg 11µs/call
# spent 4.29ms making 461 calls to WWW::Google::PageRank::CORE:pack, avg 9µs/call |
| 59 | } | ||||
| 60 | |||||
| 61 | # spent 3.89s (637ms+3.25) within WWW::Google::PageRank::_compute_ch which was called 922 times, avg 4.22ms/call:
# 461 times (375ms+1.90s) by WWW::Google::PageRank::_compute_ch_new at line 58, avg 4.93ms/call
# 461 times (262ms+1.35s) by WWW::Google::PageRank::_compute_ch_new at line 55, avg 3.51ms/call | ||||
| 62 | 17518 | 232ms | my $url = shift; | ||
| 63 | |||||
| 64 | my @url = unpack("C*", $url); # spent 13.4ms making 922 calls to WWW::Google::PageRank::CORE:unpack, avg 15µs/call | ||||
| 65 | my ($a, $b, $c, $k) = (0x9e3779b9, 0x9e3779b9, 0xe6359a60, 0); | ||||
| 66 | my $len = scalar @url; | ||||
| 67 | |||||
| 68 | while ($len >= 12) { | ||||
| 69 | 27660 | 361ms | 4610 | 57.1ms | _wadd($a, $url[$k+0] | ($url[$k+1] << 8) | ($url[$k+2] << 16) | ($url[$k+3] << 24)); # spent 57.1ms making 4610 calls to WWW::Google::PageRank::_wadd, avg 12µs/call |
| 70 | _wadd($b, $url[$k+4] | ($url[$k+5] << 8) | ($url[$k+6] << 16) | ($url[$k+7] << 24)); # spent 51.1ms making 4610 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 71 | _wadd($c, $url[$k+8] | ($url[$k+9] << 8) | ($url[$k+10] << 16) | ($url[$k+11] << 24)); # spent 50.7ms making 4610 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 72 | |||||
| 73 | _mix($a, $b, $c); # spent 2.50s making 4610 calls to WWW::Google::PageRank::_mix, avg 542µs/call | ||||
| 74 | |||||
| 75 | $k += 12; | ||||
| 76 | $len -= 12; | ||||
| 77 | } | ||||
| 78 | |||||
| 79 | _wadd($c, scalar @url); # spent 10.6ms making 922 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 80 | |||||
| 81 | _wadd($c, $url[$k+10] << 24) if $len > 10; | ||||
| 82 | _wadd($c, $url[$k+9] << 16) if $len > 9; | ||||
| 83 | _wadd($c, $url[$k+8] << 8) if $len > 8; | ||||
| 84 | _wadd($b, $url[$k+7] << 24) if $len > 7; # spent 5.10ms making 461 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 85 | _wadd($b, $url[$k+6] << 16) if $len > 6; # spent 4.97ms making 461 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 86 | _wadd($b, $url[$k+5] << 8) if $len > 5; # spent 4.98ms making 461 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 87 | _wadd($b, $url[$k+4]) if $len > 4; # spent 9.15ms making 831 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 88 | _wadd($a, $url[$k+3] << 24) if $len > 3; # spent 10.4ms making 918 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 89 | _wadd($a, $url[$k+2] << 16) if $len > 2; # spent 10.1ms making 922 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 90 | _wadd($a, $url[$k+1] << 8) if $len > 1; # spent 12.0ms making 922 calls to WWW::Google::PageRank::_wadd, avg 13µs/call | ||||
| 91 | _wadd($a, $url[$k]) if $len > 0; # spent 9.93ms making 922 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
| 92 | |||||
| 93 | _mix($a, $b, $c); # spent 501ms making 922 calls to WWW::Google::PageRank::_mix, avg 543µs/call | ||||
| 94 | |||||
| 95 | return $c; # integer is positive always | ||||
| 96 | } | ||||
| 97 | |||||
| 98 | sub _mix { | ||||
| 99 | 160428 | 1.76s | my ($a, $b, $c) = @_; | ||
| 100 | |||||
| 101 | _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 13; # spent 119ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 102 | _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 8) % 4294967296; # spent 119ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 103 | _wsub($c, $a); _wsub($c, $b); $c ^= $b >>13; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 104 | _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 12; # spent 116ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 105 | _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 16) % 4294967296; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 106 | _wsub($c, $a); _wsub($c, $b); $c ^= $b >> 5; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 107 | _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 3; # spent 116ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 108 | _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 10) % 4294967296; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 109 | _wsub($c, $a); _wsub($c, $b); $c ^= $b >> 15; # spent 116ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
| 110 | |||||
| 111 | @_[0 .. $#_] = ($a, $b, $c); | ||||
| 112 | } | ||||
| 113 | |||||
| 114 | 20650 | 285ms | # spent 236ms within WWW::Google::PageRank::_wadd which was called 20650 times, avg 11µs/call:
# 4610 times (57.1ms+0s) by WWW::Google::PageRank::_compute_ch at line 69, avg 12µs/call
# 4610 times (51.1ms+0s) by WWW::Google::PageRank::_compute_ch at line 70, avg 11µs/call
# 4610 times (50.7ms+0s) by WWW::Google::PageRank::_compute_ch at line 71, avg 11µs/call
# 922 times (12.0ms+0s) by WWW::Google::PageRank::_compute_ch at line 90, avg 13µs/call
# 922 times (10.6ms+0s) by WWW::Google::PageRank::_compute_ch at line 79, avg 11µs/call
# 922 times (10.1ms+0s) by WWW::Google::PageRank::_compute_ch at line 89, avg 11µs/call
# 922 times (9.93ms+0s) by WWW::Google::PageRank::_compute_ch at line 91, avg 11µs/call
# 918 times (10.4ms+0s) by WWW::Google::PageRank::_compute_ch at line 88, avg 11µs/call
# 831 times (9.15ms+0s) by WWW::Google::PageRank::_compute_ch at line 87, avg 11µs/call
# 461 times (5.10ms+0s) by WWW::Google::PageRank::_compute_ch at line 84, avg 11µs/call
# 461 times (4.98ms+0s) by WWW::Google::PageRank::_compute_ch at line 86, avg 11µs/call
# 461 times (4.97ms+0s) by WWW::Google::PageRank::_compute_ch at line 85, avg 11µs/call | ||
| 115 | 108796 | 1.37s | # spent 1.16s within WWW::Google::PageRank::_wsub which was called 108796 times, avg 11µs/call:
# 11064 times (119ms+0s) by WWW::Google::PageRank::_mix at line 102, avg 11µs/call
# 11064 times (119ms+0s) by WWW::Google::PageRank::_mix at line 101, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 103, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 105, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 106, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 108, avg 11µs/call
# 11064 times (116ms+0s) by WWW::Google::PageRank::_mix at line 104, avg 11µs/call
# 11064 times (116ms+0s) by WWW::Google::PageRank::_mix at line 109, avg 11µs/call
# 11064 times (116ms+0s) by WWW::Google::PageRank::_mix at line 107, avg 11µs/call
# 9220 times (101ms+0s) by WWW::Google::PageRank::_compute_ch_new at line 58, avg 11µs/call | ||
| 116 | |||||
| 117 | 1 | 12µs | 1; | ||
| 118 | |||||
| 119 | |||||
| 120 | __END__ | ||||
| 121 | |||||
| 122 | =head1 NAME | ||||
| 123 | |||||
| 124 | WWW::Google::PageRank - Query google pagerank of page | ||||
| 125 | |||||
| 126 | =head1 SYNOPSIS | ||||
| 127 | |||||
| 128 | use WWW::Google::PageRank; | ||||
| 129 | my $pr = WWW::Google::PageRank->new; | ||||
| 130 | print scalar($pr->get('http://www.yahoo.com/')), "\n"; | ||||
| 131 | |||||
| 132 | =head1 DESCRIPTION | ||||
| 133 | |||||
| 134 | The C<WWW::Google::PageRank> is a class implementing an interface for | ||||
| 135 | querying google pagerank. | ||||
| 136 | |||||
| 137 | To use it, you should create C<WWW::Google::PageRank> object and use its | ||||
| 138 | method get(), to query page rank of URL. | ||||
| 139 | |||||
| 140 | It uses C<LWP::UserAgent> for making request to Google. | ||||
| 141 | |||||
| 142 | =head1 CONSTRUCTOR METHOD | ||||
| 143 | |||||
| 144 | =over 4 | ||||
| 145 | |||||
| 146 | =item $gpr = WWW::Google::PageRank->new(%options); | ||||
| 147 | |||||
| 148 | This method constructs a new C<WWW::Google::PageRank> object and returns it. | ||||
| 149 | Key/value pair arguments may be provided to set up the initial state. | ||||
| 150 | The following options correspond to attribute methods described below: | ||||
| 151 | |||||
| 152 | KEY DEFAULT | ||||
| 153 | ----------- -------------------- | ||||
| 154 | agent "Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)" | ||||
| 155 | proxy undef | ||||
| 156 | timeout undef | ||||
| 157 | host "toolbarqueries.google.com" | ||||
| 158 | |||||
| 159 | C<agent> specifies the header 'User-Agent' when querying Google. If | ||||
| 160 | the C<proxy> option is passed in, requests will be made through | ||||
| 161 | the specified proxy. C<host> is the host which serves requests from Googlebar. | ||||
| 162 | |||||
| 163 | =back | ||||
| 164 | |||||
| 165 | =head1 QUERY METHOD | ||||
| 166 | |||||
| 167 | =over 4 | ||||
| 168 | |||||
| 169 | =item $pr = $gpr->get('http://www.yahoo.com'); | ||||
| 170 | |||||
| 171 | Queries Google for the pagerank of the specified URL and returns it. If the | ||||
| 172 | query is successful, an integer value from 0 to 10 is returned. If the query fails | ||||
| 173 | for some reason (google unreachable, url does not begin with | ||||
| 174 | 'http://' or 'https://', undefined url passed) it returns C<undef>. | ||||
| 175 | |||||
| 176 | In list context this function returns a list of two elements where the | ||||
| 177 | first is the result as in scalar context and the second is the | ||||
| 178 | C<HTTP::Response> object (returned by C<LWP::UserAgent::get>). This | ||||
| 179 | can be useful for debugging purposes and for querying failure | ||||
| 180 | details. | ||||
| 181 | |||||
| 182 | =back | ||||
| 183 | |||||
| 184 | =head1 BUGS | ||||
| 185 | |||||
| 186 | If you find any, please report ;) | ||||
| 187 | |||||
| 188 | =head1 AUTHOR | ||||
| 189 | |||||
| 190 | Yuri Karaban F<E<lt>tech@askold.netE<gt>>. | ||||
| 191 | |||||
| 192 | Algorithm of computing checksum taken from mozilla module | ||||
| 193 | pagerankstatus F<http://pagerankstatus.mozdev.org> by | ||||
| 194 | Stephane Queraud F<E<lt>squeraud@toteme.comE<gt>>. | ||||
| 195 | |||||
| 196 | The algorithm was modified (15-09-2004) according to the new algorithm for | ||||
| 197 | computing the checksum in googlebar. | ||||
| 198 | |||||
| 199 | =head1 COPYRIGHT | ||||
| 200 | |||||
| 201 | Copyright 2004-2006, Yuri Karaban, All Rights Reserved. | ||||
| 202 | |||||
| 203 | You may use, modify, and distribute this package under the | ||||
| 204 | same terms as Perl itself. | ||||
# spent 8.45ms within WWW::Google::PageRank::CORE:match which was called 922 times, avg 9µs/call:
# 461 times (5.82ms+0s) by WWW::Google::PageRank::get at line 30 of WWW/Google/PageRank.pm, avg 13µs/call
# 461 times (2.63ms+0s) by WWW::Google::PageRank::get at line 37 of WWW/Google/PageRank.pm, avg 6µs/call | |||||
# spent 4.29ms within WWW::Google::PageRank::CORE:pack which was called 461 times, avg 9µs/call:
# 461 times (4.29ms+0s) by WWW::Google::PageRank::_compute_ch_new at line 58 of WWW/Google/PageRank.pm, avg 9µs/call | |||||
# spent 13.4ms within WWW::Google::PageRank::CORE:unpack which was called 922 times, avg 15µs/call:
# 922 times (13.4ms+0s) by WWW::Google::PageRank::_compute_ch at line 64 of WWW/Google/PageRank.pm, avg 15µs/call |