File | /project/perl/lib/WWW/Google/PageRank.pm |
Statements Executed | 368262 |
Statement Execution Time | 4.39s |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
5532 | 2 | 1 | 1.95s | 3.00s | _mix | WWW::Google::PageRank::
108796 | 10 | 1 | 1.16s | 1.16s | _wsub | WWW::Google::PageRank::
922 | 2 | 1 | 637ms | 3.89s | _compute_ch | WWW::Google::PageRank::
461 | 1 | 1 | 314ms | 4.31s | _compute_ch_new | WWW::Google::PageRank::
20650 | 12 | 1 | 236ms | 236ms | _wadd | WWW::Google::PageRank::
461 | 1 | 1 | 106ms | 153s | get | WWW::Google::PageRank::
922 | 1 | 2 | 13.4ms | 13.4ms | CORE:unpack (opcode) | WWW::Google::PageRank::
922 | 2 | 2 | 8.45ms | 8.45ms | CORE:match (opcode) | WWW::Google::PageRank::
461 | 1 | 2 | 4.29ms | 4.29ms | CORE:pack (opcode) | WWW::Google::PageRank::
1 | 1 | 1 | 100µs | 455µs | new | WWW::Google::PageRank::
0 | 0 | 0 | 0s | 0s | BEGIN | WWW::Google::PageRank::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package WWW::Google::PageRank; | ||||
2 | |||||
3 | # -*- perl -*- | ||||
4 | |||||
5 | 3 | 95µs | 1 | 28µs | use strict; # spent 28µs making 1 call to strict::import |
6 | 3 | 133µs | 1 | 123µs | use warnings; # spent 123µs making 1 call to warnings::import |
7 | |||||
8 | 3 | 94µs | 1 | 149µs | use vars qw($VERSION); # spent 149µs making 1 call to vars::import |
9 | |||||
10 | 3 | 7.71ms | 1 | 144µs | use LWP::UserAgent; # spent 144µs making 1 call to Exporter::import |
11 | 3 | 1.60ms | 1 | 212µs | use URI::Escape; # spent 212µs making 1 call to Exporter::import |
12 | |||||
13 | 1 | 7µs | $VERSION = '0.15'; | ||
14 | |||||
15 | # spent 455µs (100+355) within WWW::Google::PageRank::new which was called
# once (100µs+355µs) by main::RUNTIME at line 6 of ddd2.pl | ||||
16 | 8 | 96µs | my $class = shift; | ||
17 | my %par = @_; | ||||
18 | my $self; | ||||
19 | $self->{ua} = LWP::UserAgent->new(agent => $par{agent} || # spent 355µs making 1 call to LWP::UserAgent::new | ||||
20 | 'Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)') | ||||
21 | or return; | ||||
22 | $self->{ua}->proxy('http', $par{proxy}) if $par{proxy}; | ||||
23 | $self->{ua}->timeout($par{timeout}) if $par{timeout}; | ||||
24 | $self->{host} = $par{host} || 'toolbarqueries.google.com'; | ||||
25 | bless($self, $class); | ||||
26 | } | ||||
27 | |||||
28 | # spent 153s (106ms+152) within WWW::Google::PageRank::get which was called 461 times, avg 331ms/call:
# 461 times (106ms+152s) by main::RUNTIME at line 9 of ddd2.pl, avg 331ms/call | ||||
29 | 3681 | 77.4ms | my ($self, $url) = @_; | ||
30 | return unless defined $url and $url =~ m[^https?://]i; # spent 5.82ms making 461 calls to WWW::Google::PageRank::CORE:match, avg 13µs/call | ||||
31 | |||||
32 | my $ch = '6' . _compute_ch_new('info:' . $url); # spent 4.31s making 461 calls to WWW::Google::PageRank::_compute_ch_new, avg 9.34ms/call | ||||
33 | my $query = 'http://' . $self->{host} . '/search?client=navclient-auto&ch=' . $ch . # spent 120ms making 461 calls to URI::Escape::uri_escape, avg 261µs/call | ||||
34 | '&ie=UTF-8&oe=UTF-8&features=Rank&q=info:' . uri_escape($url); | ||||
35 | |||||
36 | my $resp = $self->{ua}->get($query); # spent 148s making 461 calls to LWP::UserAgent::get, avg 321ms/call | ||||
37 | if ($resp->is_success && $resp->content =~ /Rank_\d+:\d+:(\d+)/) { # spent 20.5ms making 461 calls to HTTP::Message::content, avg 44µs/call
# spent 20.3ms making 461 calls to HTTP::Response::is_success, avg 44µs/call
# spent 2.63ms making 461 calls to WWW::Google::PageRank::CORE:match, avg 6µs/call | ||||
38 | if (wantarray) { | ||||
39 | return ($1, $resp); | ||||
40 | } else { | ||||
41 | return $1; | ||||
42 | } | ||||
43 | } else { | ||||
44 | if (wantarray) { | ||||
45 | return (undef, $resp); | ||||
46 | } else { | ||||
47 | return; | ||||
48 | } | ||||
49 | } | ||||
50 | } | ||||
51 | |||||
52 | # spent 4.31s (314ms+3.99) within WWW::Google::PageRank::_compute_ch_new which was called 461 times, avg 9.34ms/call:
# 461 times (314ms+3.99s) by WWW::Google::PageRank::get at line 32, avg 9.34ms/call | ||||
53 | 29504 | 299ms | my $url = shift; | ||
54 | |||||
55 | my $ch = _compute_ch($url); # spent 1.62s making 461 calls to WWW::Google::PageRank::_compute_ch, avg 3.51ms/call | ||||
56 | $ch = (($ch % 0x0d) & 7) | (($ch / 7) << 2); | ||||
57 | |||||
58 | return _compute_ch(pack("V20", map {my $t = $ch; _wsub($t, $_*9); $t} 0..19)); # spent 2.27s making 461 calls to WWW::Google::PageRank::_compute_ch, avg 4.93ms/call
# spent 101ms making 9220 calls to WWW::Google::PageRank::_wsub, avg 11µs/call
# spent 4.29ms making 461 calls to WWW::Google::PageRank::CORE:pack, avg 9µs/call | ||||
59 | } | ||||
60 | |||||
61 | # spent 3.89s (637ms+3.25) within WWW::Google::PageRank::_compute_ch which was called 922 times, avg 4.22ms/call:
# 461 times (375ms+1.90s) by WWW::Google::PageRank::_compute_ch_new at line 58, avg 4.93ms/call
# 461 times (262ms+1.35s) by WWW::Google::PageRank::_compute_ch_new at line 55, avg 3.51ms/call | ||||
62 | 45178 | 593ms | my $url = shift; | ||
63 | |||||
64 | my @url = unpack("C*", $url); # spent 13.4ms making 922 calls to WWW::Google::PageRank::CORE:unpack, avg 15µs/call | ||||
65 | my ($a, $b, $c, $k) = (0x9e3779b9, 0x9e3779b9, 0xe6359a60, 0); | ||||
66 | my $len = scalar @url; | ||||
67 | |||||
68 | while ($len >= 12) { | ||||
69 | _wadd($a, $url[$k+0] | ($url[$k+1] << 8) | ($url[$k+2] << 16) | ($url[$k+3] << 24)); # spent 57.1ms making 4610 calls to WWW::Google::PageRank::_wadd, avg 12µs/call | ||||
70 | _wadd($b, $url[$k+4] | ($url[$k+5] << 8) | ($url[$k+6] << 16) | ($url[$k+7] << 24)); # spent 51.1ms making 4610 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
71 | _wadd($c, $url[$k+8] | ($url[$k+9] << 8) | ($url[$k+10] << 16) | ($url[$k+11] << 24)); # spent 50.7ms making 4610 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
72 | |||||
73 | _mix($a, $b, $c); # spent 2.50s making 4610 calls to WWW::Google::PageRank::_mix, avg 542µs/call | ||||
74 | |||||
75 | $k += 12; | ||||
76 | $len -= 12; | ||||
77 | } | ||||
78 | |||||
79 | _wadd($c, scalar @url); # spent 10.6ms making 922 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
80 | |||||
81 | _wadd($c, $url[$k+10] << 24) if $len > 10; | ||||
82 | _wadd($c, $url[$k+9] << 16) if $len > 9; | ||||
83 | _wadd($c, $url[$k+8] << 8) if $len > 8; | ||||
84 | _wadd($b, $url[$k+7] << 24) if $len > 7; # spent 5.10ms making 461 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
85 | _wadd($b, $url[$k+6] << 16) if $len > 6; # spent 4.97ms making 461 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
86 | _wadd($b, $url[$k+5] << 8) if $len > 5; # spent 4.98ms making 461 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
87 | _wadd($b, $url[$k+4]) if $len > 4; # spent 9.15ms making 831 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
88 | _wadd($a, $url[$k+3] << 24) if $len > 3; # spent 10.4ms making 918 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
89 | _wadd($a, $url[$k+2] << 16) if $len > 2; # spent 10.1ms making 922 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
90 | _wadd($a, $url[$k+1] << 8) if $len > 1; # spent 12.0ms making 922 calls to WWW::Google::PageRank::_wadd, avg 13µs/call | ||||
91 | _wadd($a, $url[$k]) if $len > 0; # spent 9.93ms making 922 calls to WWW::Google::PageRank::_wadd, avg 11µs/call | ||||
92 | |||||
93 | _mix($a, $b, $c); # spent 501ms making 922 calls to WWW::Google::PageRank::_mix, avg 543µs/call | ||||
94 | |||||
95 | return $c; # integer is positive always | ||||
96 | } | ||||
97 | |||||
98 | sub _mix { | ||||
99 | 160428 | 1.76s | my ($a, $b, $c) = @_; | ||
100 | |||||
101 | _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 13; # spent 119ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
102 | _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 8) % 4294967296; # spent 119ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
103 | _wsub($c, $a); _wsub($c, $b); $c ^= $b >>13; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
104 | _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 12; # spent 116ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
105 | _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 16) % 4294967296; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
106 | _wsub($c, $a); _wsub($c, $b); $c ^= $b >> 5; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
107 | _wsub($a, $b); _wsub($a, $c); $a ^= $c >> 3; # spent 116ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
108 | _wsub($b, $c); _wsub($b, $a); $b ^= ($a << 10) % 4294967296; # spent 117ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
109 | _wsub($c, $a); _wsub($c, $b); $c ^= $b >> 15; # spent 116ms making 11064 calls to WWW::Google::PageRank::_wsub, avg 11µs/call | ||||
110 | |||||
111 | @_[0 .. $#_] = ($a, $b, $c); | ||||
112 | } | ||||
113 | |||||
114 | 20650 | 285ms | # spent 236ms within WWW::Google::PageRank::_wadd which was called 20650 times, avg 11µs/call:
# 4610 times (57.1ms+0s) by WWW::Google::PageRank::_compute_ch at line 69, avg 12µs/call
# 4610 times (51.1ms+0s) by WWW::Google::PageRank::_compute_ch at line 70, avg 11µs/call
# 4610 times (50.7ms+0s) by WWW::Google::PageRank::_compute_ch at line 71, avg 11µs/call
# 922 times (12.0ms+0s) by WWW::Google::PageRank::_compute_ch at line 90, avg 13µs/call
# 922 times (10.6ms+0s) by WWW::Google::PageRank::_compute_ch at line 79, avg 11µs/call
# 922 times (10.1ms+0s) by WWW::Google::PageRank::_compute_ch at line 89, avg 11µs/call
# 922 times (9.93ms+0s) by WWW::Google::PageRank::_compute_ch at line 91, avg 11µs/call
# 918 times (10.4ms+0s) by WWW::Google::PageRank::_compute_ch at line 88, avg 11µs/call
# 831 times (9.15ms+0s) by WWW::Google::PageRank::_compute_ch at line 87, avg 11µs/call
# 461 times (5.10ms+0s) by WWW::Google::PageRank::_compute_ch at line 84, avg 11µs/call
# 461 times (4.98ms+0s) by WWW::Google::PageRank::_compute_ch at line 86, avg 11µs/call
# 461 times (4.97ms+0s) by WWW::Google::PageRank::_compute_ch at line 85, avg 11µs/call | ||
115 | 108796 | 1.37s | # spent 1.16s within WWW::Google::PageRank::_wsub which was called 108796 times, avg 11µs/call:
# 11064 times (119ms+0s) by WWW::Google::PageRank::_mix at line 102, avg 11µs/call
# 11064 times (119ms+0s) by WWW::Google::PageRank::_mix at line 101, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 103, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 105, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 106, avg 11µs/call
# 11064 times (117ms+0s) by WWW::Google::PageRank::_mix at line 108, avg 11µs/call
# 11064 times (116ms+0s) by WWW::Google::PageRank::_mix at line 104, avg 11µs/call
# 11064 times (116ms+0s) by WWW::Google::PageRank::_mix at line 109, avg 11µs/call
# 11064 times (116ms+0s) by WWW::Google::PageRank::_mix at line 107, avg 11µs/call
# 9220 times (101ms+0s) by WWW::Google::PageRank::_compute_ch_new at line 58, avg 11µs/call | ||
116 | |||||
117 | 1 | 12µs | 1; | ||
118 | |||||
119 | |||||
120 | __END__ | ||||
121 | |||||
122 | =head1 NAME | ||||
123 | |||||
124 | WWW::Google::PageRank - Query the Google PageRank of a page | ||||
125 | |||||
126 | =head1 SYNOPSIS | ||||
127 | |||||
128 | use WWW::Google::PageRank; | ||||
129 | my $pr = WWW::Google::PageRank->new; | ||||
130 | print scalar($pr->get('http://www.yahoo.com/')), "\n"; | ||||
131 | |||||
132 | =head1 DESCRIPTION | ||||
133 | |||||
134 | C<WWW::Google::PageRank> is a class implementing an interface for | ||||
135 | querying Google PageRank. | ||||
136 | |||||
137 | To use it, create a C<WWW::Google::PageRank> object and call its | ||||
138 | get() method to query the PageRank of a URL. | ||||
139 | |||||
140 | It uses C<LWP::UserAgent> for making requests to Google. | ||||
141 | |||||
142 | =head1 CONSTRUCTOR METHOD | ||||
143 | |||||
144 | =over 4 | ||||
145 | |||||
146 | =item $gpr = WWW::Google::PageRank->new(%options); | ||||
147 | |||||
148 | This method constructs a new C<WWW::Google::PageRank> object and returns it. | ||||
149 | Key/value pair arguments may be provided to set up the initial state. | ||||
150 | The following options correspond to attribute methods described below: | ||||
151 | |||||
152 | KEY DEFAULT | ||||
153 | ----------- -------------------- | ||||
154 | agent "Mozilla/4.0 (compatible; GoogleToolbar 2.0.111-big; Windows XP 5.1)" | ||||
155 | proxy undef | ||||
156 | timeout undef | ||||
157 | host "toolbarqueries.google.com" | ||||
158 | |||||
159 | C<agent> specifies the 'User-Agent' header sent when querying Google. If | ||||
160 | the C<proxy> option is passed in, requests will be made through the | ||||
161 | specified proxy. C<host> is the host which serves requests from Googlebar. | ||||
162 | |||||
163 | =back | ||||
164 | |||||
165 | =head1 QUERY METHOD | ||||
166 | |||||
167 | =over 4 | ||||
168 | |||||
169 | =item $pr = $gpr->get('http://www.yahoo.com'); | ||||
170 | |||||
171 | Queries Google for the PageRank of the specified URL and returns it. If | ||||
172 | the query is successful, an integer value from 0 to 10 is returned. If the query fails | ||||
173 | for some reason (Google unreachable, URL does not begin with | ||||
174 | 'http://' or 'https://', undefined URL passed), it returns C<undef>. | ||||
175 | |||||
176 | In list context this function returns a list of two elements, where | ||||
177 | the first is the result as in scalar context and the second is the | ||||
178 | C<HTTP::Response> object (returned by C<LWP::UserAgent::get>). This | ||||
179 | can be useful for debugging purposes and for querying failure | ||||
180 | details. | ||||
181 | |||||
182 | =back | ||||
183 | |||||
184 | =head1 BUGS | ||||
185 | |||||
186 | If you find any, please report ;) | ||||
187 | |||||
188 | =head1 AUTHOR | ||||
189 | |||||
190 | Yuri Karaban F<E<lt>tech@askold.netE<gt>>. | ||||
191 | |||||
192 | The algorithm for computing the checksum is taken from the Mozilla module | ||||
193 | pagerankstatus F<http://pagerankstatus.mozdev.org> by | ||||
194 | Stephane Queraud F<E<lt>squeraud@toteme.comE<gt>>. | ||||
195 | |||||
196 | The algorithm was modified (15-09-2004) according to the new algorithm for | ||||
197 | computing the checksum in googlebar. | ||||
198 | |||||
199 | =head1 COPYRIGHT | ||||
200 | |||||
201 | Copyright 2004-2006, Yuri Karaban, All Rights Reserved. | ||||
202 | |||||
203 | You may use, modify, and distribute this package under the | ||||
204 | same terms as Perl itself. | ||||
# spent 8.45ms within WWW::Google::PageRank::CORE:match which was called 922 times, avg 9µs/call:
# 461 times (5.82ms+0s) by WWW::Google::PageRank::get at line 30 of WWW/Google/PageRank.pm, avg 13µs/call
# 461 times (2.63ms+0s) by WWW::Google::PageRank::get at line 37 of WWW/Google/PageRank.pm, avg 6µs/call | |||||
# spent 4.29ms within WWW::Google::PageRank::CORE:pack which was called 461 times, avg 9µs/call:
# 461 times (4.29ms+0s) by WWW::Google::PageRank::_compute_ch_new at line 58 of WWW/Google/PageRank.pm, avg 9µs/call | |||||
# spent 13.4ms within WWW::Google::PageRank::CORE:unpack which was called 922 times, avg 15µs/call:
# 922 times (13.4ms+0s) by WWW::Google::PageRank::_compute_ch at line 64 of WWW/Google/PageRank.pm, avg 15µs/call |