File | /project/perl/lib/HTML/Entities.pm |
Statements Executed | 944 |
Statement Execution Time | 16.1ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
253 | 1 | 2 | 1.27ms | 1.27ms | CORE:subst (opcode) | HTML::Entities::
1 | 1 | 2 | 18µs | 18µs | CORE:match (opcode) | HTML::Entities::
0 | 0 | 0 | 0s | 0s | BEGIN | HTML::Entities::
0 | 0 | 0 | 0s | 0s | Version | HTML::Entities::
0 | 0 | 0 | 0s | 0s | decode_entities_old | HTML::Entities::
0 | 0 | 0 | 0s | 0s | encode_entities | HTML::Entities::
0 | 0 | 0 | 0s | 0s | encode_entities_numeric | HTML::Entities::
0 | 0 | 0 | 0s | 0s | num_entity | HTML::Entities::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package HTML::Entities; | ||||
2 | |||||
3 | # $Id: Entities.pm,v 1.35 2006/03/22 09:15:23 gisle Exp $ | ||||
4 | |||||
5 | =head1 NAME | ||||
6 | |||||
7 | HTML::Entities - Encode or decode strings with HTML entities | ||||
8 | |||||
9 | =head1 SYNOPSIS | ||||
10 | |||||
11 | use HTML::Entities; | ||||
12 | |||||
13 | $a = "Våre norske tegn bør æres"; | ||||
14 | decode_entities($a); | ||||
15 | encode_entities($a, "\200-\377"); | ||||
16 | |||||
17 | For example, this: | ||||
18 | |||||
19 | $input = "vis-à-vis Beyoncé's naïve\npapier-mâché résumé"; | ||||
20 | print encode_entities($input), "\n" | ||||
21 | |||||
22 | Prints this out: | ||||
23 | |||||
24 | vis-à-vis Beyoncé's naïve | ||||
25 | papier-mâché résumé | ||||
26 | |||||
27 | =head1 DESCRIPTION | ||||
28 | |||||
29 | This module deals with encoding and decoding of strings with HTML | ||||
30 | character entities. The module provides the following functions: | ||||
31 | |||||
32 | =over 4 | ||||
33 | |||||
34 | =item decode_entities( $string, ... ) | ||||
35 | |||||
36 | This routine replaces HTML entities found in the $string with the | ||||
37 | corresponding Unicode character. Under perl 5.6 and earlier only | ||||
38 | characters in the Latin-1 range are replaced. Unrecognized | ||||
39 | entities are left alone. | ||||
40 | |||||
41 | If multiple strings are provided as argument they are each decoded | ||||
42 | separately and the same number of strings are returned. | ||||
43 | |||||
44 | If called in void context the arguments are decoded in-place. | ||||
45 | |||||
46 | This routine is exported by default. | ||||
47 | |||||
48 | =item _decode_entities( $string, \%entity2char ) | ||||
49 | |||||
50 | =item _decode_entities( $string, \%entity2char, $expand_prefix ) | ||||
51 | |||||
52 | This will in-place replace HTML entities in $string. The %entity2char | ||||
53 | hash must be provided. Named entities not found in the %entity2char | ||||
54 | hash are left alone. Numeric entities are expanded unless their value | ||||
55 | overflow. | ||||
56 | |||||
57 | The keys in %entity2char are the entity names to be expanded and their | ||||
58 | values are what they should expand into. The values do not have to be | ||||
59 | single character strings. If a key has ";" as suffix, | ||||
60 | then occurrences in $string are only expanded if properly terminated | ||||
61 | with ";". Entities without ";" will be expanded regardless of how | ||||
62 | they are terminated for compatiblity with how common browsers treat | ||||
63 | entities in the Latin-1 range. | ||||
64 | |||||
65 | If $expand_prefix is TRUE then entities without trailing ";" in | ||||
66 | %entity2char will even be expanded as a prefix of a longer | ||||
67 | unrecognized name. The longest matching name in %entity2char will be | ||||
68 | used. This is mainly present for compatibility with an MSIE | ||||
69 | misfeature. | ||||
70 | |||||
71 | $string = "foo bar"; | ||||
72 | _decode_entities($string, { nb => "@", nbsp => "\xA0" }, 1); | ||||
73 | print $string; # will print "foo bar" | ||||
74 | |||||
75 | This routine is exported by default. | ||||
76 | |||||
77 | =item encode_entities( $string ) | ||||
78 | |||||
79 | =item encode_entities( $string, $unsafe_chars ) | ||||
80 | |||||
81 | This routine replaces unsafe characters in $string with their entity | ||||
82 | representation. A second argument can be given to specify which | ||||
83 | characters to consider unsafe (i.e., which to escape). The default set | ||||
84 | of characters to encode are control chars, high-bit chars, and the | ||||
85 | C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >> | ||||
86 | characters. But this, for example, would encode I<just> the | ||||
87 | C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters: | ||||
88 | |||||
89 | $encoded = encode_entities($input, '<>&"'); | ||||
90 | |||||
91 | This routine is exported by default. | ||||
92 | |||||
93 | =item encode_entities_numeric( $string ) | ||||
94 | |||||
95 | =item encode_entities_numeric( $string, $unsafe_chars ) | ||||
96 | |||||
97 | This routine works just like encode_entities, except that the replacement | ||||
98 | entities are always C<&#xI<hexnum>;> and never C<&I<entname>;>. For | ||||
99 | example, C<encode_entities("r\xF4le")> returns "rôle", but | ||||
100 | C<encode_entities_numeric("r\xF4le")> returns "rôle". | ||||
101 | |||||
102 | This routine is I<not> exported by default. But you can always | ||||
103 | export it with C<use HTML::Entities qw(encode_entities_numeric);> | ||||
104 | or even C<use HTML::Entities qw(:DEFAULT encode_entities_numeric);> | ||||
105 | |||||
106 | =back | ||||
107 | |||||
108 | All these routines modify the string passed as the first argument, if | ||||
109 | called in a void context. In scalar and array contexts, the encoded or | ||||
110 | decoded string is returned (without changing the input string). | ||||
111 | |||||
112 | If you prefer not to import these routines into your namespace, you can | ||||
113 | call them as: | ||||
114 | |||||
115 | use HTML::Entities (); | ||||
116 | $decoded = HTML::Entities::decode($a); | ||||
117 | $encoded = HTML::Entities::encode($a); | ||||
118 | $encoded = HTML::Entities::encode_numeric($a); | ||||
119 | |||||
120 | The module can also export the %char2entity and the %entity2char | ||||
121 | hashes, which contain the mapping from all characters to the | ||||
122 | corresponding entities (and vice versa, respectively). | ||||
123 | |||||
124 | =head1 COPYRIGHT | ||||
125 | |||||
126 | Copyright 1995-2006 Gisle Aas. All rights reserved. | ||||
127 | |||||
128 | This library is free software; you can redistribute it and/or | ||||
129 | modify it under the same terms as Perl itself. | ||||
130 | |||||
131 | =cut | ||||
132 | |||||
133 | 3 | 111µs | 1 | 44µs | use strict; # spent 44µs making 1 call to strict::import |
134 | 3 | 94µs | 1 | 362µs | use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); # spent 362µs making 1 call to vars::import |
135 | 3 | 2.57ms | 1 | 215µs | use vars qw(%entity2char %char2entity); # spent 215µs making 1 call to vars::import |
136 | |||||
137 | 1 | 5µs | require 5.004; | ||
138 | 1 | 7µs | require Exporter; | ||
139 | 1 | 12µs | @ISA = qw(Exporter); | ||
140 | |||||
141 | 1 | 6µs | @EXPORT = qw(encode_entities decode_entities _decode_entities); | ||
142 | 1 | 5µs | @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric); | ||
143 | |||||
144 | 1 | 63µs | 1 | 18µs | $VERSION = sprintf("%d.%02d", q$Revision: 1.35 $ =~ /(\d+)\.(\d+)/); # spent 18µs making 1 call to HTML::Entities::CORE:match |
145 | sub Version { $VERSION; } | ||||
146 | |||||
147 | 1 | 729µs | require HTML::Parser; # for fast XS implemented decode_entities | ||
148 | |||||
149 | |||||
150 | 1 | 1.23ms | %entity2char = ( | ||
151 | # Some normal chars that have special meaning in SGML context | ||||
152 | amp => '&', # ampersand | ||||
153 | 'gt' => '>', # greater than | ||||
154 | 'lt' => '<', # less than | ||||
155 | quot => '"', # double quote | ||||
156 | apos => "'", # single quote | ||||
157 | |||||
158 | # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML | ||||
159 | AElig => chr(198), # capital AE diphthong (ligature) | ||||
160 | Aacute => chr(193), # capital A, acute accent | ||||
161 | Acirc => chr(194), # capital A, circumflex accent | ||||
162 | Agrave => chr(192), # capital A, grave accent | ||||
163 | Aring => chr(197), # capital A, ring | ||||
164 | Atilde => chr(195), # capital A, tilde | ||||
165 | Auml => chr(196), # capital A, dieresis or umlaut mark | ||||
166 | Ccedil => chr(199), # capital C, cedilla | ||||
167 | ETH => chr(208), # capital Eth, Icelandic | ||||
168 | Eacute => chr(201), # capital E, acute accent | ||||
169 | Ecirc => chr(202), # capital E, circumflex accent | ||||
170 | Egrave => chr(200), # capital E, grave accent | ||||
171 | Euml => chr(203), # capital E, dieresis or umlaut mark | ||||
172 | Iacute => chr(205), # capital I, acute accent | ||||
173 | Icirc => chr(206), # capital I, circumflex accent | ||||
174 | Igrave => chr(204), # capital I, grave accent | ||||
175 | Iuml => chr(207), # capital I, dieresis or umlaut mark | ||||
176 | Ntilde => chr(209), # capital N, tilde | ||||
177 | Oacute => chr(211), # capital O, acute accent | ||||
178 | Ocirc => chr(212), # capital O, circumflex accent | ||||
179 | Ograve => chr(210), # capital O, grave accent | ||||
180 | Oslash => chr(216), # capital O, slash | ||||
181 | Otilde => chr(213), # capital O, tilde | ||||
182 | Ouml => chr(214), # capital O, dieresis or umlaut mark | ||||
183 | THORN => chr(222), # capital THORN, Icelandic | ||||
184 | Uacute => chr(218), # capital U, acute accent | ||||
185 | Ucirc => chr(219), # capital U, circumflex accent | ||||
186 | Ugrave => chr(217), # capital U, grave accent | ||||
187 | Uuml => chr(220), # capital U, dieresis or umlaut mark | ||||
188 | Yacute => chr(221), # capital Y, acute accent | ||||
189 | aacute => chr(225), # small a, acute accent | ||||
190 | acirc => chr(226), # small a, circumflex accent | ||||
191 | aelig => chr(230), # small ae diphthong (ligature) | ||||
192 | agrave => chr(224), # small a, grave accent | ||||
193 | aring => chr(229), # small a, ring | ||||
194 | atilde => chr(227), # small a, tilde | ||||
195 | auml => chr(228), # small a, dieresis or umlaut mark | ||||
196 | ccedil => chr(231), # small c, cedilla | ||||
197 | eacute => chr(233), # small e, acute accent | ||||
198 | ecirc => chr(234), # small e, circumflex accent | ||||
199 | egrave => chr(232), # small e, grave accent | ||||
200 | eth => chr(240), # small eth, Icelandic | ||||
201 | euml => chr(235), # small e, dieresis or umlaut mark | ||||
202 | iacute => chr(237), # small i, acute accent | ||||
203 | icirc => chr(238), # small i, circumflex accent | ||||
204 | igrave => chr(236), # small i, grave accent | ||||
205 | iuml => chr(239), # small i, dieresis or umlaut mark | ||||
206 | ntilde => chr(241), # small n, tilde | ||||
207 | oacute => chr(243), # small o, acute accent | ||||
208 | ocirc => chr(244), # small o, circumflex accent | ||||
209 | ograve => chr(242), # small o, grave accent | ||||
210 | oslash => chr(248), # small o, slash | ||||
211 | otilde => chr(245), # small o, tilde | ||||
212 | ouml => chr(246), # small o, dieresis or umlaut mark | ||||
213 | szlig => chr(223), # small sharp s, German (sz ligature) | ||||
214 | thorn => chr(254), # small thorn, Icelandic | ||||
215 | uacute => chr(250), # small u, acute accent | ||||
216 | ucirc => chr(251), # small u, circumflex accent | ||||
217 | ugrave => chr(249), # small u, grave accent | ||||
218 | uuml => chr(252), # small u, dieresis or umlaut mark | ||||
219 | yacute => chr(253), # small y, acute accent | ||||
220 | yuml => chr(255), # small y, dieresis or umlaut mark | ||||
221 | |||||
222 | # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) | ||||
223 | copy => chr(169), # copyright sign | ||||
224 | reg => chr(174), # registered sign | ||||
225 | nbsp => chr(160), # non breaking space | ||||
226 | |||||
227 | # Additional ISO-8859/1 entities listed in rfc1866 (section 14) | ||||
228 | iexcl => chr(161), | ||||
229 | cent => chr(162), | ||||
230 | pound => chr(163), | ||||
231 | curren => chr(164), | ||||
232 | yen => chr(165), | ||||
233 | brvbar => chr(166), | ||||
234 | sect => chr(167), | ||||
235 | uml => chr(168), | ||||
236 | ordf => chr(170), | ||||
237 | laquo => chr(171), | ||||
238 | 'not' => chr(172), # not is a keyword in perl | ||||
239 | shy => chr(173), | ||||
240 | macr => chr(175), | ||||
241 | deg => chr(176), | ||||
242 | plusmn => chr(177), | ||||
243 | sup1 => chr(185), | ||||
244 | sup2 => chr(178), | ||||
245 | sup3 => chr(179), | ||||
246 | acute => chr(180), | ||||
247 | micro => chr(181), | ||||
248 | para => chr(182), | ||||
249 | middot => chr(183), | ||||
250 | cedil => chr(184), | ||||
251 | ordm => chr(186), | ||||
252 | raquo => chr(187), | ||||
253 | frac14 => chr(188), | ||||
254 | frac12 => chr(189), | ||||
255 | frac34 => chr(190), | ||||
256 | iquest => chr(191), | ||||
257 | 'times' => chr(215), # times is a keyword in perl | ||||
258 | divide => chr(247), | ||||
259 | |||||
260 | ( $] > 5.007 ? ( | ||||
261 | 'OElig;' => chr(338), | ||||
262 | 'oelig;' => chr(339), | ||||
263 | 'Scaron;' => chr(352), | ||||
264 | 'scaron;' => chr(353), | ||||
265 | 'Yuml;' => chr(376), | ||||
266 | 'fnof;' => chr(402), | ||||
267 | 'circ;' => chr(710), | ||||
268 | 'tilde;' => chr(732), | ||||
269 | 'Alpha;' => chr(913), | ||||
270 | 'Beta;' => chr(914), | ||||
271 | 'Gamma;' => chr(915), | ||||
272 | 'Delta;' => chr(916), | ||||
273 | 'Epsilon;' => chr(917), | ||||
274 | 'Zeta;' => chr(918), | ||||
275 | 'Eta;' => chr(919), | ||||
276 | 'Theta;' => chr(920), | ||||
277 | 'Iota;' => chr(921), | ||||
278 | 'Kappa;' => chr(922), | ||||
279 | 'Lambda;' => chr(923), | ||||
280 | 'Mu;' => chr(924), | ||||
281 | 'Nu;' => chr(925), | ||||
282 | 'Xi;' => chr(926), | ||||
283 | 'Omicron;' => chr(927), | ||||
284 | 'Pi;' => chr(928), | ||||
285 | 'Rho;' => chr(929), | ||||
286 | 'Sigma;' => chr(931), | ||||
287 | 'Tau;' => chr(932), | ||||
288 | 'Upsilon;' => chr(933), | ||||
289 | 'Phi;' => chr(934), | ||||
290 | 'Chi;' => chr(935), | ||||
291 | 'Psi;' => chr(936), | ||||
292 | 'Omega;' => chr(937), | ||||
293 | 'alpha;' => chr(945), | ||||
294 | 'beta;' => chr(946), | ||||
295 | 'gamma;' => chr(947), | ||||
296 | 'delta;' => chr(948), | ||||
297 | 'epsilon;' => chr(949), | ||||
298 | 'zeta;' => chr(950), | ||||
299 | 'eta;' => chr(951), | ||||
300 | 'theta;' => chr(952), | ||||
301 | 'iota;' => chr(953), | ||||
302 | 'kappa;' => chr(954), | ||||
303 | 'lambda;' => chr(955), | ||||
304 | 'mu;' => chr(956), | ||||
305 | 'nu;' => chr(957), | ||||
306 | 'xi;' => chr(958), | ||||
307 | 'omicron;' => chr(959), | ||||
308 | 'pi;' => chr(960), | ||||
309 | 'rho;' => chr(961), | ||||
310 | 'sigmaf;' => chr(962), | ||||
311 | 'sigma;' => chr(963), | ||||
312 | 'tau;' => chr(964), | ||||
313 | 'upsilon;' => chr(965), | ||||
314 | 'phi;' => chr(966), | ||||
315 | 'chi;' => chr(967), | ||||
316 | 'psi;' => chr(968), | ||||
317 | 'omega;' => chr(969), | ||||
318 | 'thetasym;' => chr(977), | ||||
319 | 'upsih;' => chr(978), | ||||
320 | 'piv;' => chr(982), | ||||
321 | 'ensp;' => chr(8194), | ||||
322 | 'emsp;' => chr(8195), | ||||
323 | 'thinsp;' => chr(8201), | ||||
324 | 'zwnj;' => chr(8204), | ||||
325 | 'zwj;' => chr(8205), | ||||
326 | 'lrm;' => chr(8206), | ||||
327 | 'rlm;' => chr(8207), | ||||
328 | 'ndash;' => chr(8211), | ||||
329 | 'mdash;' => chr(8212), | ||||
330 | 'lsquo;' => chr(8216), | ||||
331 | 'rsquo;' => chr(8217), | ||||
332 | 'sbquo;' => chr(8218), | ||||
333 | 'ldquo;' => chr(8220), | ||||
334 | 'rdquo;' => chr(8221), | ||||
335 | 'bdquo;' => chr(8222), | ||||
336 | 'dagger;' => chr(8224), | ||||
337 | 'Dagger;' => chr(8225), | ||||
338 | 'bull;' => chr(8226), | ||||
339 | 'hellip;' => chr(8230), | ||||
340 | 'permil;' => chr(8240), | ||||
341 | 'prime;' => chr(8242), | ||||
342 | 'Prime;' => chr(8243), | ||||
343 | 'lsaquo;' => chr(8249), | ||||
344 | 'rsaquo;' => chr(8250), | ||||
345 | 'oline;' => chr(8254), | ||||
346 | 'frasl;' => chr(8260), | ||||
347 | 'euro;' => chr(8364), | ||||
348 | 'image;' => chr(8465), | ||||
349 | 'weierp;' => chr(8472), | ||||
350 | 'real;' => chr(8476), | ||||
351 | 'trade;' => chr(8482), | ||||
352 | 'alefsym;' => chr(8501), | ||||
353 | 'larr;' => chr(8592), | ||||
354 | 'uarr;' => chr(8593), | ||||
355 | 'rarr;' => chr(8594), | ||||
356 | 'darr;' => chr(8595), | ||||
357 | 'harr;' => chr(8596), | ||||
358 | 'crarr;' => chr(8629), | ||||
359 | 'lArr;' => chr(8656), | ||||
360 | 'uArr;' => chr(8657), | ||||
361 | 'rArr;' => chr(8658), | ||||
362 | 'dArr;' => chr(8659), | ||||
363 | 'hArr;' => chr(8660), | ||||
364 | 'forall;' => chr(8704), | ||||
365 | 'part;' => chr(8706), | ||||
366 | 'exist;' => chr(8707), | ||||
367 | 'empty;' => chr(8709), | ||||
368 | 'nabla;' => chr(8711), | ||||
369 | 'isin;' => chr(8712), | ||||
370 | 'notin;' => chr(8713), | ||||
371 | 'ni;' => chr(8715), | ||||
372 | 'prod;' => chr(8719), | ||||
373 | 'sum;' => chr(8721), | ||||
374 | 'minus;' => chr(8722), | ||||
375 | 'lowast;' => chr(8727), | ||||
376 | 'radic;' => chr(8730), | ||||
377 | 'prop;' => chr(8733), | ||||
378 | 'infin;' => chr(8734), | ||||
379 | 'ang;' => chr(8736), | ||||
380 | 'and;' => chr(8743), | ||||
381 | 'or;' => chr(8744), | ||||
382 | 'cap;' => chr(8745), | ||||
383 | 'cup;' => chr(8746), | ||||
384 | 'int;' => chr(8747), | ||||
385 | 'there4;' => chr(8756), | ||||
386 | 'sim;' => chr(8764), | ||||
387 | 'cong;' => chr(8773), | ||||
388 | 'asymp;' => chr(8776), | ||||
389 | 'ne;' => chr(8800), | ||||
390 | 'equiv;' => chr(8801), | ||||
391 | 'le;' => chr(8804), | ||||
392 | 'ge;' => chr(8805), | ||||
393 | 'sub;' => chr(8834), | ||||
394 | 'sup;' => chr(8835), | ||||
395 | 'nsub;' => chr(8836), | ||||
396 | 'sube;' => chr(8838), | ||||
397 | 'supe;' => chr(8839), | ||||
398 | 'oplus;' => chr(8853), | ||||
399 | 'otimes;' => chr(8855), | ||||
400 | 'perp;' => chr(8869), | ||||
401 | 'sdot;' => chr(8901), | ||||
402 | 'lceil;' => chr(8968), | ||||
403 | 'rceil;' => chr(8969), | ||||
404 | 'lfloor;' => chr(8970), | ||||
405 | 'rfloor;' => chr(8971), | ||||
406 | 'lang;' => chr(9001), | ||||
407 | 'rang;' => chr(9002), | ||||
408 | 'loz;' => chr(9674), | ||||
409 | 'spades;' => chr(9824), | ||||
410 | 'clubs;' => chr(9827), | ||||
411 | 'hearts;' => chr(9829), | ||||
412 | 'diams;' => chr(9830), | ||||
413 | ) : ()) | ||||
414 | ); | ||||
415 | |||||
416 | |||||
417 | # Make the opposite mapping | ||||
418 | 1 | 20µs | while (my($entity, $char) = each(%entity2char)) { | ||
419 | 253 | 4.28ms | 253 | 1.27ms | $entity =~ s/;\z//; # spent 1.27ms making 253 calls to HTML::Entities::CORE:subst, avg 5µs/call |
420 | 253 | 3.63ms | $char2entity{$char} = "&$entity;"; | ||
421 | } | ||||
422 | 1 | 7µs | delete $char2entity{"'"}; # only one-way decoding | ||
423 | |||||
424 | # Fill in missing entities | ||||
425 | 1 | 438µs | for (0 .. 255) { | ||
426 | 256 | 1.12ms | next if exists $char2entity{chr($_)}; | ||
427 | 156 | 1.57ms | $char2entity{chr($_)} = "&#$_;"; | ||
428 | } | ||||
429 | |||||
430 | 1 | 5µs | my %subst; # compiled encoding regexps | ||
431 | |||||
432 | sub decode_entities_old | ||||
433 | { | ||||
434 | my $array; | ||||
435 | if (defined wantarray) { | ||||
436 | $array = [@_]; # copy | ||||
437 | } else { | ||||
438 | $array = \@_; # modify in-place | ||||
439 | } | ||||
440 | my $c; | ||||
441 | for (@$array) { | ||||
442 | s/(&\#(\d+);?)/$2 < 256 ? chr($2) : $1/eg; | ||||
443 | s/(&\#[xX]([0-9a-fA-F]+);?)/$c = hex($2); $c < 256 ? chr($c) : $1/eg; | ||||
444 | s/(&(\w+);?)/$entity2char{$2} || $1/eg; | ||||
445 | } | ||||
446 | wantarray ? @$array : $array->[0]; | ||||
447 | } | ||||
448 | |||||
449 | sub encode_entities | ||||
450 | { | ||||
451 | my $ref; | ||||
452 | if (defined wantarray) { | ||||
453 | my $x = $_[0]; | ||||
454 | $ref = \$x; # copy | ||||
455 | } else { | ||||
456 | $ref = \$_[0]; # modify in-place | ||||
457 | } | ||||
458 | if (defined $_[1] and length $_[1]) { | ||||
459 | unless (exists $subst{$_[1]}) { | ||||
460 | # Because we can't compile regex we fake it with a cached sub | ||||
461 | my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }"; | ||||
462 | $subst{$_[1]} = eval $code; | ||||
463 | die( $@ . " while trying to turn range: \"$_[1]\"\n " | ||||
464 | . "into code: $code\n " | ||||
465 | ) if $@; | ||||
466 | } | ||||
467 | &{$subst{$_[1]}}($$ref); | ||||
468 | } else { | ||||
469 | # Encode control chars, high bit chars and '<', '&', '>', ''' and '"' | ||||
470 | $$ref =~ s/([^\n\r\t !\#\$%\(-;=?-~])/$char2entity{$1} || num_entity($1)/ge; | ||||
471 | } | ||||
472 | $$ref; | ||||
473 | } | ||||
474 | |||||
475 | sub encode_entities_numeric { | ||||
476 | local %char2entity; | ||||
477 | return &encode_entities; # a goto &encode_entities wouldn't work | ||||
478 | } | ||||
479 | |||||
480 | |||||
481 | sub num_entity { | ||||
482 | sprintf "&#x%X;", ord($_[0]); | ||||
483 | } | ||||
484 | |||||
485 | # Set up aliases | ||||
486 | 1 | 10µs | *encode = \&encode_entities; | ||
487 | 1 | 5µs | *encode_numeric = \&encode_entities_numeric; | ||
488 | 1 | 5µs | *encode_numerically = \&encode_entities_numeric; | ||
489 | 1 | 5µs | *decode = \&decode_entities; | ||
490 | |||||
491 | 1 | 125µs | 1; | ||
# spent 18µs within HTML::Entities::CORE:match which was called
# once (18µs+0s) by LWP::Protocol::collect at line 144 of HTML/Entities.pm | |||||
# spent 1.27ms within HTML::Entities::CORE:subst which was called 253 times, avg 5µs/call:
# 253 times (1.27ms+0s) by LWP::Protocol::collect at line 419 of HTML/Entities.pm, avg 5µs/call |