← Index
NYTProf Performance Profile   « block view • line view • sub view »
For ddd2.pl
  Run on Tue May 25 16:52:24 2010
Reported on Tue May 25 16:56:47 2010

File /project/perl/lib/File/GlobMapper.pm
Statements Executed 30
Statement Execution Time 4.11ms
Subroutines — ordered by exclusive time
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
0000s0sFile::GlobMapper::::BEGINFile::GlobMapper::BEGIN
0000s0sFile::GlobMapper::::_getFilesFile::GlobMapper::_getFiles
0000s0sFile::GlobMapper::::_parseBitFile::GlobMapper::_parseBit
0000s0sFile::GlobMapper::::_parseInputGlobFile::GlobMapper::_parseInputGlob
0000s0sFile::GlobMapper::::_parseOutputGlobFile::GlobMapper::_parseOutputGlob
0000s0sFile::GlobMapper::::_retErrorFile::GlobMapper::_retError
0000s0sFile::GlobMapper::::_unmatchedFile::GlobMapper::_unmatched
0000s0sFile::GlobMapper::::getFileMapFile::GlobMapper::getFileMap
0000s0sFile::GlobMapper::::getHashFile::GlobMapper::getHash
0000s0sFile::GlobMapper::::globmapFile::GlobMapper::globmap
0000s0sFile::GlobMapper::::newFile::GlobMapper::new
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package File::GlobMapper;
2
3389µs126µsuse strict;
# spent 26µs making 1 call to strict::import
4387µs1218µsuse warnings;
# spent 218µs making 1 call to warnings::import
53292µs1235µsuse Carp;
# spent 235µs making 1 call to Exporter::import
6
714µsour ($CSH_GLOB);
8
9BEGIN
10{
115319µs if ($] < 5.006)
12 {
13 require File::BSDGlob; import File::BSDGlob qw(:glob) ;
14 $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
15 *globber = \&File::BSDGlob::csh_glob;
16 }
17 else
18 {
19 require File::Glob; import File::Glob qw(:glob) ;
# spent 68µs making 1 call to File::Glob::import
20 $CSH_GLOB = File::Glob::GLOB_CSH() ;
# spent 135µs making 1 call to File::Glob::GLOB_CSH
21 #*globber = \&File::Glob::bsd_glob;
22 *globber = \&File::Glob::csh_glob;
23 }
2412.76ms}
25
2613µsour ($Error);
27
2815µsour ($VERSION, @EXPORT_OK);
2916µs$VERSION = '0.000_02';
3016µs@EXPORT_OK = qw( globmap );
31
32
3315µsour ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
3415µs$noPreBS = '(?<!\\\)' ; # no preceeding backslash
3515µs$metachars = '.*?[](){}';
3617µs$matchMetaRE = '[' . quotemeta($metachars) . ']';
37
38112µs%mapping = (
39 '*' => '([^/]*)',
40 '?' => '([^/])',
41 '.' => '\.',
42 '[' => '([',
43 '(' => '(',
44 ')' => ')',
45 );
46
47118µs%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
48
49sub globmap ($$;)
50{
51 my $inputGlob = shift ;
52 my $outputGlob = shift ;
53
54 my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
55 or croak "globmap: $Error" ;
56 return $obj->getFileMap();
57}
58
59sub new
60{
61 my $class = shift ;
62 my $inputGlob = shift ;
63 my $outputGlob = shift ;
64 # TODO -- flags needs to default to whatever File::Glob does
65 my $flags = shift || $CSH_GLOB ;
66 #my $flags = shift ;
67
68 $inputGlob =~ s/^\s*\<\s*//;
69 $inputGlob =~ s/\s*\>\s*$//;
70
71 $outputGlob =~ s/^\s*\<\s*//;
72 $outputGlob =~ s/\s*\>\s*$//;
73
74 my %object =
75 ( InputGlob => $inputGlob,
76 OutputGlob => $outputGlob,
77 GlobFlags => $flags,
78 Braces => 0,
79 WildCount => 0,
80 Pairs => [],
81 Sigil => '#',
82 );
83
84 my $self = bless \%object, ref($class) || $class ;
85
86 $self->_parseInputGlob()
87 or return undef ;
88
89 $self->_parseOutputGlob()
90 or return undef ;
91
92 my @inputFiles = globber($self->{InputGlob}, $flags) ;
93
94 if (GLOB_ERROR)
95 {
96 $Error = $!;
97 return undef ;
98 }
99
100 #if (whatever)
101 {
102 my $missing = grep { ! -e $_ } @inputFiles ;
103
104 if ($missing)
105 {
106 $Error = "$missing input files do not exist";
107 return undef ;
108 }
109 }
110
111 $self->{InputFiles} = \@inputFiles ;
112
113 $self->_getFiles()
114 or return undef ;
115
116 return $self;
117}
118
119sub _retError
120{
121 my $string = shift ;
122 $Error = "$string in input fileglob" ;
123 return undef ;
124}
125
126sub _unmatched
127{
128 my $delimeter = shift ;
129
130 _retError("Unmatched $delimeter");
131 return undef ;
132}
133
134sub _parseBit
135{
136 my $self = shift ;
137
138 my $string = shift ;
139
140 my $out = '';
141 my $depth = 0 ;
142
143 while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
144 {
145 $out .= quotemeta($1) ;
146 $out .= $mapping{$2} if defined $mapping{$2};
147
148 ++ $self->{WildCount} if $wildCount{$2} ;
149
150 if ($2 eq ',')
151 {
152 return _unmatched "("
153 if $depth ;
154
155 $out .= '|';
156 }
157 elsif ($2 eq '(')
158 {
159 ++ $depth ;
160 }
161 elsif ($2 eq ')')
162 {
163 return _unmatched ")"
164 if ! $depth ;
165
166 -- $depth ;
167 }
168 elsif ($2 eq '[')
169 {
170 # TODO -- quotemeta & check no '/'
171 # TODO -- check for \] & other \ within the []
172 $string =~ s#(.*?\])##
173 or return _unmatched "[" ;
174 $out .= "$1)" ;
175 }
176 elsif ($2 eq ']')
177 {
178 return _unmatched "]" ;
179 }
180 elsif ($2 eq '{' || $2 eq '}')
181 {
182 return _retError "Nested {} not allowed" ;
183 }
184 }
185
186 $out .= quotemeta $string;
187
188 return _unmatched "("
189 if $depth ;
190
191 return $out ;
192}
193
194sub _parseInputGlob
195{
196 my $self = shift ;
197
198 my $string = $self->{InputGlob} ;
199 my $inGlob = '';
200
201 # Multiple concatenated *'s don't make sense
202 #$string =~ s#\*\*+#*# ;
203
204 # TODO -- Allow space to delimit patterns?
205 #my @strings = split /\s+/, $string ;
206 #for my $str (@strings)
207 my $out = '';
208 my $depth = 0 ;
209
210 while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
211 {
212 $out .= quotemeta($1) ;
213 $out .= $mapping{$2} if defined $mapping{$2};
214 ++ $self->{WildCount} if $wildCount{$2} ;
215
216 if ($2 eq '(')
217 {
218 ++ $depth ;
219 }
220 elsif ($2 eq ')')
221 {
222 return _unmatched ")"
223 if ! $depth ;
224
225 -- $depth ;
226 }
227 elsif ($2 eq '[')
228 {
229 # TODO -- quotemeta & check no '/' or '(' or ')'
230 # TODO -- check for \] & other \ within the []
231 $string =~ s#(.*?\])##
232 or return _unmatched "[";
233 $out .= "$1)" ;
234 }
235 elsif ($2 eq ']')
236 {
237 return _unmatched "]" ;
238 }
239 elsif ($2 eq '}')
240 {
241 return _unmatched "}" ;
242 }
243 elsif ($2 eq '{')
244 {
245 # TODO -- check no '/' within the {}
246 # TODO -- check for \} & other \ within the {}
247
248 my $tmp ;
249 unless ( $string =~ s/(.*?)$noPreBS\}//)
250 {
251 return _unmatched "{";
252 }
253 #$string =~ s#(.*?)\}##;
254
255 #my $alt = join '|',
256 # map { quotemeta $_ }
257 # split "$noPreBS,", $1 ;
258 my $alt = $self->_parseBit($1);
259 defined $alt or return 0 ;
260 $out .= "($alt)" ;
261
262 ++ $self->{Braces} ;
263 }
264 }
265
266 return _unmatched "("
267 if $depth ;
268
269 $out .= quotemeta $string ;
270
271
272 $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
273 $self->{InputPattern} = $out ;
274
275 #print "# INPUT '$self->{InputGlob}' => '$out'\n";
276
277 return 1 ;
278
279}
280
281sub _parseOutputGlob
282{
283 my $self = shift ;
284
285 my $string = $self->{OutputGlob} ;
286 my $maxwild = $self->{WildCount};
287
288 if ($self->{GlobFlags} & GLOB_TILDE)
289 #if (1)
290 {
291 $string =~ s{
292 ^ ~ # find a leading tilde
293 ( # save this in $1
294 [^/] # a non-slash character
295 * # repeated 0 or more times (0 means me)
296 )
297 }{
298 $1
299 ? (getpwnam($1))[7]
300 : ( $ENV{HOME} || $ENV{LOGDIR} )
301 }ex;
302
303 }
304
305 # max #1 must be == to max no of '*' in input
306 while ( $string =~ m/#(\d)/g )
307 {
308 croak "Max wild is #$maxwild, you tried #$1"
309 if $1 > $maxwild ;
310 }
311
312 my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
313 #warn "noPreBS = '$noPreBS'\n";
314
315 #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
316 $string =~ s/${noPreBS}#(\d)/\${$1}/g;
317 $string =~ s#${noPreBS}\*#\${inFile}#g;
318 $string = '"' . $string . '"';
319
320 #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
321 $self->{OutputPattern} = $string ;
322
323 return 1 ;
324}
325
326sub _getFiles
327{
328 my $self = shift ;
329
330 my %outInMapping = ();
331 my %inFiles = () ;
332
333 foreach my $inFile (@{ $self->{InputFiles} })
334 {
335 next if $inFiles{$inFile} ++ ;
336
337 my $outFile = $inFile ;
338
339 if ( $inFile =~ m/$self->{InputPattern}/ )
340 {
3413456µs1101µs no warnings 'uninitialized';
# spent 101µs making 1 call to warnings::unimport
342 eval "\$outFile = $self->{OutputPattern};" ;
343
344 if (defined $outInMapping{$outFile})
345 {
346 $Error = "multiple input files map to one output file";
347 return undef ;
348 }
349 $outInMapping{$outFile} = $inFile;
350 push @{ $self->{Pairs} }, [$inFile, $outFile];
351 }
352 }
353
354 return 1 ;
355}
356
357sub getFileMap
358{
359 my $self = shift ;
360
361 return $self->{Pairs} ;
362}
363
364sub getHash
365{
366 my $self = shift ;
367
368 return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
369}
370
371125µs1;
372
373__END__
374
375=head1 NAME
376
377File::GlobMapper - Extend File Glob to Allow Input and Output Files
378
379=head1 SYNOPSIS
380
381 use File::GlobMapper qw( globmap );
382
383 my $aref = globmap $input => $output
384 or die $File::GlobMapper::Error ;
385
386 my $gm = new File::GlobMapper $input => $output
387 or die $File::GlobMapper::Error ;
388
389
390=head1 DESCRIPTION
391
392B<WARNING Alpha Release Alert!>
393
394=over 5
395
396=item * This code is a work in progress.
397
398=item * There are known bugs.
399
400=item * The interface defined here is tentative.
401
402=item * There are portability issues.
403
404=item * Do not use in production code.
405
406=item * Consider yourself warned!
407
408=back
409
410This module needs Perl5.005 or better.
411
412This module takes the existing C<File::Glob> module as a starting point and
413extends it to allow new filenames to be derived from the files matched by
414C<File::Glob>.
415
416This can be useful when carrying out batch operations on multiple files that
417have both an input filename and output filename and the output file can be
418derived from the input filename. Examples of operations where this can be
419useful include, file renaming, file copying and file compression.
420
421
422=head2 Behind The Scenes
423
424To help explain what C<File::GlobMapper> does, consider what code you
425would write if you wanted to rename all files in the current directory
426that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
427current directory
428
429 alpha.tar.gz
430 beta.tar.gz
431 gamma.tar.gz
432
433and they need renamed to this
434
435 alpha.tgz
436 beta.tgz
437 gamma.tgz
438
439Below is a possible implementation of a script to carry out the rename
440(error cases have been omitted)
441
442 foreach my $old ( glob "*.tar.gz" )
443 {
444 my $new = $old;
445 $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
446
447 rename $old => $new
448 or die "Cannot rename '$old' to '$new': $!\n;
449 }
450
451Notice that a file glob pattern C<*.tar.gz> was used to match the
452C<.tar.gz> files, then a fairly similar regular expression was used in
453the substitute to allow the new filename to be created.
454
455Given that the file glob is just a cut-down regular expression and that it
456has already done a lot of the hard work in pattern matching the filenames,
457wouldn't it be handy to be able to use the patterns in the fileglob to
458drive the new filename?
459
460Well, that's I<exactly> what C<File::GlobMapper> does.
461
462Here is same snippet of code rewritten using C<globmap>
463
464 for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
465 {
466 my ($from, $to) = @$pair;
467 rename $from => $to
468 or die "Cannot rename '$old' to '$new': $!\n;
469 }
470
471So how does it work?
472
473Behind the scenes the C<globmap> function does a combination of a
474file glob to match existing filenames followed by a substitute
475to create the new filenames.
476
477Notice how both parameters to C<globmap> are strings that are delimited by <>.
478This is done to make them look more like file globs - it is just syntactic
479sugar, but it can be handy when you want the strings to be visually
480distinctive. The enclosing <> are optional, so you don't have to use them - in
481fact the first thing globmap will do is remove these delimiters if they are
482present.
483
484The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
485Once the enclosing "< ... >" is removed, this is passed (more or
486less) unchanged to C<File::Glob> to carry out a file match.
487
488Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
489full Perl regular expression, with the additional step of wrapping each
490transformed wildcard metacharacter sequence in parenthesis.
491
492In this case the input fileglob C<*.tar.gz> will be transformed into
493this Perl regular expression
494
495 ([^/]*)\.tar\.gz
496
497Wrapping with parenthesis allows the wildcard parts of the Input File
498Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
499the I<Output File Glob>. This parameter operates just like the replacement
500part of a substitute command. The difference is that the C<#1> syntax
501is used to reference sub-patterns matched in the input fileglob, rather
502than the C<$1> syntax that is used with perl regular expressions. In
503this case C<#1> is used to refer to the text matched by the C<*> in the
504Input File Glob. This makes it easier to use this module where the
505parameters to C<globmap> are typed at the command line.
506
507The final step involves passing each filename matched by the C<*.tar.gz>
508file glob through the derived Perl regular expression in turn and
509expanding the output fileglob using it.
510
511The end result of all this is a list of pairs of filenames. By default
512that is what is returned by C<globmap>. In this example the data structure
513returned will look like this
514
515 ( ['alpha.tar.gz' => 'alpha.tgz'],
516 ['beta.tar.gz' => 'beta.tgz' ],
517 ['gamma.tar.gz' => 'gamma.tgz']
518 )
519
520
521Each pair is an array reference with two elements - namely the I<from>
522filename, that C<File::Glob> has matched, and a I<to> filename that is
523derived from the I<from> filename.
524
525
526
527=head2 Limitations
528
529C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
530solve all filename mapping operations. Under the hood C<File::Glob> (or for
531older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
532will never have the flexibility of full Perl regular expression.
533
534=head2 Input File Glob
535
536The syntax for an Input FileGlob is identical to C<File::Glob>, except
537for the following
538
539=over 5
540
541=item 1.
542
543No nested {}
544
545=item 2.
546
547Whitespace does not delimit fileglobs.
548
549=item 3.
550
551The use of parenthesis can be used to capture parts of the input filename.
552
553=item 4.
554
555If an Input glob matches the same file more than once, only the first
556will be used.
557
558=back
559
560The syntax
561
562=over 5
563
564=item B<~>
565
566=item B<~user>
567
568
569=item B<.>
570
571Matches a literal '.'.
572Equivalent to the Perl regular expression
573
574 \.
575
576=item B<*>
577
578Matches zero or more characters, except '/'. Equivalent to the Perl
579regular expression
580
581 [^/]*
582
583=item B<?>
584
585Matches zero or one character, except '/'. Equivalent to the Perl
586regular expression
587
588 [^/]?
589
590=item B<\>
591
592Backslash is used, as usual, to escape the next character.
593
594=item B<[]>
595
596Character class.
597
598=item B<{,}>
599
600Alternation
601
602=item B<()>
603
604Capturing parenthesis that work just like perl
605
606=back
607
608Any other character it taken literally.
609
610=head2 Output File Glob
611
612The Output File Glob is a normal string, with 2 glob-like features.
613
614The first is the '*' metacharacter. This will be replaced by the complete
615filename matched by the input file glob. So
616
617 *.c *.Z
618
619The second is
620
621Output FileGlobs take the
622
623=over 5
624
625=item "*"
626
627The "*" character will be replaced with the complete input filename.
628
629=item #1
630
631Patterns of the form /#\d/ will be replaced with the
632
633=back
634
635=head2 Returned Data
636
637
638=head1 EXAMPLES
639
640=head2 A Rename script
641
642Below is a simple "rename" script that uses C<globmap> to determine the
643source and destination filenames.
644
645 use File::GlobMapper qw(globmap) ;
646 use File::Copy;
647
648 die "rename: Usage rename 'from' 'to'\n"
649 unless @ARGV == 2 ;
650
651 my $fromGlob = shift @ARGV;
652 my $toGlob = shift @ARGV;
653
654 my $pairs = globmap($fromGlob, $toGlob)
655 or die $File::GlobMapper::Error;
656
657 for my $pair (@$pairs)
658 {
659 my ($from, $to) = @$pair;
660 move $from => $to ;
661 }
662
663
664
665Here is an example that renames all c files to cpp.
666
667 $ rename '*.c' '#1.cpp'
668
669=head2 A few example globmaps
670
671Below are a few examples of globmaps
672
673To copy all your .c file to a backup directory
674
675 '</my/home/*.c>' '</my/backup/#1.c>'
676
677If you want to compress all
678
679 '</my/home/*.[ch]>' '<*.gz>'
680
681To uncompress
682
683 '</my/home/*.[ch].gz>' '</my/home/#1.#2>'
684
685=head1 SEE ALSO
686
687L<File::Glob|File::Glob>
688
689=head1 AUTHOR
690
691The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
692
693=head1 COPYRIGHT AND LICENSE
694
695Copyright (c) 2005 Paul Marquess. All rights reserved.
696This program is free software; you can redistribute it and/or
697modify it under the same terms as Perl itself.