← Index
NYTProf Performance Profile   « block view • line view • sub view »
For bin/dpath
  Run on Tue Jun 5 15:31:33 2012
Reported on Tue Jun 5 15:31:45 2012

Filename/home/ss5/perl5/perlbrew/perls/perl-5.14.1/lib/5.14.1/Text/Balanced.pm
StatementsExecuted 25 statements in 24.7ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
1115.45ms27.9msText::Balanced::::BEGIN@9 Text::Balanced::BEGIN@9
111259µs259µsText::Balanced::::BEGIN@6 Text::Balanced::BEGIN@6
11169µs214µsText::Balanced::ErrorMsg::::BEGIN@1021 Text::Balanced::ErrorMsg::BEGIN@1021
11166µs269µsText::Balanced::::BEGIN@885 Text::Balanced::BEGIN@885
11150µs277µsText::Balanced::::BEGIN@343 Text::Balanced::BEGIN@343
11147µs47µsText::Balanced::::BEGIN@12 Text::Balanced::BEGIN@12
11134µs50µsText::Balanced::::BEGIN@7 Text::Balanced::BEGIN@7
11133µs223µsText::Balanced::::BEGIN@11 Text::Balanced::BEGIN@11
11114µs14µsText::Balanced::::BEGIN@8 Text::Balanced::BEGIN@8
0000s0sText::Balanced::ErrorMsg::::__ANON__[:1021] Text::Balanced::ErrorMsg::__ANON__[:1021]
0000s0sText::Balanced::Extractor::::extractText::Balanced::Extractor::extract
0000s0sText::Balanced::::__ANON__[:1007] Text::Balanced::__ANON__[:1007]
0000s0sText::Balanced::::__ANON__[:863] Text::Balanced::__ANON__[:863]
0000s0sText::Balanced::::__ANON__[:864] Text::Balanced::__ANON__[:864]
0000s0sText::Balanced::::__ANON__[:865] Text::Balanced::__ANON__[:865]
0000s0sText::Balanced::::_fail Text::Balanced::_fail
0000s0sText::Balanced::::_failmsg Text::Balanced::_failmsg
0000s0sText::Balanced::::_match_bracketed Text::Balanced::_match_bracketed
0000s0sText::Balanced::::_match_codeblock Text::Balanced::_match_codeblock
0000s0sText::Balanced::::_match_quotelike Text::Balanced::_match_quotelike
0000s0sText::Balanced::::_match_tagged Text::Balanced::_match_tagged
0000s0sText::Balanced::::_match_variable Text::Balanced::_match_variable
0000s0sText::Balanced::::_revbracket Text::Balanced::_revbracket
0000s0sText::Balanced::::_succeed Text::Balanced::_succeed
0000s0sText::Balanced::::extract_bracketed Text::Balanced::extract_bracketed
0000s0sText::Balanced::::extract_codeblock Text::Balanced::extract_codeblock
0000s0sText::Balanced::::extract_delimited Text::Balanced::extract_delimited
0000s0sText::Balanced::::extract_multiple Text::Balanced::extract_multiple
0000s0sText::Balanced::::extract_quotelike Text::Balanced::extract_quotelike
0000s0sText::Balanced::::extract_tagged Text::Balanced::extract_tagged
0000s0sText::Balanced::::extract_variable Text::Balanced::extract_variable
0000s0sText::Balanced::::gen_delimited_pat Text::Balanced::gen_delimited_pat
0000s0sText::Balanced::::gen_extract_tagged Text::Balanced::gen_extract_tagged
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package Text::Balanced;
2
3# EXTRACT VARIOUSLY DELIMITED TEXT SEQUENCES FROM STRINGS.
4# FOR FULL DOCUMENTATION SEE Balanced.pod
5
62358µs1259µs
# spent 259µs within Text::Balanced::BEGIN@6 which was called: # once (259µs+0s) by Data::DPath::Path::BEGIN@17 at line 6
use 5.005;
# spent 259µs making 1 call to Text::Balanced::BEGIN@6
7293µs266µs
# spent 50µs (34+16) within Text::Balanced::BEGIN@7 which was called: # once (34µs+16µs) by Data::DPath::Path::BEGIN@17 at line 7
use strict;
# spent 50µs making 1 call to Text::Balanced::BEGIN@7 # spent 16µs making 1 call to strict::import
8286µs114µs
# spent 14µs within Text::Balanced::BEGIN@8 which was called: # once (14µs+0s) by Data::DPath::Path::BEGIN@17 at line 8
use Exporter ();
# spent 14µs making 1 call to Text::Balanced::BEGIN@8
92428µs228.0ms
# spent 27.9ms (5.45+22.4) within Text::Balanced::BEGIN@9 which was called: # once (5.45ms+22.4ms) by Data::DPath::Path::BEGIN@17 at line 9
use SelfLoader;
# spent 27.9ms making 1 call to Text::Balanced::BEGIN@9 # spent 143µs making 1 call to Exporter::import
10
112257µs2414µs
# spent 223µs (33+190) within Text::Balanced::BEGIN@11 which was called: # once (33µs+190µs) by Data::DPath::Path::BEGIN@17 at line 11
use vars qw { $VERSION @ISA %EXPORT_TAGS };
# spent 223µs making 1 call to Text::Balanced::BEGIN@11 # spent 190µs making 1 call to vars::import
12
# spent 47µs within Text::Balanced::BEGIN@12 which was called: # once (47µs+0s) by Data::DPath::Path::BEGIN@17 at line 29
BEGIN {
13348µs $VERSION = '2.02';
14 @ISA = 'Exporter';
15 %EXPORT_TAGS = (
16 ALL => [ qw{
17 &extract_delimited
18 &extract_bracketed
19 &extract_quotelike
20 &extract_codeblock
21 &extract_variable
22 &extract_tagged
23 &extract_multiple
24 &gen_delimited_pat
25 &gen_extract_tagged
26 &delimited_pat
27 } ],
28 );
2917.85ms147µs}
# spent 47µs making 1 call to Text::Balanced::BEGIN@12
30
31111µs186µsExporter::export_ok_tags('ALL');
# spent 86µs making 1 call to Exporter::export_ok_tags
32
33# PROTOTYPES
34
35sub _match_bracketed($$$$$$);
36sub _match_variable($$);
37sub _match_codeblock($$$$$$$);
38sub _match_quotelike($$$$);
39
40# HANDLE RETURN VALUES IN VARIOUS CONTEXTS
41
42sub _failmsg {
43 my ($message, $pos) = @_;
44 $@ = bless {
45 error => $message,
46 pos => $pos,
47 }, 'Text::Balanced::ErrorMsg';
48}
49
50sub _fail {
51 my ($wantarray, $textref, $message, $pos) = @_;
52 _failmsg $message, $pos if $message;
53 return (undef, $$textref, undef) if $wantarray;
54 return undef;
55}
56
57sub _succeed {
58 $@ = undef;
59 my ($wantarray,$textref) = splice @_, 0, 2;
60 my ($extrapos, $extralen) = @_ > 18
61 ? splice(@_, -2, 2)
62 : (0, 0);
63 my ($startlen, $oppos) = @_[5,6];
64 my $remainderpos = $_[2];
65 if ( $wantarray ) {
66 my @res;
67 while (my ($from, $len) = splice @_, 0, 2) {
68 push @res, substr($$textref, $from, $len);
69 }
70 if ( $extralen ) { # CORRECT FILLET
71 my $extra = substr($res[0], $extrapos-$oppos, $extralen, "\n");
72 $res[1] = "$extra$res[1]";
73 eval { substr($$textref,$remainderpos,0) = $extra;
74 substr($$textref,$extrapos,$extralen,"\n")} ;
75 #REARRANGE HERE DOC AND FILLET IF POSSIBLE
76 pos($$textref) = $remainderpos-$extralen+1; # RESET \G
77 } else {
78 pos($$textref) = $remainderpos; # RESET \G
79 }
80 return @res;
81 } else {
82 my $match = substr($$textref,$_[0],$_[1]);
83 substr($match,$extrapos-$_[0]-$startlen,$extralen,"") if $extralen;
84 my $extra = $extralen
85 ? substr($$textref, $extrapos, $extralen)."\n" : "";
86 eval {substr($$textref,$_[4],$_[1]+$_[5])=$extra} ; #CHOP OUT PREFIX & MATCH, IF POSSIBLE
87 pos($$textref) = $_[4]; # RESET \G
88 return $match;
89 }
90}
91
92# BUILD A PATTERN MATCHING A SIMPLE DELIMITED STRING
93
94sub gen_delimited_pat($;$) # ($delimiters;$escapes)
95{
96 my ($dels, $escs) = @_;
97 return "" unless $dels =~ /\S/;
98 $escs = '\\' unless $escs;
99 $escs .= substr($escs,-1) x (length($dels)-length($escs));
100 my @pat = ();
101 my $i;
102 for ($i=0; $i<length $dels; $i++)
103 {
104 my $del = quotemeta substr($dels,$i,1);
105 my $esc = quotemeta substr($escs,$i,1);
106 if ($del eq $esc)
107 {
108 push @pat, "$del(?:[^$del]*(?:(?:$del$del)[^$del]*)*)$del";
109 }
110 else
111 {
112 push @pat, "$del(?:[^$esc$del]*(?:$esc.[^$esc$del]*)*)$del";
113 }
114 }
115 my $pat = join '|', @pat;
116 return "(?:$pat)";
117}
118
11916µs*delimited_pat = \&gen_delimited_pat;
120
121# THE EXTRACTION FUNCTIONS
122
123sub extract_delimited (;$$$$)
124{
125 my $textref = defined $_[0] ? \$_[0] : \$_;
126 my $wantarray = wantarray;
127 my $del = defined $_[1] ? $_[1] : qq{\'\"\`};
128 my $pre = defined $_[2] ? $_[2] : '\s*';
129 my $esc = defined $_[3] ? $_[3] : qq{\\};
130 my $pat = gen_delimited_pat($del, $esc);
131 my $startpos = pos $$textref || 0;
132 return _fail($wantarray, $textref, "Not a delimited pattern", 0)
133 unless $$textref =~ m/\G($pre)($pat)/gc;
134 my $prelen = length($1);
135 my $matchpos = $startpos+$prelen;
136 my $endpos = pos $$textref;
137 return _succeed $wantarray, $textref,
138 $matchpos, $endpos-$matchpos, # MATCH
139 $endpos, length($$textref)-$endpos, # REMAINDER
140 $startpos, $prelen; # PREFIX
141}
142
143sub extract_bracketed (;$$$)
144{
145 my $textref = defined $_[0] ? \$_[0] : \$_;
146 my $ldel = defined $_[1] ? $_[1] : '{([<';
147 my $pre = defined $_[2] ? $_[2] : '\s*';
148 my $wantarray = wantarray;
149 my $qdel = "";
150 my $quotelike;
151 $ldel =~ s/'//g and $qdel .= q{'};
152 $ldel =~ s/"//g and $qdel .= q{"};
153 $ldel =~ s/`//g and $qdel .= q{`};
154 $ldel =~ s/q//g and $quotelike = 1;
155 $ldel =~ tr/[](){}<>\0-\377/[[(({{<</ds;
156 my $rdel = $ldel;
157 unless ($rdel =~ tr/[({</])}>/)
158 {
159 return _fail $wantarray, $textref,
160 "Did not find a suitable bracket in delimiter: \"$_[1]\"",
161 0;
162 }
163 my $posbug = pos;
164 $ldel = join('|', map { quotemeta $_ } split('', $ldel));
165 $rdel = join('|', map { quotemeta $_ } split('', $rdel));
166 pos = $posbug;
167
168 my $startpos = pos $$textref || 0;
169 my @match = _match_bracketed($textref,$pre, $ldel, $qdel, $quotelike, $rdel);
170
171 return _fail ($wantarray, $textref) unless @match;
172
173 return _succeed ( $wantarray, $textref,
174 $match[2], $match[5]+2, # MATCH
175 @match[8,9], # REMAINDER
176 @match[0,1], # PREFIX
177 );
178}
179
180sub _match_bracketed($$$$$$) # $textref, $pre, $ldel, $qdel, $quotelike, $rdel
181{
182 my ($textref, $pre, $ldel, $qdel, $quotelike, $rdel) = @_;
183 my ($startpos, $ldelpos, $endpos) = (pos $$textref = pos $$textref||0);
184 unless ($$textref =~ m/\G$pre/gc)
185 {
186 _failmsg "Did not find prefix: /$pre/", $startpos;
187 return;
188 }
189
190 $ldelpos = pos $$textref;
191
192 unless ($$textref =~ m/\G($ldel)/gc)
193 {
194 _failmsg "Did not find opening bracket after prefix: \"$pre\"",
195 pos $$textref;
196 pos $$textref = $startpos;
197 return;
198 }
199
200 my @nesting = ( $1 );
201 my $textlen = length $$textref;
202 while (pos $$textref < $textlen)
203 {
204 next if $$textref =~ m/\G\\./gcs;
205
206 if ($$textref =~ m/\G($ldel)/gc)
207 {
208 push @nesting, $1;
209 }
210 elsif ($$textref =~ m/\G($rdel)/gc)
211 {
212 my ($found, $brackettype) = ($1, $1);
213 if ($#nesting < 0)
214 {
215 _failmsg "Unmatched closing bracket: \"$found\"",
216 pos $$textref;
217 pos $$textref = $startpos;
218 return;
219 }
220 my $expected = pop(@nesting);
221 $expected =~ tr/({[</)}]>/;
222 if ($expected ne $brackettype)
223 {
224 _failmsg qq{Mismatched closing bracket: expected "$expected" but found "$found"},
225 pos $$textref;
226 pos $$textref = $startpos;
227 return;
228 }
229 last if $#nesting < 0;
230 }
231 elsif ($qdel && $$textref =~ m/\G([$qdel])/gc)
232 {
233 $$textref =~ m/\G[^\\$1]*(?:\\.[^\\$1]*)*(\Q$1\E)/gsc and next;
234 _failmsg "Unmatched embedded quote ($1)",
235 pos $$textref;
236 pos $$textref = $startpos;
237 return;
238 }
239 elsif ($quotelike && _match_quotelike($textref,"",1,0))
240 {
241 next;
242 }
243
244 else { $$textref =~ m/\G(?:[a-zA-Z0-9]+|.)/gcs }
245 }
246 if ($#nesting>=0)
247 {
248 _failmsg "Unmatched opening bracket(s): "
249 . join("..",@nesting)."..",
250 pos $$textref;
251 pos $$textref = $startpos;
252 return;
253 }
254
255 $endpos = pos $$textref;
256
257 return (
258 $startpos, $ldelpos-$startpos, # PREFIX
259 $ldelpos, 1, # OPENING BRACKET
260 $ldelpos+1, $endpos-$ldelpos-2, # CONTENTS
261 $endpos-1, 1, # CLOSING BRACKET
262 $endpos, length($$textref)-$endpos, # REMAINDER
263 );
264}
265
266sub _revbracket($)
267{
268 my $brack = reverse $_[0];
269 $brack =~ tr/[({</])}>/;
270 return $brack;
271}
272
27312µsmy $XMLNAME = q{[a-zA-Z_:][a-zA-Z0-9_:.-]*};
274
275sub extract_tagged (;$$$$$) # ($text, $opentag, $closetag, $pre, \%options)
276{
277 my $textref = defined $_[0] ? \$_[0] : \$_;
278 my $ldel = $_[1];
279 my $rdel = $_[2];
280 my $pre = defined $_[3] ? $_[3] : '\s*';
281 my %options = defined $_[4] ? %{$_[4]} : ();
282 my $omode = defined $options{fail} ? $options{fail} : '';
283 my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
284 : defined($options{reject}) ? $options{reject}
285 : ''
286 ;
287 my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
288 : defined($options{ignore}) ? $options{ignore}
289 : ''
290 ;
291
292 if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
293 $@ = undef;
294
295 my @match = _match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
296
297 return _fail(wantarray, $textref) unless @match;
298 return _succeed wantarray, $textref,
299 $match[2], $match[3]+$match[5]+$match[7], # MATCH
300 @match[8..9,0..1,2..7]; # REM, PRE, BITS
301}
302
303sub _match_tagged # ($$$$$$$)
304{
305 my ($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore) = @_;
306 my $rdelspec;
307
308 my ($startpos, $opentagpos, $textpos, $parapos, $closetagpos, $endpos) = ( pos($$textref) = pos($$textref)||0 );
309
310 unless ($$textref =~ m/\G($pre)/gc)
311 {
312 _failmsg "Did not find prefix: /$pre/", pos $$textref;
313 goto failed;
314 }
315
316 $opentagpos = pos($$textref);
317
318 unless ($$textref =~ m/\G$ldel/gc)
319 {
320 _failmsg "Did not find opening tag: /$ldel/", pos $$textref;
321 goto failed;
322 }
323
324 $textpos = pos($$textref);
325
326 if (!defined $rdel)
327 {
328 $rdelspec = substr($$textref, $-[0], $+[0] - $-[0]);
329 unless ($rdelspec =~ s/\A([[(<{]+)($XMLNAME).*/ quotemeta "$1\/$2". _revbracket($1) /oes)
330 {
331 _failmsg "Unable to construct closing tag to match: $rdel",
332 pos $$textref;
333 goto failed;
334 }
335 }
336 else
337 {
338 $rdelspec = eval "qq{$rdel}" || do {
339 my $del;
340 for (qw,~ ! ^ & * ) _ + - = } ] : " ; ' > . ? / | ',)
341 { next if $rdel =~ /\Q$_/; $del = $_; last }
342 unless ($del) {
343211.8ms2503µs
# spent 277µs (50+227) within Text::Balanced::BEGIN@343 which was called: # once (50µs+227µs) by Data::DPath::Path::BEGIN@17 at line 343
use Carp;
# spent 277µs making 1 call to Text::Balanced::BEGIN@343 # spent 227µs making 1 call to Exporter::import
344 croak "Can't interpolate right delimiter $rdel"
345 }
346 eval "qq$del$rdel$del";
347 };
348 }
349
350 while (pos($$textref) < length($$textref))
351 {
352 next if $$textref =~ m/\G\\./gc;
353
354 if ($$textref =~ m/\G(\n[ \t]*\n)/gc )
355 {
356 $parapos = pos($$textref) - length($1)
357 unless defined $parapos;
358 }
359 elsif ($$textref =~ m/\G($rdelspec)/gc )
360 {
361 $closetagpos = pos($$textref)-length($1);
362 goto matched;
363 }
364 elsif ($ignore && $$textref =~ m/\G(?:$ignore)/gc)
365 {
366 next;
367 }
368 elsif ($bad && $$textref =~ m/\G($bad)/gcs)
369 {
370 pos($$textref) -= length($1); # CUT OFF WHATEVER CAUSED THE SHORTNESS
371 goto short if ($omode eq 'PARA' || $omode eq 'MAX');
372 _failmsg "Found invalid nested tag: $1", pos $$textref;
373 goto failed;
374 }
375 elsif ($$textref =~ m/\G($ldel)/gc)
376 {
377 my $tag = $1;
378 pos($$textref) -= length($tag); # REWIND TO NESTED TAG
379 unless (_match_tagged(@_)) # MATCH NESTED TAG
380 {
381 goto short if $omode eq 'PARA' || $omode eq 'MAX';
382 _failmsg "Found unbalanced nested tag: $tag",
383 pos $$textref;
384 goto failed;
385 }
386 }
387 else { $$textref =~ m/./gcs }
388 }
389
390short:
391 $closetagpos = pos($$textref);
392 goto matched if $omode eq 'MAX';
393 goto failed unless $omode eq 'PARA';
394
395 if (defined $parapos) { pos($$textref) = $parapos }
396 else { $parapos = pos($$textref) }
397
398 return (
399 $startpos, $opentagpos-$startpos, # PREFIX
400 $opentagpos, $textpos-$opentagpos, # OPENING TAG
401 $textpos, $parapos-$textpos, # TEXT
402 $parapos, 0, # NO CLOSING TAG
403 $parapos, length($$textref)-$parapos, # REMAINDER
404 );
405
406matched:
407 $endpos = pos($$textref);
408 return (
409 $startpos, $opentagpos-$startpos, # PREFIX
410 $opentagpos, $textpos-$opentagpos, # OPENING TAG
411 $textpos, $closetagpos-$textpos, # TEXT
412 $closetagpos, $endpos-$closetagpos, # CLOSING TAG
413 $endpos, length($$textref)-$endpos, # REMAINDER
414 );
415
416failed:
417 _failmsg "Did not find closing tag", pos $$textref unless $@;
418 pos($$textref) = $startpos;
419 return;
420}
421
422sub extract_variable (;$$)
423{
424 my $textref = defined $_[0] ? \$_[0] : \$_;
425 return ("","","") unless defined $$textref;
426 my $pre = defined $_[1] ? $_[1] : '\s*';
427
428 my @match = _match_variable($textref,$pre);
429
430 return _fail wantarray, $textref unless @match;
431
432 return _succeed wantarray, $textref,
433 @match[2..3,4..5,0..1]; # MATCH, REMAINDER, PREFIX
434}
435
436sub _match_variable($$)
437{
438# $#
439# $^
440# $$
441 my ($textref, $pre) = @_;
442 my $startpos = pos($$textref) = pos($$textref)||0;
443 unless ($$textref =~ m/\G($pre)/gc)
444 {
445 _failmsg "Did not find prefix: /$pre/", pos $$textref;
446 return;
447 }
448 my $varpos = pos($$textref);
449 unless ($$textref =~ m{\G\$\s*(?!::)(\d+|[][&`'+*./|,";%=~:?!\@<>()-]|\^[a-z]?)}gci)
450 {
451 unless ($$textref =~ m/\G((\$#?|[*\@\%]|\\&)+)/gc)
452 {
453 _failmsg "Did not find leading dereferencer", pos $$textref;
454 pos $$textref = $startpos;
455 return;
456 }
457 my $deref = $1;
458
459 unless ($$textref =~ m/\G\s*(?:::|')?(?:[_a-z]\w*(?:::|'))*[_a-z]\w*/gci
460 or _match_codeblock($textref, "", '\{', '\}', '\{', '\}', 0)
461 or $deref eq '$#' or $deref eq '$$' )
462 {
463 _failmsg "Bad identifier after dereferencer", pos $$textref;
464 pos $$textref = $startpos;
465 return;
466 }
467 }
468
469 while (1)
470 {
471 next if $$textref =~ m/\G\s*(?:->)?\s*[{]\w+[}]/gc;
472 next if _match_codeblock($textref,
473 qr/\s*->\s*(?:[_a-zA-Z]\w+\s*)?/,
474 qr/[({[]/, qr/[)}\]]/,
475 qr/[({[]/, qr/[)}\]]/, 0);
476 next if _match_codeblock($textref,
477 qr/\s*/, qr/[{[]/, qr/[}\]]/,
478 qr/[{[]/, qr/[}\]]/, 0);
479 next if _match_variable($textref,'\s*->\s*');
480 next if $$textref =~ m/\G\s*->\s*\w+(?![{([])/gc;
481 last;
482 }
483
484 my $endpos = pos($$textref);
485 return ($startpos, $varpos-$startpos,
486 $varpos, $endpos-$varpos,
487 $endpos, length($$textref)-$endpos
488 );
489}
490
491sub extract_codeblock (;$$$$$)
492{
493 my $textref = defined $_[0] ? \$_[0] : \$_;
494 my $wantarray = wantarray;
495 my $ldel_inner = defined $_[1] ? $_[1] : '{';
496 my $pre = defined $_[2] ? $_[2] : '\s*';
497 my $ldel_outer = defined $_[3] ? $_[3] : $ldel_inner;
498 my $rd = $_[4];
499 my $rdel_inner = $ldel_inner;
500 my $rdel_outer = $ldel_outer;
501 my $posbug = pos;
502 for ($ldel_inner, $ldel_outer) { tr/[]()<>{}\0-\377/[[((<<{{/ds }
503 for ($rdel_inner, $rdel_outer) { tr/[]()<>{}\0-\377/]]))>>}}/ds }
504 for ($ldel_inner, $ldel_outer, $rdel_inner, $rdel_outer)
505 {
506 $_ = '('.join('|',map { quotemeta $_ } split('',$_)).')'
507 }
508 pos = $posbug;
509
510 my @match = _match_codeblock($textref, $pre,
511 $ldel_outer, $rdel_outer,
512 $ldel_inner, $rdel_inner,
513 $rd);
514 return _fail($wantarray, $textref) unless @match;
515 return _succeed($wantarray, $textref,
516 @match[2..3,4..5,0..1] # MATCH, REMAINDER, PREFIX
517 );
518
519}
520
521sub _match_codeblock($$$$$$$)
522{
523 my ($textref, $pre, $ldel_outer, $rdel_outer, $ldel_inner, $rdel_inner, $rd) = @_;
524 my $startpos = pos($$textref) = pos($$textref) || 0;
525 unless ($$textref =~ m/\G($pre)/gc)
526 {
527 _failmsg qq{Did not match prefix /$pre/ at"} .
528 substr($$textref,pos($$textref),20) .
529 q{..."},
530 pos $$textref;
531 return;
532 }
533 my $codepos = pos($$textref);
534 unless ($$textref =~ m/\G($ldel_outer)/gc) # OUTERMOST DELIMITER
535 {
536 _failmsg qq{Did not find expected opening bracket at "} .
537 substr($$textref,pos($$textref),20) .
538 q{..."},
539 pos $$textref;
540 pos $$textref = $startpos;
541 return;
542 }
543 my $closing = $1;
544 $closing =~ tr/([<{/)]>}/;
545 my $matched;
546 my $patvalid = 1;
547 while (pos($$textref) < length($$textref))
548 {
549 $matched = '';
550 if ($rd && $$textref =~ m#\G(\Q(?)\E|\Q(s?)\E|\Q(s)\E)#gc)
551 {
552 $patvalid = 0;
553 next;
554 }
555
556 if ($$textref =~ m/\G\s*#.*/gc)
557 {
558 next;
559 }
560
561 if ($$textref =~ m/\G\s*($rdel_outer)/gc)
562 {
563 unless ($matched = ($closing && $1 eq $closing) )
564 {
565 next if $1 eq '>'; # MIGHT BE A "LESS THAN"
566 _failmsg q{Mismatched closing bracket at "} .
567 substr($$textref,pos($$textref),20) .
568 qq{...". Expected '$closing'},
569 pos $$textref;
570 }
571 last;
572 }
573
574 if (_match_variable($textref,'\s*') ||
575 _match_quotelike($textref,'\s*',$patvalid,$patvalid) )
576 {
577 $patvalid = 0;
578 next;
579 }
580
581
582 # NEED TO COVER MANY MORE CASES HERE!!!
583 if ($$textref =~ m#\G\s*(?!$ldel_inner)
584 ( [-+*x/%^&|.]=?
585 | [!=]~
586 | =(?!>)
587 | (\*\*|&&|\|\||<<|>>)=?
588 | split|grep|map|return
589 | [([]
590 )#gcx)
591 {
592 $patvalid = 1;
593 next;
594 }
595
596 if ( _match_codeblock($textref, '\s*', $ldel_inner, $rdel_inner, $ldel_inner, $rdel_inner, $rd) )
597 {
598 $patvalid = 1;
599 next;
600 }
601
602 if ($$textref =~ m/\G\s*$ldel_outer/gc)
603 {
604 _failmsg q{Improperly nested codeblock at "} .
605 substr($$textref,pos($$textref),20) .
606 q{..."},
607 pos $$textref;
608 last;
609 }
610
611 $patvalid = 0;
612 $$textref =~ m/\G\s*(\w+|[-=>]>|.|\Z)/gc;
613 }
614 continue { $@ = undef }
615
616 unless ($matched)
617 {
618 _failmsg 'No match found for opening bracket', pos $$textref
619 unless $@;
620 return;
621 }
622
623 my $endpos = pos($$textref);
624 return ( $startpos, $codepos-$startpos,
625 $codepos, $endpos-$codepos,
626 $endpos, length($$textref)-$endpos,
627 );
628}
629
630
631118µsmy %mods = (
632 'none' => '[cgimsox]*',
633 'm' => '[cgimsox]*',
634 's' => '[cegimsox]*',
635 'tr' => '[cds]*',
636 'y' => '[cds]*',
637 'qq' => '',
638 'qx' => '',
639 'qw' => '',
640 'qr' => '[imsx]*',
641 'q' => '',
642 );
643
644sub extract_quotelike (;$$)
645{
646 my $textref = $_[0] ? \$_[0] : \$_;
647 my $wantarray = wantarray;
648 my $pre = defined $_[1] ? $_[1] : '\s*';
649
650 my @match = _match_quotelike($textref,$pre,1,0);
651 return _fail($wantarray, $textref) unless @match;
652 return _succeed($wantarray, $textref,
653 $match[2], $match[18]-$match[2], # MATCH
654 @match[18,19], # REMAINDER
655 @match[0,1], # PREFIX
656 @match[2..17], # THE BITS
657 @match[20,21], # ANY FILLET?
658 );
659};
660
661sub _match_quotelike($$$$) # ($textref, $prepat, $allow_raw_match)
662{
663 my ($textref, $pre, $rawmatch, $qmark) = @_;
664
665 my ($textlen,$startpos,
666 $oppos,
667 $preld1pos,$ld1pos,$str1pos,$rd1pos,
668 $preld2pos,$ld2pos,$str2pos,$rd2pos,
669 $modpos) = ( length($$textref), pos($$textref) = pos($$textref) || 0 );
670
671 unless ($$textref =~ m/\G($pre)/gc)
672 {
673 _failmsg qq{Did not find prefix /$pre/ at "} .
674 substr($$textref, pos($$textref), 20) .
675 q{..."},
676 pos $$textref;
677 return;
678 }
679 $oppos = pos($$textref);
680
681 my $initial = substr($$textref,$oppos,1);
682
683 if ($initial && $initial =~ m|^[\"\'\`]|
684 || $rawmatch && $initial =~ m|^/|
685 || $qmark && $initial =~ m|^\?|)
686 {
687 unless ($$textref =~ m/ \Q$initial\E [^\\$initial]* (\\.[^\\$initial]*)* \Q$initial\E /gcsx)
688 {
689 _failmsg qq{Did not find closing delimiter to match '$initial' at "} .
690 substr($$textref, $oppos, 20) .
691 q{..."},
692 pos $$textref;
693 pos $$textref = $startpos;
694 return;
695 }
696 $modpos= pos($$textref);
697 $rd1pos = $modpos-1;
698
699 if ($initial eq '/' || $initial eq '?')
700 {
701 $$textref =~ m/\G$mods{none}/gc
702 }
703
704 my $endpos = pos($$textref);
705 return (
706 $startpos, $oppos-$startpos, # PREFIX
707 $oppos, 0, # NO OPERATOR
708 $oppos, 1, # LEFT DEL
709 $oppos+1, $rd1pos-$oppos-1, # STR/PAT
710 $rd1pos, 1, # RIGHT DEL
711 $modpos, 0, # NO 2ND LDEL
712 $modpos, 0, # NO 2ND STR
713 $modpos, 0, # NO 2ND RDEL
714 $modpos, $endpos-$modpos, # MODIFIERS
715 $endpos, $textlen-$endpos, # REMAINDER
716 );
717 }
718
719 unless ($$textref =~ m{\G(\b(?:m|s|qq|qx|qw|q|qr|tr|y)\b(?=\s*\S)|<<)}gc)
720 {
721 _failmsg q{No quotelike operator found after prefix at "} .
722 substr($$textref, pos($$textref), 20) .
723 q{..."},
724 pos $$textref;
725 pos $$textref = $startpos;
726 return;
727 }
728
729 my $op = $1;
730 $preld1pos = pos($$textref);
731 if ($op eq '<<') {
732 $ld1pos = pos($$textref);
733 my $label;
734 if ($$textref =~ m{\G([A-Za-z_]\w*)}gc) {
735 $label = $1;
736 }
737 elsif ($$textref =~ m{ \G ' ([^'\\]* (?:\\.[^'\\]*)*) '
738 | \G " ([^"\\]* (?:\\.[^"\\]*)*) "
739 | \G ` ([^`\\]* (?:\\.[^`\\]*)*) `
740 }gcsx) {
741 $label = $+;
742 }
743 else {
744 $label = "";
745 }
746 my $extrapos = pos($$textref);
747 $$textref =~ m{.*\n}gc;
748 $str1pos = pos($$textref)--;
749 unless ($$textref =~ m{.*?\n(?=\Q$label\E\n)}gc) {
750 _failmsg qq{Missing here doc terminator ('$label') after "} .
751 substr($$textref, $startpos, 20) .
752 q{..."},
753 pos $$textref;
754 pos $$textref = $startpos;
755 return;
756 }
757 $rd1pos = pos($$textref);
758 $$textref =~ m{\Q$label\E\n}gc;
759 $ld2pos = pos($$textref);
760 return (
761 $startpos, $oppos-$startpos, # PREFIX
762 $oppos, length($op), # OPERATOR
763 $ld1pos, $extrapos-$ld1pos, # LEFT DEL
764 $str1pos, $rd1pos-$str1pos, # STR/PAT
765 $rd1pos, $ld2pos-$rd1pos, # RIGHT DEL
766 $ld2pos, 0, # NO 2ND LDEL
767 $ld2pos, 0, # NO 2ND STR
768 $ld2pos, 0, # NO 2ND RDEL
769 $ld2pos, 0, # NO MODIFIERS
770 $ld2pos, $textlen-$ld2pos, # REMAINDER
771 $extrapos, $str1pos-$extrapos, # FILLETED BIT
772 );
773 }
774
775 $$textref =~ m/\G\s*/gc;
776 $ld1pos = pos($$textref);
777 $str1pos = $ld1pos+1;
778
779 unless ($$textref =~ m/\G(\S)/gc) # SHOULD USE LOOKAHEAD
780 {
781 _failmsg "No block delimiter found after quotelike $op",
782 pos $$textref;
783 pos $$textref = $startpos;
784 return;
785 }
786 pos($$textref) = $ld1pos; # HAVE TO DO THIS BECAUSE LOOKAHEAD BROKEN
787 my ($ldel1, $rdel1) = ("\Q$1","\Q$1");
788 if ($ldel1 =~ /[[(<{]/)
789 {
790 $rdel1 =~ tr/[({</])}>/;
791 defined(_match_bracketed($textref,"",$ldel1,"","",$rdel1))
792 || do { pos $$textref = $startpos; return };
793 $ld2pos = pos($$textref);
794 $rd1pos = $ld2pos-1;
795 }
796 else
797 {
798 $$textref =~ /\G$ldel1[^\\$ldel1]*(\\.[^\\$ldel1]*)*$ldel1/gcs
799 || do { pos $$textref = $startpos; return };
800 $ld2pos = $rd1pos = pos($$textref)-1;
801 }
802
803 my $second_arg = $op =~ /s|tr|y/ ? 1 : 0;
804 if ($second_arg)
805 {
806 my ($ldel2, $rdel2);
807 if ($ldel1 =~ /[[(<{]/)
808 {
809 unless ($$textref =~ /\G\s*(\S)/gc) # SHOULD USE LOOKAHEAD
810 {
811 _failmsg "Missing second block for quotelike $op",
812 pos $$textref;
813 pos $$textref = $startpos;
814 return;
815 }
816 $ldel2 = $rdel2 = "\Q$1";
817 $rdel2 =~ tr/[({</])}>/;
818 }
819 else
820 {
821 $ldel2 = $rdel2 = $ldel1;
822 }
823 $str2pos = $ld2pos+1;
824
825 if ($ldel2 =~ /[[(<{]/)
826 {
827 pos($$textref)--; # OVERCOME BROKEN LOOKAHEAD
828 defined(_match_bracketed($textref,"",$ldel2,"","",$rdel2))
829 || do { pos $$textref = $startpos; return };
830 }
831 else
832 {
833 $$textref =~ /[^\\$ldel2]*(\\.[^\\$ldel2]*)*$ldel2/gcs
834 || do { pos $$textref = $startpos; return };
835 }
836 $rd2pos = pos($$textref)-1;
837 }
838 else
839 {
840 $ld2pos = $str2pos = $rd2pos = $rd1pos;
841 }
842
843 $modpos = pos $$textref;
844
845 $$textref =~ m/\G($mods{$op})/gc;
846 my $endpos = pos $$textref;
847
848 return (
849 $startpos, $oppos-$startpos, # PREFIX
850 $oppos, length($op), # OPERATOR
851 $ld1pos, 1, # LEFT DEL
852 $str1pos, $rd1pos-$str1pos, # STR/PAT
853 $rd1pos, 1, # RIGHT DEL
854 $ld2pos, $second_arg, # 2ND LDEL (MAYBE)
855 $str2pos, $rd2pos-$str2pos, # 2ND STR (MAYBE)
856 $rd2pos, $second_arg, # 2ND RDEL (MAYBE)
857 $modpos, $endpos-$modpos, # MODIFIERS
858 $endpos, $textlen-$endpos, # REMAINDER
859 );
860}
861
862my $def_func = [
863 sub { extract_variable($_[0], '') },
864 sub { extract_quotelike($_[0],'') },
865 sub { extract_codeblock($_[0],'{}','') },
866134µs];
867
868sub extract_multiple (;$$$$) # ($text, $functions_ref, $max_fields, $ignoreunknown)
869{
870 my $textref = defined($_[0]) ? \$_[0] : \$_;
871 my $posbug = pos;
872 my ($lastpos, $firstpos);
873 my @fields = ();
874
875 #for ($$textref)
876 {
877 my @func = defined $_[1] ? @{$_[1]} : @{$def_func};
878 my $max = defined $_[2] && $_[2]>0 ? $_[2] : 1_000_000_000;
879 my $igunk = $_[3];
880
881 pos $$textref ||= 0;
882
883 unless (wantarray)
884 {
88523.26ms2472µs
# spent 269µs (66+203) within Text::Balanced::BEGIN@885 which was called: # once (66µs+203µs) by Data::DPath::Path::BEGIN@17 at line 885
use Carp;
# spent 269µs making 1 call to Text::Balanced::BEGIN@885 # spent 203µs making 1 call to Exporter::import
886 carp "extract_multiple reset maximal count to 1 in scalar context"
887 if $^W && defined($_[2]) && $max > 1;
888 $max = 1
889 }
890
891 my $unkpos;
892 my $func;
893 my $class;
894
895 my @class;
896 foreach $func ( @func )
897 {
898 if (ref($func) eq 'HASH')
899 {
900 push @class, (keys %$func)[0];
901 $func = (values %$func)[0];
902 }
903 else
904 {
905 push @class, undef;
906 }
907 }
908
909 FIELD: while (pos($$textref) < length($$textref))
910 {
911 my ($field, $rem);
912 my @bits;
913 foreach my $i ( 0..$#func )
914 {
915 my $pref;
916 $func = $func[$i];
917 $class = $class[$i];
918 $lastpos = pos $$textref;
919 if (ref($func) eq 'CODE')
920 { ($field,$rem,$pref) = @bits = $func->($$textref) }
921 elsif (ref($func) eq 'Text::Balanced::Extractor')
922 { @bits = $field = $func->extract($$textref) }
923 elsif( $$textref =~ m/\G$func/gc )
924 { @bits = $field = defined($1)
925 ? $1
926 : substr($$textref, $-[0], $+[0] - $-[0])
927 }
928 $pref ||= "";
929 if (defined($field) && length($field))
930 {
931 if (!$igunk) {
932 $unkpos = $lastpos
933 if length($pref) && !defined($unkpos);
934 if (defined $unkpos)
935 {
936 push @fields, substr($$textref, $unkpos, $lastpos-$unkpos).$pref;
937 $firstpos = $unkpos unless defined $firstpos;
938 undef $unkpos;
939 last FIELD if @fields == $max;
940 }
941 }
942 push @fields, $class
943 ? bless (\$field, $class)
944 : $field;
945 $firstpos = $lastpos unless defined $firstpos;
946 $lastpos = pos $$textref;
947 last FIELD if @fields == $max;
948 next FIELD;
949 }
950 }
951 if ($$textref =~ /\G(.)/gcs)
952 {
953 $unkpos = pos($$textref)-1
954 unless $igunk || defined $unkpos;
955 }
956 }
957
958 if (defined $unkpos)
959 {
960 push @fields, substr($$textref, $unkpos);
961 $firstpos = $unkpos unless defined $firstpos;
962 $lastpos = length $$textref;
963 }
964 last;
965 }
966
967 pos $$textref = $lastpos;
968 return @fields if wantarray;
969
970 $firstpos ||= 0;
971 eval { substr($$textref,$firstpos,$lastpos-$firstpos)="";
972 pos $$textref = $firstpos };
973 return $fields[0];
974}
975
976sub gen_extract_tagged # ($opentag, $closetag, $pre, \%options)
977{
978 my $ldel = $_[0];
979 my $rdel = $_[1];
980 my $pre = defined $_[2] ? $_[2] : '\s*';
981 my %options = defined $_[3] ? %{$_[3]} : ();
982 my $omode = defined $options{fail} ? $options{fail} : '';
983 my $bad = ref($options{reject}) eq 'ARRAY' ? join('|', @{$options{reject}})
984 : defined($options{reject}) ? $options{reject}
985 : ''
986 ;
987 my $ignore = ref($options{ignore}) eq 'ARRAY' ? join('|', @{$options{ignore}})
988 : defined($options{ignore}) ? $options{ignore}
989 : ''
990 ;
991
992 if (!defined $ldel) { $ldel = '<\w+(?:' . gen_delimited_pat(q{'"}) . '|[^>])*>'; }
993
994 my $posbug = pos;
995 for ($ldel, $pre, $bad, $ignore) { $_ = qr/$_/ if $_ }
996 pos = $posbug;
997
998 my $closure = sub
999 {
1000 my $textref = defined $_[0] ? \$_[0] : \$_;
1001 my @match = Text::Balanced::_match_tagged($textref, $pre, $ldel, $rdel, $omode, $bad, $ignore);
1002
1003 return _fail(wantarray, $textref) unless @match;
1004 return _succeed wantarray, $textref,
1005 $match[2], $match[3]+$match[5]+$match[7], # MATCH
1006 @match[8..9,0..1,2..7]; # REM, PRE, BITS
1007 };
1008
1009 bless $closure, 'Text::Balanced::Extractor';
1010}
1011
1012package Text::Balanced::Extractor;
1013
1014sub extract($$) # ($self, $text)
1015{
1016 &{$_[0]}($_[1]);
1017}
1018
1019package Text::Balanced::ErrorMsg;
1020
10212401µs2359µs
# spent 214µs (69+145) within Text::Balanced::ErrorMsg::BEGIN@1021 which was called: # once (69µs+145µs) by Data::DPath::Path::BEGIN@17 at line 1021
use overload '""' => sub { "$_[0]->{error}, detected at offset $_[0]->{pos}" };
# spent 214µs making 1 call to Text::Balanced::ErrorMsg::BEGIN@1021 # spent 145µs making 1 call to overload::import
1022
1023135µs1;
1024
1025__END__