Filename | /home/ss5/perl5/perlbrew/perls/perl-5.14.1/lib/site_perl/5.14.1/Data/DPath.pm |
Statements | Executed 20 statements in 4.60ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 12.0ms | 36.1ms | BEGIN@48 | Data::DPath::
1 | 1 | 1 | 5.08ms | 444ms | BEGIN@18 | Data::DPath::
1 | 1 | 1 | 63µs | 63µs | BEGIN@10 | Data::DPath::
1 | 1 | 1 | 41µs | 58µs | BEGIN@11 | Data::DPath::
1 | 1 | 1 | 36µs | 36µs | BEGIN@2 | Data::DPath::
1 | 1 | 1 | 29µs | 57µs | BEGIN@12 | Data::DPath::
1 | 1 | 1 | 18µs | 18µs | BEGIN@19 | Data::DPath::
1 | 1 | 1 | 18µs | 18µs | build_dpath | Data::DPath::
0 | 0 | 0 | 0s | 0s | __ANON__[:25] | Data::DPath::
0 | 0 | 0 | 0s | 0s | __ANON__[:32] | Data::DPath::
0 | 0 | 0 | 0s | 0s | __ANON__[:45] | Data::DPath::
0 | 0 | 0 | 0s | 0s | build_dpathi | Data::DPath::
0 | 0 | 0 | 0s | 0s | build_dpathr | Data::DPath::
0 | 0 | 0 | 0s | 0s | match | Data::DPath::
Line | Statements |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package Data::DPath; | ||||
2 | # spent 36µs within Data::DPath::BEGIN@2 which was called:
# once (36µs+0s) by main::BEGIN@10 at line 4 | ||||
3 | 1 | 18µs | $Data::DPath::AUTHORITY = 'cpan:SCHWIGON'; | ||
4 | 1 | 121µs | 1 | 36µs | } # spent 36µs making 1 call to Data::DPath::BEGIN@2 |
5 | { | ||||
6 | 2 | 6µs | $Data::DPath::VERSION = '0.48'; | ||
7 | } | ||||
8 | # ABSTRACT: DPath is not XPath! | ||||
9 | |||||
10 | 2 | 176µs | 1 | 63µs | # spent 63µs within Data::DPath::BEGIN@10 which was called:
# once (63µs+0s) by main::BEGIN@10 at line 10 # spent 63µs making 1 call to Data::DPath::BEGIN@10 |
11 | 2 | 92µs | 2 | 74µs | # spent 58µs (41+17) within Data::DPath::BEGIN@11 which was called:
# once (41µs+17µs) by main::BEGIN@10 at line 11 # spent 58µs making 1 call to Data::DPath::BEGIN@11
# spent 17µs making 1 call to strict::import |
12 | 2 | 207µs | 2 | 85µs | # spent 57µs (29+28) within Data::DPath::BEGIN@12 which was called:
# once (29µs+28µs) by main::BEGIN@10 at line 12 # spent 57µs making 1 call to Data::DPath::BEGIN@12
# spent 28µs making 1 call to warnings::import |
13 | |||||
14 | 1 | 1µs | our $DEBUG = 0; | ||
15 | 1 | 800ns | our $USE_SAFE = 1; | ||
16 | 1 | 600ns | our $PARALLELIZE = 0; | ||
17 | |||||
18 | 2 | 412µs | 1 | 444ms | # spent 444ms (5.08+439) within Data::DPath::BEGIN@18 which was called:
# once (5.08ms+439ms) by main::BEGIN@10 at line 18 # spent 444ms making 1 call to Data::DPath::BEGIN@18 |
19 | 2 | 868µs | 1 | 18µs | # spent 18µs within Data::DPath::BEGIN@19 which was called:
# once (18µs+0s) by main::BEGIN@10 at line 19 # spent 18µs making 1 call to Data::DPath::BEGIN@19 |
20 | |||||
21 | # spent 18µs within Data::DPath::build_dpath which was called:
# once (18µs+0s) by Sub::Exporter::default_generator at line 856 of Sub/Exporter.pm | ||||
22 | return sub ($) { | ||||
23 | my ($path_str) = @_; | ||||
24 | Data::DPath::Path->new(path => $path_str); | ||||
25 | 1 | 26µs | }; | ||
26 | } | ||||
27 | |||||
28 | sub build_dpathr { | ||||
29 | return sub ($) { | ||||
30 | my ($path_str) = @_; | ||||
31 | Data::DPath::Path->new(path => $path_str, give_references => 1); | ||||
32 | }; | ||||
33 | } | ||||
34 | |||||
35 | sub build_dpathi { | ||||
36 | return sub ($) { | ||||
37 | my ($data, $path_str) = @_; | ||||
38 | |||||
39 | Data::DPath::Context | ||||
40 | ->new | ||||
41 | ->current_points([ Data::DPath::Point->new->ref(\$data) ]) | ||||
42 | ->_search(Data::DPath::Path->new(path => "/")) | ||||
43 | ->_iter | ||||
44 | ->value; # there is always exactly one root "/" | ||||
45 | }; | ||||
46 | } | ||||
47 | |||||
48 | 1 | 44µs | 1 | 1.80ms | # spent 36.1ms (12.0+24.1) within Data::DPath::BEGIN@48 which was called:
# once (12.0ms+24.1ms) by main::BEGIN@10 at line 54 # spent 1.80ms making 1 call to Sub::Exporter::__ANON__[Sub/Exporter.pm:756] |
49 | exports => [ dpath => \&build_dpath, | ||||
50 | dpathr => \&build_dpathr, | ||||
51 | dpathi => \&build_dpathi, | ||||
52 | ], | ||||
53 | groups => { all => [ 'dpath', 'dpathr' ] }, | ||||
54 | 1 | 2.60ms | 1 | 36.1ms | }; # spent 36.1ms making 1 call to Data::DPath::BEGIN@48 |
55 | |||||
56 | sub match { | ||||
57 | my ($class, $data, $path_str) = @_; | ||||
58 | Data::DPath::Path->new(path => $path_str)->match($data); | ||||
59 | } | ||||
60 | |||||
61 | # ------------------------------------------------------------ | ||||
62 | |||||
63 | 1 | 19µs | 1; | ||
64 | |||||
- - | |||||
67 | =pod | ||||
68 | |||||
69 | =encoding utf-8 | ||||
70 | |||||
71 | =head1 NAME | ||||
72 | |||||
73 | Data::DPath - DPath is not XPath! | ||||
74 | |||||
75 | =head1 SYNOPSIS | ||||
76 | |||||
77 | use Data::DPath 'dpath'; | ||||
78 | my $data = { | ||||
79 | AAA => { BBB => { CCC => [ qw/ XXX YYY ZZZ / ] }, | ||||
80 | RRR => { CCC => [ qw/ RR1 RR2 RR3 / ] }, | ||||
81 | DDD => { EEE => [ qw/ uuu vvv www / ] }, | ||||
82 | }, | ||||
83 | }; | ||||
84 | |||||
85 | # Perl 5.8 style | ||||
86 | @resultlist = dpath('/AAA/*/CCC')->match($data); # ( ['XXX', 'YYY', 'ZZZ'], [ 'RR1', 'RR2', 'RR3' ] ) | ||||
87 | |||||
88 | # Perl 5.10 style using overloaded smartmatch operator | ||||
89 | $resultlist = $data ~~ dpath '/AAA/*/CCC'; # [ ['XXX', 'YYY', 'ZZZ'], [ 'RR1', 'RR2', 'RR3' ] ] | ||||
90 | |||||
91 | Note that the C<match()> function returns an array but the overloaded | ||||
92 | C<~~> operator returns an array reference (that's a limitation of | ||||
93 | overloading). | ||||
94 | |||||
95 | Various other example paths from C<t/data_dpath.t> (not necessarily | ||||
96 | fitting to above data structure): | ||||
97 | |||||
98 | $data ~~ dpath '/AAA/*/CCC' | ||||
99 | $data ~~ dpath '/AAA/BBB/CCC/../..' # parents (..) | ||||
100 | $data ~~ dpath '//AAA' # anywhere (//) | ||||
101 | $data ~~ dpath '//AAA/*' # anywhere + anystep | ||||
102 | $data ~~ dpath '//AAA/*[size == 3]' # filter by arrays/hash size | ||||
103 | $data ~~ dpath '//AAA/*[size != 3]' # filter by arrays/hash size | ||||
104 | $data ~~ dpath '/"EE/E"/CCC' # quote strange keys | ||||
105 | $data ~~ dpath '/AAA/BBB/CCC/*[1]' # filter by array index | ||||
106 | $data ~~ dpath '/AAA/BBB/CCC/*[ idx == 1 ]' # same, filter by array index | ||||
107 | $data ~~ dpath '//AAA/BBB/*[key eq "CCC"]' # filter by exact keys | ||||
108 | $data ~~ dpath '//AAA/*[ key =~ /CC/ ]' # filter by regex matching keys | ||||
109 | $data ~~ dpath '//CCC/*[ value eq "RR2" ]' # filter by values of hashes | ||||
110 | |||||
111 | See full details in C<t/data_dpath.t>. | ||||
112 | |||||
113 | You can get references into the C<$data> data structure by using C<dpathr>: | ||||
114 | |||||
115 | $data ~~ dpathr '//AAA/BBB/*' | ||||
116 | # etc. | ||||
117 | |||||
118 | You can request iterators to do incremental searches using C<dpathi>: | ||||
119 | |||||
120 | my $benchmarks_iter = dpathi($data)->isearch("//Benchmark"); | ||||
121 | while ($benchmarks_iter->isnt_exhausted) | ||||
122 | { | ||||
123 | my $benchmark = $benchmarks_iter->value; | ||||
124 | my $ancestors_iter = $benchmark->isearch ("/::ancestor"); | ||||
125 | while ($ancestors_iter->isnt_exhausted) | ||||
126 | { | ||||
127 | my $ancestor = $ancestors_iter->value; | ||||
128 | print Dumper( $ancestor->deref ); | ||||
129 | } | ||||
130 | } | ||||
131 | |||||
132 | This finds all elements anywhere behind a key "Benchmark" and for each | ||||
133 | one found prints all its ancestors, respectively. See also chapter | ||||
134 | L<Iterator style|/"Iterator style">. | ||||
135 | |||||
136 | =head1 ABOUT | ||||
137 | |||||
138 | With this module you can address points in a datastructure by | ||||
139 | describing a "path" to it using hash keys, array indexes or some | ||||
140 | wildcard-like steps. It is inspired by XPath but differs from it. | ||||
141 | |||||
142 | =head2 Why not XPath? | ||||
143 | |||||
144 | XPath is for XML. DPath is for data structures, with a stronger Perl | ||||
145 | focus. | ||||
146 | |||||
147 | Although XML documents are data structures, they are special. | ||||
148 | |||||
149 | Elements in XML always have an order which is in contrast to hash keys | ||||
150 | in Perl. | ||||
151 | |||||
152 | XML element names on the same level can be repeated, not so in hashes. | ||||
153 | |||||
154 | XML element names are more limited than arbitrary strange hash keys. | ||||
155 | |||||
156 | XML elements can have attributes and those can be addressed by XPath; | ||||
157 | Perl data structures do not need this. On the other side, data | ||||
158 | structures in Perl can contain blessed elements, DPath can address | ||||
159 | this. | ||||
160 | |||||
161 | XML has namespaces, data structures have not. | ||||
162 | |||||
163 | Arrays starting with index 1 as in XPath would be confusing to read | ||||
164 | for data structures. | ||||
165 | |||||
166 | DPath allows filter expressions that are in fact just Perl expressions | ||||
167 | not an own sub language as in XPath. | ||||
168 | |||||
169 | =head2 Comparison with Data::Path | ||||
170 | |||||
171 | There is a similar approach on CPAN, L<Data::Path|Data::Path>. Here is | ||||
172 | a comparison matrix between L<Data::Path|Data::Path> and | ||||
173 | L<Data::DPath|Data::DPath>. | ||||
174 | |||||
175 | (Warning: B<alpha> grade comparison ahead, not yet fully verified, | ||||
176 | only evaluated by reading the source. Speed comparison not really | ||||
177 | benchmarked.) | ||||
178 | |||||
179 | --------------------------------------------------------------------- | ||||
180 | Criteria Data::Path Data::DPath | ||||
181 | --------------------------------------------------------------------- | ||||
182 | |||||
183 | real XPath syntax no no | ||||
184 | |||||
185 | --------------------------------------------------------------------- | ||||
186 | |||||
187 | allow strange, YES YES | ||||
188 | non-xml but | ||||
189 | perl-like although | ||||
190 | hash keys limited, | ||||
191 | see next | ||||
192 | --------------------------------------------------------------------- | ||||
193 | |||||
194 | allows special no YES | ||||
195 | chars of own | ||||
196 | path syntax in you can quote everything | ||||
197 | hash keys | ||||
198 | ("/[]|*.") | ||||
199 | |||||
200 | --------------------------------------------------------------------- | ||||
201 | |||||
202 | call subs in YES no | ||||
203 | data structure, | ||||
204 | like: | ||||
205 | /method() | ||||
206 | --------------------------------------------------------------------- | ||||
207 | |||||
208 | callbacks on YES no | ||||
209 | not found keys | ||||
210 | |||||
211 | --------------------------------------------------------------------- | ||||
212 | |||||
213 | element "//" no YES | ||||
214 | for "ANYWHERE" | ||||
215 | (//foo/bar) | ||||
216 | |||||
217 | --------------------------------------------------------------------- | ||||
218 | |||||
219 | element "." no YES | ||||
220 | for "NOSTEP" or | ||||
221 | "actual position" | ||||
222 | (/.[filter expr]) | ||||
223 | |||||
224 | --------------------------------------------------------------------- | ||||
225 | |||||
226 | element ".." no YES | ||||
227 | for "PARENT" | ||||
228 | (//foo/..) | ||||
229 | |||||
230 | --------------------------------------------------------------------- | ||||
231 | |||||
232 | element "::ancestor" no YES | ||||
233 | for "ANCESTOR" | ||||
234 | (//foo/::ancestor) | ||||
235 | |||||
236 | --------------------------------------------------------------------- | ||||
237 | |||||
238 | element no YES | ||||
239 | "::ancestor-or-self" | ||||
240 | |||||
241 | --------------------------------------------------------------------- | ||||
242 | |||||
243 | element "*" no YES | ||||
244 | for "ANYSTEP" or | ||||
245 | "all subelements" | ||||
246 | (/foo/*) | ||||
247 | |||||
248 | --------------------------------------------------------------------- | ||||
249 | |||||
250 | array access YES YES | ||||
251 | like /foo[4] | ||||
252 | although including negative indexes | ||||
253 | limited and whitespace awareness | ||||
254 | |||||
255 | --------------------------------------------------------------------- | ||||
256 | |||||
257 | complex no YES | ||||
258 | filter expressions | ||||
259 | like full Perl expressions | ||||
260 | /foo[size == 3] or plus sugar functions | ||||
261 | /.[isa("Foo::Bar")] | ||||
262 | |||||
263 | --------------------------------------------------------------------- | ||||
264 | |||||
265 | works with YES YES | ||||
266 | blessed subelements | ||||
267 | |||||
268 | --------------------------------------------------------------------- | ||||
269 | |||||
270 | arrays start YES YES | ||||
271 | with index 0 | ||||
272 | (in contrast | ||||
273 | to 1 as in XPath) | ||||
274 | |||||
275 | --------------------------------------------------------------------- | ||||
276 | |||||
277 | array semantics /foo[2] /foo/*[2] | ||||
278 | is a bit different | ||||
279 | |||||
280 | --------------------------------------------------------------------- | ||||
281 | |||||
282 | handling of croak RETURN EMPTY | ||||
283 | not matching | ||||
284 | paths but can be | ||||
285 | overwritten | ||||
286 | as callback | ||||
287 | |||||
288 | --------------------------------------------------------------------- | ||||
289 | |||||
290 | usage sugar none overloaded '~~' operator | ||||
291 | |||||
292 | --------------------------------------------------------------------- | ||||
293 | |||||
294 | Speed FAST quite fast | ||||
295 | |||||
296 | - raw Perl - probably comparable | ||||
297 | - considered fast speed with expressions | ||||
298 | that Data::Path handles | ||||
299 | - slower on fuzzy paths, | ||||
300 | eg. with many "//" in it | ||||
301 | |||||
302 | --------------------------------------------------------------------- | ||||
303 | |||||
304 | Perl Versions 5.6+ 5.8+ | ||||
305 | |||||
306 | --------------------------------------------------------------------- | ||||
307 | |||||
308 | Install chance 100% 90% | ||||
309 | (http://deps | ||||
310 | .cpantesters | ||||
311 | .org) | ||||
312 | |||||
313 | --------------------------------------------------------------------- | ||||
314 | |||||
315 | =head3 Summary | ||||
316 | |||||
317 | Generally L<Data::Path|Data::Path> is for simpler use cases but does | ||||
318 | not suffer from surrounding meta problems: it has no dependencies, is | ||||
319 | fast and works on practically every Perl version. | ||||
320 | |||||
321 | Whereas L<Data::DPath|Data::DPath> provides more XPath-alike features, | ||||
322 | but isn't quite as fast and has more dependencies. | ||||
323 | |||||
324 | =head1 Security warning | ||||
325 | |||||
326 | B<Watch out!> This module C<eval>s parts of provided dpaths (in | ||||
327 | particular: the filter expressions). Don't use it if you don't trust | ||||
328 | your paths. | ||||
329 | |||||
330 | Since v0.41 the filter expressions are secured using L<Safe.pm|Safe> | ||||
331 | to only allow basic Perl core ops. This provides more safety but is | ||||
332 | also significantly slower. To unrestrict this to pre-v0.41 raw C<eval> | ||||
333 | behaviour you can set C<$Data::DPath::USE_SAFE> to False: | ||||
334 | |||||
335 | local $Data::DPath::USE_SAFE; | ||||
336 | # dpath '//CCC//*[ unsecure_perl_expression ]' | ||||
337 | |||||
338 | Read L<Safe.pm|Safe> to understand how secure this is. | ||||
339 | |||||
340 | =head1 FUNCTIONS | ||||
341 | |||||
342 | =head2 dpath( $path_str ) | ||||
343 | |||||
344 | Meant as the front end function for everyday use of Data::DPath. It | ||||
345 | takes a path string and returns a C<Data::DPath::Path> object on which | ||||
346 | the match method can be called with data structures and the operator | ||||
347 | C<~~> is overloaded. | ||||
348 | |||||
349 | The function is prototyped to take exactly one argument so that you | ||||
350 | can omit the parens in many cases. | ||||
351 | |||||
352 | See SYNOPSIS. | ||||
353 | |||||
354 | =head2 dpathr( $path_str ) | ||||
355 | |||||
356 | Same as C<dpath> but toggles that results are references to the | ||||
357 | matched points in the data structure. | ||||
358 | |||||
359 | =head2 dpathi( $data ) | ||||
360 | |||||
361 | This is a different, iterator style, approach. | ||||
362 | |||||
363 | You provide the data structure on which to work and get back a current | ||||
364 | context containing the root element (as if you had searched for the | ||||
365 | path C</>), and now you can do incremental searches using C<isearch>. | ||||
366 | |||||
367 | See chapter L<Iterator style|/"Iterator style"> below for details. | ||||
368 | |||||
369 | =head1 API METHODS | ||||
370 | |||||
371 | =head2 match( $data, $path ) | ||||
372 | |||||
373 | Returns an array of all values in C<$data> that match the C<$path>. | ||||
374 | |||||
375 | =head1 OPERATOR | ||||
376 | |||||
377 | =head2 ~~ | ||||
378 | |||||
379 | Does a C<match> of a dpath against a data structure. | ||||
380 | |||||
381 | Due to the B<matching> nature of DPath the operator C<~~> should make | ||||
382 | your code more readable. | ||||
383 | |||||
384 | =head1 THE DPATH LANGUAGE | ||||
385 | |||||
386 | =head2 Synopsis | ||||
387 | |||||
388 | /AAA/BBB/CCC | ||||
389 | /AAA/*/CCC | ||||
390 | //CCC/* | ||||
391 | //CCC/*[2] | ||||
392 | //CCC/*[size == 3] | ||||
393 | //CCC/*[size != 3] | ||||
394 | /"EE/E"/CCC | ||||
395 | /AAA/BBB/CCC/*[1] | ||||
396 | /AAA/BBB/CCC/*[ idx == 1 ] | ||||
397 | //AAA/BBB/*[key eq "CCC"] | ||||
398 | //AAA/*[ key =~ /CC/ ] | ||||
399 | //CCC/*[value eq "RR2"] | ||||
400 | //.[ size == 4 ] | ||||
401 | /.[ isa("Funky::Stuff") ]/.[ size == 5 ]/.[ reftype eq "ARRAY" ] | ||||
402 | |||||
403 | =head2 Modeled on XPath | ||||
404 | |||||
405 | The basic idea is that of XPath: define a way through a datastructure | ||||
406 | and allow some funky ways to describe fuzzy ways. The syntax | ||||
407 | looks roughly like XPath but in fact the two do not have much more in common. | ||||
408 | |||||
409 | =head3 Some wording | ||||
410 | |||||
411 | I call the whole path a, well, B<path>. | ||||
412 | |||||
413 | It consists of single (B<path>) B<steps> that are divided by the path | ||||
414 | separator C</>. | ||||
415 | |||||
416 | Each step can have a B<filter> appended in brackets C<[]> that narrows | ||||
417 | down the matching set of results. | ||||
418 | |||||
419 | Additional functions provided inside the filters are called, well, | ||||
420 | B<filter functions>. | ||||
421 | |||||
422 | Each step has a set of B<point>s relative to the set of points before | ||||
423 | this step, all starting at the root of the data structure. | ||||
424 | |||||
425 | =head2 Special elements | ||||
426 | |||||
427 | =over 4 | ||||
428 | |||||
429 | =item C<//> | ||||
430 | |||||
431 | Anchors to any hash or array inside the data structure below the | ||||
432 | currently found points (or the root). | ||||
433 | |||||
434 | Typically used at the start of a path to anchor the path anywhere | ||||
435 | instead of only the root node: | ||||
436 | |||||
437 | //FOO/BAR | ||||
438 | |||||
439 | but can also happen inside paths to skip middle parts: | ||||
440 | |||||
441 | /AAA/BBB//FARAWAY | ||||
442 | |||||
443 | This allows any way between C<BBB> and C<FARAWAY>. | ||||
444 | |||||
445 | =item C<*> | ||||
446 | |||||
447 | Matches one step of any value relative to the current points (or the | ||||
448 | root). This step might be any hash key or all values of an array in | ||||
449 | the step before. | ||||
450 | |||||
451 | =item C<..> | ||||
452 | |||||
453 | Matches the parent element relative to the current points. | ||||
454 | |||||
455 | =item C<::ancestor> | ||||
456 | |||||
457 | Matches all ancestors (parent, grandparent, etc.) of the current node. | ||||
458 | |||||
459 | =item C<::ancestor-or-self> | ||||
460 | |||||
461 | Matches all ancestors (parent, grandparent, etc.) of the current node | ||||
462 | and the current node itself. | ||||
463 | |||||
464 | =item C<.> | ||||
465 | |||||
466 | A "no step". This keeps passively at the current points, but allows | ||||
467 | incrementally attaching filters to points or to otherwise hard to | ||||
468 | reach steps, like the top root element C</>. So you can do: | ||||
469 | |||||
470 | /.[ FILTER ] | ||||
471 | |||||
472 | or chain filters: | ||||
473 | |||||
474 | /AAA/BBB/.[ filter1 ]/.[ filter2 ]/.[ filter3 ] | ||||
475 | |||||
476 | This way you do not need to stuff many filters together into one huge | ||||
477 | killer expression and can more easily maintain them. | ||||
478 | |||||
479 | See L<Filters|/"Filters"> for more details on filters. | ||||
480 | |||||
481 | =item If you need those special elements to be not special but as | ||||
482 | key names, just quote them: | ||||
483 | |||||
484 | /"*"/ | ||||
485 | /"*"[ filter ]/ | ||||
486 | /"::ancestor"/ | ||||
487 | /".."/ | ||||
488 | /".."[ filter ]/ | ||||
489 | /"."/ | ||||
490 | /"."[ filter ]/ | ||||
491 | /"//"/ | ||||
492 | /"//"[ filter ]/ | ||||
493 | |||||
494 | =back | ||||
495 | |||||
496 | =head2 Difference between C</step[filter]> vs. C</step/.[filter]> | ||||
497 | vs. C</step/*[filter]> | ||||
498 | |||||
499 | The filter applies to the matched points of the step to which it is | ||||
500 | applied, therefore C</part[filter]> is the normal form, but see below | ||||
501 | how this affects array access. | ||||
502 | |||||
503 | The "no step" "/." stays on the current step, therefore | ||||
504 | C</part/.[filter]> should be the same as C</part[filter]>. | ||||
505 | |||||
506 | Lastly, C</part/*[filter]> means: take all the sub elements ("*") | ||||
507 | B<below> "step" and apply the filter to those. The most common use is | ||||
508 | to take "all" elements of an array and choose one element via index: | ||||
509 | C</step/*[4]/>. This takes the fifth element of the array inside | ||||
510 | "step". This is explained in even more depth in the next section. | ||||
511 | |||||
512 | =head2 Difference between C</affe[2]> vs. C</affe/*[2]> | ||||
513 | |||||
514 | B<Read carefully.> This is different from what you probably expect | ||||
515 | when you know XPath. | ||||
516 | |||||
517 | In B<XPath> "/affe[2]" would address an item of all elements named | ||||
518 | "affe" on this step. This is because in XPath elements with the same | ||||
519 | name can be repeated, like this: | ||||
520 | |||||
521 | <coolanimals> | ||||
522 | <affe>Pavian</affe> | ||||
523 | <affe>Gorilla</affe> | ||||
524 | <affe>Schimpanse</affe> | ||||
525 | </coolanimals> | ||||
526 | |||||
527 | and "//affe[2]" would get "Schimpanse" (we ignore the fact that in | ||||
528 | XPath array indexes start with 1, not 0 as in DPath, so we would | ||||
529 | actually get "Gorilla"; anyway, both are funky fellows). | ||||
530 | |||||
531 | So what does "/affe[2]" return in DPath? Nothing! It makes no sense, | ||||
532 | because "affe" is interpreted as a hash key and hash keys can not | ||||
533 | repeat in Perl data structures. | ||||
534 | |||||
535 | So what you often want in DPath is to look at the elements B<below> | ||||
536 | "affe" and take the third of them, e.g. in such a structure: | ||||
537 | |||||
538 | { affe => [ | ||||
539 | 'Pavian', | ||||
540 | 'Gorilla', | ||||
541 | 'Schimpanse' | ||||
542 | ] | ||||
543 | } | ||||
544 | |||||
545 | the path "/affe/*[2]" would return "Schimpanse". | ||||
546 | |||||
547 | =head2 Filters | ||||
548 | |||||
549 | Filters are conditions in brackets. They apply to all elements that | ||||
550 | are directly found by the path part to which the filter is appended. | ||||
551 | |||||
552 | Internally the filter condition is part of a C<grep> construct | ||||
553 | (exception: single integers, they choose array elements). See below. | ||||
554 | |||||
555 | Examples: | ||||
556 | |||||
557 | =over 4 | ||||
558 | |||||
559 | =item C</FOO/*[2]/> | ||||
560 | |||||
561 | A single integer as filter means choose an element from an array. So | ||||
562 | the C<*> finds all subelements that follow current step C<FOO> and the | ||||
563 | C<[2]> reduces them to only the third element (index starts at 0). | ||||
564 | |||||
565 | =item C</FOO/*[ idx == 2 ]/> | ||||
566 | |||||
567 | The C<*> is a step that matches all elements after C<FOO>, but with | ||||
568 | the filter only those elements are chosen that are of index 2. This is | ||||
569 | actually the same as just C</FOO/*[2]>. | ||||
570 | |||||
571 | =item C</FOO/*[key eq "CCC"]> | ||||
572 | |||||
573 | In all elements after C<FOO> it matches only those elements whose key | ||||
574 | is "CCC". | ||||
575 | |||||
576 | =item C</FOO/*[key =~ /CCC/ ]> | ||||
577 | |||||
578 | In all elements after step C<FOO> it matches only those elements whose | ||||
579 | key matches the regex C</CCC/>. It is actually just Perl code inside | ||||
580 | the filter which works in a grep{}-like context. | ||||
581 | |||||
582 | =item C<//FOO/*[value eq "RR2"]> | ||||
583 | |||||
584 | Find elements below C<FOO> that have the value C<RR2>. | ||||
585 | |||||
586 | Combine this with the parent step C<..>: | ||||
587 | |||||
588 | =item C<//FOO/*[value eq "RR2"]/..> | ||||
589 | |||||
590 | Find such an element below C<FOO> where an element with value C<RR2> | ||||
591 | is contained. | ||||
592 | |||||
593 | =item C<//FOO[size E<gt>= 3]> | ||||
594 | |||||
595 | Find C<FOO> elements that are arrays or hashes of size 3 or bigger. | ||||
596 | |||||
597 | =back | ||||
598 | |||||
599 | =head2 Filter functions | ||||
600 | |||||
601 | The filter condition is internally part of a C<grep> over the current | ||||
602 | subset of values. So you can write any condition like in a grep and | ||||
603 | also use the variable C<$_>. | ||||
604 | |||||
605 | Additional filter functions are available that are usually written to | ||||
606 | use $_ by default. See L<Data::DPath::Filters|Data::DPath::Filters> | ||||
607 | for complete list of available filter functions. | ||||
608 | |||||
609 | Here are some of them: | ||||
610 | |||||
611 | =over 4 | ||||
612 | |||||
613 | =item idx | ||||
614 | |||||
615 | Returns the current index inside array elements. | ||||
616 | |||||
617 | Please note that the current matching elements might not be in a | ||||
618 | defined order if resulting from anything else than arrays. | ||||
619 | |||||
620 | =item size | ||||
621 | |||||
622 | Returns the size of the current element. If it is an arrayref it | ||||
623 | returns number of elements, if it's a hashref it returns number of | ||||
624 | keys, if it's a scalar it returns 1, everything else returns -1. | ||||
625 | |||||
626 | =item key | ||||
627 | |||||
628 | Returns the key of the current element if it is a hashref. Else it | ||||
629 | returns undef. | ||||
630 | |||||
631 | =item value | ||||
632 | |||||
633 | Returns the value of the current element. If it is a hashref, return | ||||
634 | the value. If a scalar, return the scalar. Else return undef. | ||||
635 | |||||
636 | =back | ||||
637 | |||||
638 | =head2 Special characters | ||||
639 | |||||
640 | There are 4 special characters: the slash C</>, paired brackets C<[]>, | ||||
641 | the double-quote C<"> and the backslash C<\>. They are needed and | ||||
642 | explained in a logical order. | ||||
643 | |||||
644 | Path parts are divided by the slash C</>. | ||||
645 | |||||
646 | A path part can be extended by a filter with appending an expression | ||||
647 | in brackets C<[]>. | ||||
648 | |||||
649 | To contain slashes in hash keys, they can be surrounded by double | ||||
650 | quotes C<">. | ||||
651 | |||||
652 | To contain double-quotes in hash keys they can be escaped with | ||||
653 | backslash C<\>. | ||||
654 | |||||
655 | Backslashes in path parts don't need to be escaped, except before | ||||
656 | escaped quotes (but see below on L<Backslash handling|/"Backslash | ||||
657 | handling">). | ||||
658 | |||||
659 | Filters of parts are already sufficiently divided by the brackets | ||||
660 | C<[]>. There is no need to handle special characters in them, not even | ||||
661 | double-quotes. The filter expression just needs to be balanced on the | ||||
662 | brackets. | ||||
663 | |||||
664 | So this is the order how to create paths: | ||||
665 | |||||
666 | =over 4 | ||||
667 | |||||
668 | =item 1. backslash double-quotes that are part of the key | ||||
669 | |||||
670 | =item 2. put double-quotes around the resulting key | ||||
671 | |||||
672 | =item 3. append the filter expression after the key | ||||
673 | |||||
674 | =item 4. separate several path parts with slashes | ||||
675 | |||||
676 | =back | ||||
677 | |||||
678 | =head2 Backslash handling | ||||
679 | |||||
680 | If you know backslash in Perl strings, skip this paragraph, it should | ||||
681 | be the same. | ||||
682 | |||||
683 | It is somewhat difficult to create a backslash directly before a | ||||
684 | quoted double-quote. | ||||
685 | |||||
686 | Inside the DPath language the typical backslash rules apply that | ||||
687 | you already know from Perl B<single quoted> strings. The challenge is | ||||
688 | to specify such strings inside Perl programs where another layer of | ||||
689 | this backslashing applies. | ||||
690 | |||||
691 | Without quotes it's all easy. Both a single backslash C<\> and a | ||||
692 | double backslash C<\\> get evaluated to a single backslash C<\>. | ||||
693 | |||||
694 | Extreme edge case by example: To specify a plain hash key like this: | ||||
695 | |||||
696 | "EE\E5\" | ||||
697 | |||||
698 | where the quotes are part of the key, you need to escape the quotes | ||||
699 | and the backslash: | ||||
700 | |||||
701 | \"EE\E5\\\" | ||||
702 | |||||
703 | Now put quotes around that to use it as DPath hash key: | ||||
704 | |||||
705 | "\"EE\E5\\\"" | ||||
706 | |||||
707 | and if you specify this in a Perl program you need to additionally | ||||
708 | escape the backslashes (i.e., double their count): | ||||
709 | |||||
710 | "\"EE\E5\\\\\\"" | ||||
711 | |||||
712 | As you can see, strangely, this backslash escaping is only needed on | ||||
713 | backslashes that are not standing alone. The first backslash before | ||||
714 | the first escaped double-quote is ok to be a single backslash. | ||||
715 | |||||
716 | All strange, isn't it? At least it's (hopefully) consistent with | ||||
717 | something you know (Perl, Shell, etc.). | ||||
718 | |||||
719 | =head1 Iterator style | ||||
720 | |||||
721 | The I<iterator style> approach is an alternative to the already | ||||
722 | described I<get-all-results-at-once> approach. With it you iterate over | ||||
723 | the results one by one and even allow relative sub searches on | ||||
724 | each. The iterators use the L<Iterator|Iterator> API. | ||||
725 | |||||
726 | Please note, that the iterators do B<not> save memory, they are just | ||||
727 | holding the context to go step-by-step and to start subsequent | ||||
728 | searches. Each iterator needs to evaluate its whole result set | ||||
729 | first. So in fact with nested iterators your memory might even go up. | ||||
730 | |||||
731 | =head2 Basic usage by example | ||||
732 | |||||
733 | Initialize a DPath iterator on a data structure using: | ||||
734 | |||||
735 | my $root = dpathi($data); | ||||
736 | |||||
737 | Create a new iterator context, with the path relative to current | ||||
738 | root context: | ||||
739 | |||||
740 | my $affe_iter = $root->isearch("//anywhere/affe"); | ||||
741 | |||||
742 | Iterate over affe results: | ||||
743 | |||||
744 | while ($affe_iter->isnt_exhausted) | ||||
745 | { | ||||
746 | my $affe_point = $affe_iter->value; # next "affe" point | ||||
747 | my $affe = $affe_point->deref; # the actual "affe" | ||||
748 | } | ||||
749 | |||||
750 | =head2 Nested iterators example | ||||
751 | |||||
752 | This example is taken from the | ||||
753 | L<Benchmark::Perl::Formance|Benchmark::Perl::Formance> suite, where | ||||
754 | the several plugins are allowed to provide their results anywhere | ||||
755 | at any level down in the result hash. | ||||
756 | |||||
757 | When the results are printed we look for all keys C<Benchmark> and | ||||
758 | regenerate the path to each so we can name it accordingly, e.g., | ||||
759 | C<plugin.name.subname>. | ||||
760 | |||||
761 | For this we need an iterator to get the single C<Benchmark> points one | ||||
762 | by one and evaluate the corresponding ancestors to fetch their hash | ||||
763 | keys. Here is the code: | ||||
764 | |||||
765 | my $benchmarks_iter = dpathi($results)->isearch("//Benchmark"); | ||||
766 | while ($benchmarks_iter->isnt_exhausted) | ||||
767 | { | ||||
768 | my $benchmark = $benchmarks_iter->value; | ||||
769 | my $ancestors_iter = $benchmark->isearch ("/::ancestor"); | ||||
770 | while ($ancestors_iter->isnt_exhausted) | ||||
771 | { | ||||
772 | my $ancestor = $ancestors_iter->value; | ||||
773 | print Dumper( $ancestor->deref ); #(1) | ||||
774 | print $ancestor->first_point->{attrs}{key}; #(2) | ||||
775 | } | ||||
776 | } | ||||
777 | |||||
778 | Note that we have two iterators, the first one (C<$benchmarks_iter>) | ||||
779 | over the actual benchmark results and the second one | ||||
780 | (C<$ancestors_iter>) over the ancestors relative to one benchmark. | ||||
781 | |||||
782 | In line B<#(1)> you can see that once you have the searched point, | ||||
783 | here the ancestors, you get the actual data using | ||||
784 | C<< $iterator->value->deref >>. | ||||
785 | |||||
786 | The line B<#(2)> is utilizing the internal data structure to find out | ||||
787 | about the actual hash key under which the point is located. (There is | ||||
788 | also an official API to that: C<< $ancestor->first_point->attrs->key >>, | ||||
789 | but there it's necessary to check for undefined values before | ||||
790 | calling the methods F<attrs> and F<key>, so I went the easy way). | ||||
791 | |||||
792 | =head1 INTERNAL METHODS | ||||
793 | |||||
794 | To make pod coverage happy. | ||||
795 | |||||
796 | =head2 build_dpath | ||||
797 | |||||
798 | Prepares internal attributes for I<dpath>. | ||||
799 | |||||
800 | =head2 build_dpathr | ||||
801 | |||||
802 | Prepares internal attributes for I<dpathr>. | ||||
803 | |||||
804 | =head2 build_dpathi | ||||
805 | |||||
806 | Prepares internal attributes for I<dpathi>. | ||||
807 | |||||
808 | =head1 AUTHOR | ||||
809 | |||||
810 | Steffen Schwigon, C<< <schwigon at cpan.org> >> | ||||
811 | |||||
812 | =head1 CONTRIBUTIONS | ||||
813 | |||||
814 | Florian Ragwitz (cleaner exports, $_ scoping, general perl consultant) | ||||
815 | |||||
816 | =head1 SEE ALSO | ||||
817 | |||||
818 | There are other modules on CPAN which are related to finding elements | ||||
819 | in data structures. | ||||
820 | |||||
821 | =over 4 | ||||
822 | |||||
823 | =item Data::Path | ||||
824 | |||||
825 | L<http://metacpan.org/release/Data-Path> | ||||
826 | |||||
827 | =item XML::XPathEngine | ||||
828 | |||||
829 | L<http://metacpan.org/release/XML-XPathEngine> | ||||
830 | |||||
831 | =item Tree::XPathEngine | ||||
832 | |||||
833 | L<http://metacpan.org/release/Tree-XPathEngine> | ||||
834 | |||||
835 | =item Class::XPath | ||||
836 | |||||
837 | L<http://metacpan.org/release/Class-XPath> | ||||
838 | |||||
839 | =item Hash::Path | ||||
840 | |||||
841 | L<http://metacpan.org/release/Hash-Path> | ||||
842 | |||||
843 | =back | ||||
844 | |||||
845 | =head1 AUTHOR | ||||
846 | |||||
847 | Steffen Schwigon <ss5@renormalist.net> | ||||
848 | |||||
849 | =head1 COPYRIGHT AND LICENSE | ||||
850 | |||||
851 | This software is copyright (c) 2012 by Steffen Schwigon. | ||||
852 | |||||
853 | This is free software; you can redistribute it and/or modify it under | ||||
854 | the same terms as the Perl 5 programming language system itself. | ||||
855 | |||||
856 | =cut | ||||
857 | |||||
858 | |||||
859 | __END__ |