File | /usr/share/perl5/XML/Simple.pm |
Statements Executed | 35 |
Total Time | 0.008789 seconds |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 16µs | 16µs | import | XML::Simple::
0 | 0 | 0 | 0s | 0s | BEGIN | XML::Simple::
0 | 0 | 0 | 0s | 0s | XMLin | XML::Simple::
0 | 0 | 0 | 0s | 0s | XMLout | XML::Simple::
0 | 0 | 0 | 0s | 0s | _get_object | XML::Simple::
0 | 0 | 0 | 0s | 0s | array_to_hash | XML::Simple::
0 | 0 | 0 | 0s | 0s | build_simple_tree | XML::Simple::
0 | 0 | 0 | 0s | 0s | build_tree | XML::Simple::
0 | 0 | 0 | 0s | 0s | build_tree_xml_parser | XML::Simple::
0 | 0 | 0 | 0s | 0s | cache_read_memcopy | XML::Simple::
0 | 0 | 0 | 0s | 0s | cache_read_memshare | XML::Simple::
0 | 0 | 0 | 0s | 0s | cache_read_storable | XML::Simple::
0 | 0 | 0 | 0s | 0s | cache_write_memcopy | XML::Simple::
0 | 0 | 0 | 0s | 0s | cache_write_memshare | XML::Simple::
0 | 0 | 0 | 0s | 0s | cache_write_storable | XML::Simple::
0 | 0 | 0 | 0s | 0s | characters | XML::Simple::
0 | 0 | 0 | 0s | 0s | collapse | XML::Simple::
0 | 0 | 0 | 0s | 0s | collapse_content | XML::Simple::
0 | 0 | 0 | 0s | 0s | copy_hash | XML::Simple::
0 | 0 | 0 | 0s | 0s | default_config_file | XML::Simple::
0 | 0 | 0 | 0s | 0s | die_or_warn | XML::Simple::
0 | 0 | 0 | 0s | 0s | end_document | XML::Simple::
0 | 0 | 0 | 0s | 0s | end_element | XML::Simple::
0 | 0 | 0 | 0s | 0s | escape_value | XML::Simple::
0 | 0 | 0 | 0s | 0s | find_xml_file | XML::Simple::
0 | 0 | 0 | 0s | 0s | get_var | XML::Simple::
0 | 0 | 0 | 0s | 0s | handle_options | XML::Simple::
0 | 0 | 0 | 0s | 0s | hash_to_array | XML::Simple::
0 | 0 | 0 | 0s | 0s | new | XML::Simple::
0 | 0 | 0 | 0s | 0s | new_hashref | XML::Simple::
0 | 0 | 0 | 0s | 0s | normalise_space | XML::Simple::
0 | 0 | 0 | 0s | 0s | numeric_escape | XML::Simple::
0 | 0 | 0 | 0s | 0s | parse_fh | XML::Simple::
0 | 0 | 0 | 0s | 0s | parse_file | XML::Simple::
0 | 0 | 0 | 0s | 0s | parse_string | XML::Simple::
0 | 0 | 0 | 0s | 0s | set_var | XML::Simple::
0 | 0 | 0 | 0s | 0s | sorted_keys | XML::Simple::
0 | 0 | 0 | 0s | 0s | start_document | XML::Simple::
0 | 0 | 0 | 0s | 0s | start_element | XML::Simple::
0 | 0 | 0 | 0s | 0s | storable_filename | XML::Simple::
0 | 0 | 0 | 0s | 0s | value_to_xml | XML::Simple::
Line | Stmts. | Exclusive Time | Avg. | Code |
---|---|---|---|---|
1 | # $Id: Simple.pm,v 1.40 2007/08/15 10:36:48 grantm Exp $ | |||
2 | ||||
3 | package XML::Simple; | |||
4 | ||||
5 | =head1 NAME | |||
6 | ||||
7 | XML::Simple - Easy API to maintain XML (esp config files) | |||
8 | ||||
9 | =head1 SYNOPSIS | |||
10 | ||||
11 | use XML::Simple; | |||
12 | ||||
13 | my $ref = XMLin([<xml file or string>] [, <options>]); | |||
14 | ||||
15 | my $xml = XMLout($hashref [, <options>]); | |||
16 | ||||
17 | Or the object oriented way: | |||
18 | ||||
19 | require XML::Simple; | |||
20 | ||||
21 | my $xs = XML::Simple->new(options); | |||
22 | ||||
23 | my $ref = $xs->XMLin([<xml file or string>] [, <options>]); | |||
24 | ||||
25 | my $xml = $xs->XMLout($hashref [, <options>]); | |||
26 | ||||
27 | (or see L<"SAX SUPPORT"> for 'the SAX way'). | |||
28 | ||||
29 | To catch common errors: | |||
30 | ||||
31 | use XML::Simple qw(:strict); | |||
32 | ||||
33 | (see L<"STRICT MODE"> for more details). | |||
34 | ||||
35 | =cut | |||
36 | ||||
37 | # See after __END__ for more POD documentation | |||
38 | ||||
39 | ||||
40 | # Load essentials here, other modules loaded on demand later | |||
41 | ||||
42 | 3 | 30µs | 10µs | use strict; # spent 10µs making 1 call to strict::import |
43 | 3 | 45µs | 15µs | use Carp; # spent 64µs making 1 call to Exporter::import |
44 | 1 | 2µs | 2µs | require Exporter; |
45 | ||||
46 | ||||
47 | ############################################################################## | |||
48 | # Define some constants | |||
49 | # | |||
50 | ||||
51 | 3 | 7.46ms | 2.49ms | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER); # spent 77µs making 1 call to vars::import |
52 | ||||
53 | 1 | 13µs | 13µs | @ISA = qw(Exporter); |
54 | 1 | 1µs | 1µs | @EXPORT = qw(XMLin XMLout); |
55 | 1 | 900ns | 900ns | @EXPORT_OK = qw(xml_in xml_out); |
56 | 1 | 500ns | 500ns | $VERSION = '2.18'; |
57 | 1 | 400ns | 400ns | $PREFERRED_PARSER = undef; |
58 | ||||
59 | 1 | 500ns | 500ns | my $StrictMode = 0; |
60 | ||||
61 | 1 | 5µs | 5µs | my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr |
62 | searchpath forcearray cache suppressempty parseropts | |||
63 | grouptags nsexpand datahandler varattr variables | |||
64 | normalisespace normalizespace valueattr); | |||
65 | ||||
66 | 1 | 7µs | 7µs | my @KnownOptOut = qw(keyattr keeproot contentkey noattr |
67 | rootname xmldecl outputfile noescape suppressempty | |||
68 | grouptags nsexpand handler noindent attrindent nosort | |||
69 | valueattr numericescape); | |||
70 | ||||
71 | 1 | 1µs | 1µs | my @DefKeyAttr = qw(name key id); |
72 | 1 | 500ns | 500ns | my $DefRootName = qq(opt); |
73 | 1 | 500ns | 500ns | my $DefContentKey = qq(content); |
74 | 1 | 500ns | 500ns | my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>); |
75 | ||||
76 | 1 | 500ns | 500ns | my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; |
77 | 1 | 6µs | 6µs | my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround |
78 | ||||
79 | ||||
80 | ############################################################################## | |||
81 | # Globals for use by caching routines | |||
82 | # | |||
83 | ||||
84 | 1 | 800ns | 800ns | my %MemShareCache = (); |
85 | 1 | 300ns | 300ns | my %MemCopyCache = (); |
86 | ||||
87 | ||||
88 | ############################################################################## | |||
89 | # Wrapper for Exporter - handles ':strict' | |||
90 | # | |||
91 | ||||
92 | # spent 16µs within XML::Simple::import which was called
# once (16µs+0s) by MARC::Moose::Parser::MarcxmlSax::BEGIN at line 11 of /home/tamil/util/marc-moose/lib/MARC/Moose/Parser/MarcxmlSax.pm | |||
93 | # Handle the :strict tag | |||
94 | ||||
95 | 1 | 3µs | 3µs | $StrictMode = 1 if grep(/^:strict$/, @_); |
96 | ||||
97 | # Pass everything else to Exporter.pm | |||
98 | ||||
99 | 1 | 3µs | 3µs | @_ = grep(!/^:strict$/, @_); |
100 | 1 | 4µs | 4µs | goto &Exporter::import; |
101 | } | |||
102 | ||||
103 | ||||
104 | ############################################################################## | |||
105 | # Constructor for optional object interface. | |||
106 | # | |||
107 | ||||
108 | sub new { | |||
109 | my $class = shift; | |||
110 | ||||
111 | if(@_ % 2) { | |||
112 | croak "Default options must be name=>value pairs (odd number supplied)"; | |||
113 | } | |||
114 | ||||
115 | my %known_opt; | |||
116 | @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100; | |||
117 | ||||
118 | my %raw_opt = @_; | |||
119 | my %def_opt; | |||
120 | while(my($key, $val) = each %raw_opt) { | |||
121 | my $lkey = lc($key); | |||
122 | $lkey =~ s/_//g; | |||
123 | croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); | |||
124 | $def_opt{$lkey} = $val; | |||
125 | } | |||
126 | my $self = { def_opt => \%def_opt }; | |||
127 | ||||
128 | return(bless($self, $class)); | |||
129 | } | |||
130 | ||||
131 | ||||
132 | ############################################################################## | |||
133 | # Sub: _get_object() | |||
134 | # | |||
135 | # Helper routine called from XMLin() and XMLout() to create an object if none | |||
136 | # was provided. Note, this routine does mess with the caller's @_ array. | |||
137 | # | |||
138 | ||||
139 | sub _get_object { | |||
140 | my $self; | |||
141 | if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { | |||
142 | $self = shift; | |||
143 | } | |||
144 | else { | |||
145 | $self = XML::Simple->new(); | |||
146 | } | |||
147 | ||||
148 | return $self; | |||
149 | } | |||
150 | ||||
151 | ||||
152 | ############################################################################## | |||
153 | # Sub/Method: XMLin() | |||
154 | # | |||
155 | # Exported routine for slurping XML into a hashref - see pod for info. | |||
156 | # | |||
157 | # May be called as object method or as a plain function. | |||
158 | # | |||
159 | # Expects one arg for the source XML, optionally followed by a number of | |||
160 | # name => value option pairs. | |||
161 | # | |||
162 | ||||
163 | sub XMLin { | |||
164 | my $self = &_get_object; # note, @_ is passed implicitly | |||
165 | ||||
166 | my $target = shift; | |||
167 | ||||
168 | ||||
169 | # Work out whether to parse a string, a file or a filehandle | |||
170 | ||||
171 | if(not defined $target) { | |||
172 | return $self->parse_file(undef, @_); | |||
173 | } | |||
174 | ||||
175 | elsif($target eq '-') { | |||
176 | local($/) = undef; | |||
177 | $target = <STDIN>; | |||
178 | return $self->parse_string(\$target, @_); | |||
179 | } | |||
180 | ||||
181 | elsif(my $type = ref($target)) { | |||
182 | if($type eq 'SCALAR') { | |||
183 | return $self->parse_string($target, @_); | |||
184 | } | |||
185 | else { | |||
186 | return $self->parse_fh($target, @_); | |||
187 | } | |||
188 | } | |||
189 | ||||
190 | elsif($target =~ m{<.*?>}s) { | |||
191 | return $self->parse_string(\$target, @_); | |||
192 | } | |||
193 | ||||
194 | else { | |||
195 | return $self->parse_file($target, @_); | |||
196 | } | |||
197 | } | |||
198 | ||||
199 | ||||
200 | ############################################################################## | |||
201 | # Sub/Method: parse_file() | |||
202 | # | |||
203 | # Same as XMLin, but only parses from a named file. | |||
204 | # | |||
205 | ||||
206 | sub parse_file { | |||
207 | my $self = &_get_object; # note, @_ is passed implicitly | |||
208 | ||||
209 | my $filename = shift; | |||
210 | ||||
211 | $self->handle_options('in', @_); | |||
212 | ||||
213 | $filename = $self->default_config_file if not defined $filename; | |||
214 | ||||
215 | $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}}); | |||
216 | ||||
217 | # Check cache for previous parse | |||
218 | ||||
219 | if($self->{opt}->{cache}) { | |||
220 | foreach my $scheme (@{$self->{opt}->{cache}}) { | |||
221 | my $method = 'cache_read_' . $scheme; | |||
222 | my $opt = $self->$method($filename); | |||
223 | return($opt) if($opt); | |||
224 | } | |||
225 | } | |||
226 | ||||
227 | my $ref = $self->build_simple_tree($filename, undef); | |||
228 | ||||
229 | if($self->{opt}->{cache}) { | |||
230 | my $method = 'cache_write_' . $self->{opt}->{cache}->[0]; | |||
231 | $self->$method($ref, $filename); | |||
232 | } | |||
233 | ||||
234 | return $ref; | |||
235 | } | |||
236 | ||||
237 | ||||
238 | ############################################################################## | |||
239 | # Sub/Method: parse_fh() | |||
240 | # | |||
241 | # Same as XMLin, but only parses from a filehandle. | |||
242 | # | |||
243 | ||||
244 | sub parse_fh { | |||
245 | my $self = &_get_object; # note, @_ is passed implicitly | |||
246 | ||||
247 | my $fh = shift; | |||
248 | croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') . | |||
249 | " as a filehandle" unless ref $fh; | |||
250 | ||||
251 | $self->handle_options('in', @_); | |||
252 | ||||
253 | return $self->build_simple_tree(undef, $fh); | |||
254 | } | |||
255 | ||||
256 | ||||
257 | ############################################################################## | |||
258 | # Sub/Method: parse_string() | |||
259 | # | |||
260 | # Same as XMLin, but only parses from a string or a reference to a string. | |||
261 | # | |||
262 | ||||
263 | sub parse_string { | |||
264 | my $self = &_get_object; # note, @_ is passed implicitly | |||
265 | ||||
266 | my $string = shift; | |||
267 | ||||
268 | $self->handle_options('in', @_); | |||
269 | ||||
270 | return $self->build_simple_tree(undef, ref $string ? $string : \$string); | |||
271 | } | |||
272 | ||||
273 | ||||
274 | ############################################################################## | |||
275 | # Method: default_config_file() | |||
276 | # | |||
277 | # Returns the name of the XML file to parse if no filename (or XML string) | |||
278 | # was provided. | |||
279 | # | |||
280 | ||||
281 | sub default_config_file { | |||
282 | my $self = shift; | |||
283 | ||||
284 | require File::Basename; | |||
285 | ||||
286 | my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+'); | |||
287 | ||||
288 | # Add script directory to searchpath | |||
289 | ||||
290 | if($script_dir) { | |||
291 | unshift(@{$self->{opt}->{searchpath}}, $script_dir); | |||
292 | } | |||
293 | ||||
294 | return $basename . '.xml'; | |||
295 | } | |||
296 | ||||
297 | ||||
298 | ############################################################################## | |||
299 | # Method: build_simple_tree() | |||
300 | # | |||
301 | # Builds a 'tree' data structure as provided by XML::Parser and then | |||
302 | # 'simplifies' it as specified by the various options in effect. | |||
303 | # | |||
304 | ||||
305 | sub build_simple_tree { | |||
306 | my $self = shift; | |||
307 | ||||
308 | my $tree = $self->build_tree(@_); | |||
309 | ||||
310 | return $self->{opt}->{keeproot} | |||
311 | ? $self->collapse({}, @$tree) | |||
312 | : $self->collapse(@{$tree->[1]}); | |||
313 | } | |||
314 | ||||
315 | ||||
316 | ############################################################################## | |||
317 | # Method: build_tree() | |||
318 | # | |||
319 | # This routine will be called if there is no suitable pre-parsed tree in a | |||
320 | # cache. It parses the XML and returns an XML::Parser 'Tree' style data | |||
321 | # structure (summarised in the comments for the collapse() routine below). | |||
322 | # | |||
323 | # XML::Simple requires the services of another module that knows how to parse | |||
324 | # XML. If XML::SAX is installed, the default SAX parser will be used, | |||
325 | # otherwise XML::Parser will be used. | |||
326 | # | |||
327 | # This routine expects to be passed a filename as argument 1 or a 'string' as | |||
328 | # argument 2. The 'string' might be a string of XML (passed by reference to | |||
329 | # save memory) or it might be a reference to an IO::Handle. (This | |||
330 | # non-intuitive mess results in part from the way XML::Parser works but that's | |||
331 | # really no excuse). | |||
332 | # | |||
333 | ||||
334 | sub build_tree { | |||
335 | my $self = shift; | |||
336 | my $filename = shift; | |||
337 | my $string = shift; | |||
338 | ||||
339 | ||||
340 | my $preferred_parser = $PREFERRED_PARSER; | |||
341 | unless(defined($preferred_parser)) { | |||
342 | $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; | |||
343 | } | |||
344 | if($preferred_parser eq 'XML::Parser') { | |||
345 | return($self->build_tree_xml_parser($filename, $string)); | |||
346 | } | |||
347 | ||||
348 | eval { require XML::SAX; }; # We didn't need it until now | |||
349 | if($@) { # No XML::SAX - fall back to XML::Parser | |||
350 | if($preferred_parser) { # unless a SAX parser was expressly requested | |||
351 | croak "XMLin() could not load XML::SAX"; | |||
352 | } | |||
353 | return($self->build_tree_xml_parser($filename, $string)); | |||
354 | } | |||
355 | ||||
356 | $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); | |||
357 | ||||
358 | my $sp = XML::SAX::ParserFactory->parser(Handler => $self); | |||
359 | ||||
360 | $self->{nocollapse} = 1; | |||
361 | my($tree); | |||
362 | if($filename) { | |||
363 | $tree = $sp->parse_uri($filename); | |||
364 | } | |||
365 | else { | |||
366 | if(ref($string) && ref($string) ne 'SCALAR') { | |||
367 | $tree = $sp->parse_file($string); | |||
368 | } | |||
369 | else { | |||
370 | $tree = $sp->parse_string($$string); | |||
371 | } | |||
372 | } | |||
373 | ||||
374 | return($tree); | |||
375 | } | |||
376 | ||||
377 | ||||
378 | ############################################################################## | |||
379 | # Method: build_tree_xml_parser() | |||
380 | # | |||
381 | # This routine will be called if XML::SAX is not installed, or if XML::Parser | |||
382 | # was specifically requested. It takes the same arguments as build_tree() and | |||
383 | # returns the same data structure (XML::Parser 'Tree' style). | |||
384 | # | |||
385 | ||||
386 | sub build_tree_xml_parser { | |||
387 | my $self = shift; | |||
388 | my $filename = shift; | |||
389 | my $string = shift; | |||
390 | ||||
391 | ||||
392 | eval { | |||
393 | local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() | |||
394 | require XML::Parser; # We didn't need it until now | |||
395 | }; | |||
396 | if($@) { | |||
397 | croak "XMLin() requires either XML::SAX or XML::Parser"; | |||
398 | } | |||
399 | ||||
400 | if($self->{opt}->{nsexpand}) { | |||
401 | carp "'nsexpand' option requires XML::SAX"; | |||
402 | } | |||
403 | ||||
404 | my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}}); | |||
405 | my($tree); | |||
406 | if($filename) { | |||
407 | # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl | |||
408 | local(*XML_FILE); | |||
409 | open(XML_FILE, '<', $filename) || croak qq($filename - $!); | |||
410 | $tree = $xp->parse(*XML_FILE); | |||
411 | close(XML_FILE); | |||
412 | } | |||
413 | else { | |||
414 | $tree = $xp->parse($$string); | |||
415 | } | |||
416 | ||||
417 | return($tree); | |||
418 | } | |||
419 | ||||
420 | ||||
421 | ############################################################################## | |||
422 | # Method: cache_write_storable() | |||
423 | # | |||
424 | # Wrapper routine for invoking Storable::nstore() to cache a parsed data | |||
425 | # structure. | |||
426 | # | |||
427 | ||||
428 | sub cache_write_storable { | |||
429 | my($self, $data, $filename) = @_; | |||
430 | ||||
431 | my $cachefile = $self->storable_filename($filename); | |||
432 | ||||
433 | require Storable; # We didn't need it until now | |||
434 | ||||
435 | if ('VMS' eq $^O) { | |||
436 | Storable::nstore($data, $cachefile); | |||
437 | } | |||
438 | else { | |||
439 | # If the following line fails for you, your Storable.pm is old - upgrade | |||
440 | Storable::lock_nstore($data, $cachefile); | |||
441 | } | |||
442 | ||||
443 | } | |||
444 | ||||
445 | ||||
446 | ############################################################################## | |||
447 | # Method: cache_read_storable() | |||
448 | # | |||
449 | # Wrapper routine for invoking Storable::retrieve() to read a cached parsed | |||
450 | # data structure. Only returns cached data if the cache file exists and is | |||
451 | # newer than the source XML file. | |||
452 | # | |||
453 | ||||
454 | sub cache_read_storable { | |||
455 | my($self, $filename) = @_; | |||
456 | ||||
457 | my $cachefile = $self->storable_filename($filename); | |||
458 | ||||
459 | return unless(-r $cachefile); | |||
460 | return unless((stat($cachefile))[9] > (stat($filename))[9]); | |||
461 | ||||
462 | require Storable; # We didn't need it until now | |||
463 | ||||
464 | if ('VMS' eq $^O) { | |||
465 | return(Storable::retrieve($cachefile)); | |||
466 | } | |||
467 | else { | |||
468 | return(Storable::lock_retrieve($cachefile)); | |||
469 | } | |||
470 | ||||
471 | } | |||
472 | ||||
473 | ||||
474 | ############################################################################## | |||
475 | # Method: storable_filename() | |||
476 | # | |||
477 | # Translates the supplied source XML filename into a filename for the storable | |||
478 | # cached data. A '.stor' suffix is added after stripping an optional '.xml' | |||
479 | # suffix. | |||
480 | # | |||
481 | ||||
482 | sub storable_filename { | |||
483 | my($self, $cachefile) = @_; | |||
484 | ||||
485 | $cachefile =~ s{(\.xml)?$}{.stor}; | |||
486 | return $cachefile; | |||
487 | } | |||
488 | ||||
489 | ||||
490 | ############################################################################## | |||
491 | # Method: cache_write_memshare() | |||
492 | # | |||
493 | # Takes the supplied data structure reference and stores it away in a global | |||
494 | # hash structure. | |||
495 | # | |||
496 | ||||
497 | sub cache_write_memshare { | |||
498 | my($self, $data, $filename) = @_; | |||
499 | ||||
500 | $MemShareCache{$filename} = [time(), $data]; | |||
501 | } | |||
502 | ||||
503 | ||||
504 | ############################################################################## | |||
505 | # Method: cache_read_memshare() | |||
506 | # | |||
507 | # Takes a filename and looks in a global hash for a cached parsed version. | |||
508 | # | |||
509 | ||||
510 | sub cache_read_memshare { | |||
511 | my($self, $filename) = @_; | |||
512 | ||||
513 | return unless($MemShareCache{$filename}); | |||
514 | return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); | |||
515 | ||||
516 | return($MemShareCache{$filename}->[1]); | |||
517 | ||||
518 | } | |||
519 | ||||
520 | ||||
521 | ############################################################################## | |||
522 | # Method: cache_write_memcopy() | |||
523 | # | |||
524 | # Takes the supplied data structure and stores a copy of it in a global hash | |||
525 | # structure. | |||
526 | # | |||
527 | ||||
528 | sub cache_write_memcopy { | |||
529 | my($self, $data, $filename) = @_; | |||
530 | ||||
531 | require Storable; # We didn't need it until now | |||
532 | ||||
533 | $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; | |||
534 | } | |||
535 | ||||
536 | ||||
537 | ############################################################################## | |||
538 | # Method: cache_read_memcopy() | |||
539 | # | |||
540 | # Takes a filename and looks in a global hash for a cached parsed version. | |||
541 | # Returns a reference to a copy of that data structure. | |||
542 | # | |||
543 | ||||
544 | sub cache_read_memcopy { | |||
545 | my($self, $filename) = @_; | |||
546 | ||||
547 | return unless($MemCopyCache{$filename}); | |||
548 | return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); | |||
549 | ||||
550 | return(Storable::dclone($MemCopyCache{$filename}->[1])); | |||
551 | ||||
552 | } | |||
553 | ||||
554 | ||||
555 | ############################################################################## | |||
556 | # Sub/Method: XMLout() | |||
557 | # | |||
558 | # Exported routine for 'unslurping' a data structure out to XML. | |||
559 | # | |||
560 | # Expects a reference to a data structure and an optional list of option | |||
561 | # name => value pairs. | |||
562 | # | |||
563 | ||||
564 | sub XMLout { | |||
565 | my $self = &_get_object; # note, @_ is passed implicitly | |||
566 | ||||
567 | croak "XMLout() requires at least one argument" unless(@_); | |||
568 | my $ref = shift; | |||
569 | ||||
570 | $self->handle_options('out', @_); | |||
571 | ||||
572 | ||||
573 | # If namespace expansion is set, XML::NamespaceSupport is required | |||
574 | ||||
575 | if($self->{opt}->{nsexpand}) { | |||
576 | require XML::NamespaceSupport; | |||
577 | $self->{nsup} = XML::NamespaceSupport->new(); | |||
578 | $self->{ns_prefix} = 'aaa'; | |||
579 | } | |||
580 | ||||
581 | ||||
582 | # Wrap top level arrayref in a hash | |||
583 | ||||
584 | if(UNIVERSAL::isa($ref, 'ARRAY')) { | |||
585 | $ref = { anon => $ref }; | |||
586 | } | |||
587 | ||||
588 | ||||
589 | # Extract rootname from top level hash if keeproot enabled | |||
590 | ||||
591 | if($self->{opt}->{keeproot}) { | |||
592 | my(@keys) = keys(%$ref); | |||
593 | if(@keys == 1) { | |||
594 | $ref = $ref->{$keys[0]}; | |||
595 | $self->{opt}->{rootname} = $keys[0]; | |||
596 | } | |||
597 | } | |||
598 | ||||
599 | # Ensure there are no top level attributes if we're not adding root elements | |||
600 | ||||
601 | elsif($self->{opt}->{rootname} eq '') { | |||
602 | if(UNIVERSAL::isa($ref, 'HASH')) { | |||
603 | my $refsave = $ref; | |||
604 | $ref = {}; | |||
605 | foreach (keys(%$refsave)) { | |||
606 | if(ref($refsave->{$_})) { | |||
607 | $ref->{$_} = $refsave->{$_}; | |||
608 | } | |||
609 | else { | |||
610 | $ref->{$_} = [ $refsave->{$_} ]; | |||
611 | } | |||
612 | } | |||
613 | } | |||
614 | } | |||
615 | ||||
616 | ||||
617 | # Encode the hashref and write to file if necessary | |||
618 | ||||
619 | $self->{_ancestors} = []; | |||
620 | my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); | |||
621 | delete $self->{_ancestors}; | |||
622 | ||||
623 | if($self->{opt}->{xmldecl}) { | |||
624 | $xml = $self->{opt}->{xmldecl} . "\n" . $xml; | |||
625 | } | |||
626 | ||||
627 | if($self->{opt}->{outputfile}) { | |||
628 | if(ref($self->{opt}->{outputfile})) { | |||
629 | my $fh = $self->{opt}->{outputfile}; | |||
630 | if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) { | |||
631 | eval { require IO::Handle; }; | |||
632 | croak $@ if $@; | |||
633 | } | |||
634 | return($fh->print($xml)); | |||
635 | } | |||
636 | else { | |||
637 | local(*OUT); | |||
638 | open(OUT, '>', "$self->{opt}->{outputfile}") || | |||
639 | croak "open($self->{opt}->{outputfile}): $!"; | |||
640 | binmode(OUT, ':utf8') if($] >= 5.008); | |||
641 | print OUT $xml || croak "print: $!"; | |||
642 | close(OUT); | |||
643 | } | |||
644 | } | |||
645 | elsif($self->{opt}->{handler}) { | |||
646 | require XML::SAX; | |||
647 | my $sp = XML::SAX::ParserFactory->parser( | |||
648 | Handler => $self->{opt}->{handler} | |||
649 | ); | |||
650 | return($sp->parse_string($xml)); | |||
651 | } | |||
652 | else { | |||
653 | return($xml); | |||
654 | } | |||
655 | } | |||
656 | ||||
657 | ||||
658 | ############################################################################## | |||
659 | # Method: handle_options() | |||
660 | # | |||
661 | # Helper routine for both XMLin() and XMLout(). Both routines handle their | |||
662 | # first argument and assume all other args are options handled by this routine. | |||
663 | # Saves a hash of options in $self->{opt}. | |||
664 | # | |||
665 | # If default options were passed to the constructor, they will be retrieved | |||
666 | # here and merged with options supplied to the method call. | |||
667 | # | |||
668 | # First argument should be the string 'in' or the string 'out'. | |||
669 | # | |||
670 | # Remaining arguments should be name=>value pairs. Sets up default values | |||
671 | # for options not supplied. Unrecognised options are a fatal error. | |||
672 | # | |||
673 | ||||
674 | sub handle_options { | |||
675 | my $self = shift; | |||
676 | my $dirn = shift; | |||
677 | ||||
678 | ||||
679 | # Determine valid options based on context | |||
680 | ||||
681 | my %known_opt; | |||
682 | if($dirn eq 'in') { | |||
683 | @known_opt{@KnownOptIn} = @KnownOptIn; | |||
684 | } | |||
685 | else { | |||
686 | @known_opt{@KnownOptOut} = @KnownOptOut; | |||
687 | } | |||
688 | ||||
689 | ||||
690 | # Store supplied options in hashref and weed out invalid ones | |||
691 | ||||
692 | if(@_ % 2) { | |||
693 | croak "Options must be name=>value pairs (odd number supplied)"; | |||
694 | } | |||
695 | my %raw_opt = @_; | |||
696 | my $opt = {}; | |||
697 | $self->{opt} = $opt; | |||
698 | ||||
699 | while(my($key, $val) = each %raw_opt) { | |||
700 | my $lkey = lc($key); | |||
701 | $lkey =~ s/_//g; | |||
702 | croak "Unrecognised option: $key" unless($known_opt{$lkey}); | |||
703 | $opt->{$lkey} = $val; | |||
704 | } | |||
705 | ||||
706 | ||||
707 | # Merge in options passed to constructor | |||
708 | ||||
709 | foreach (keys(%known_opt)) { | |||
710 | unless(exists($opt->{$_})) { | |||
711 | if(exists($self->{def_opt}->{$_})) { | |||
712 | $opt->{$_} = $self->{def_opt}->{$_}; | |||
713 | } | |||
714 | } | |||
715 | } | |||
716 | ||||
717 | ||||
718 | # Set sensible defaults if not supplied | |||
719 | ||||
720 | if(exists($opt->{rootname})) { | |||
721 | unless(defined($opt->{rootname})) { | |||
722 | $opt->{rootname} = ''; | |||
723 | } | |||
724 | } | |||
725 | else { | |||
726 | $opt->{rootname} = $DefRootName; | |||
727 | } | |||
728 | ||||
729 | if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { | |||
730 | $opt->{xmldecl} = $DefXmlDecl; | |||
731 | } | |||
732 | ||||
733 | if(exists($opt->{contentkey})) { | |||
734 | if($opt->{contentkey} =~ m{^-(.*)$}) { | |||
735 | $opt->{contentkey} = $1; | |||
736 | $opt->{collapseagain} = 1; | |||
737 | } | |||
738 | } | |||
739 | else { | |||
740 | $opt->{contentkey} = $DefContentKey; | |||
741 | } | |||
742 | ||||
743 | unless(exists($opt->{normalisespace})) { | |||
744 | $opt->{normalisespace} = $opt->{normalizespace}; | |||
745 | } | |||
746 | $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); | |||
747 | ||||
748 | # Cleanups for values assumed to be arrays later | |||
749 | ||||
750 | if($opt->{searchpath}) { | |||
751 | unless(ref($opt->{searchpath})) { | |||
752 | $opt->{searchpath} = [ $opt->{searchpath} ]; | |||
753 | } | |||
754 | } | |||
755 | else { | |||
756 | $opt->{searchpath} = [ ]; | |||
757 | } | |||
758 | ||||
759 | if($opt->{cache} and !ref($opt->{cache})) { | |||
760 | $opt->{cache} = [ $opt->{cache} ]; | |||
761 | } | |||
762 | if($opt->{cache}) { | |||
763 | $_ = lc($_) foreach (@{$opt->{cache}}); | |||
764 | foreach my $scheme (@{$opt->{cache}}) { | |||
765 | my $method = 'cache_read_' . $scheme; | |||
766 | croak "Unsupported caching scheme: $scheme" | |||
767 | unless($self->can($method)); | |||
768 | } | |||
769 | } | |||
770 | ||||
771 | if(exists($opt->{parseropts})) { | |||
772 | if($^W) { | |||
773 | carp "Warning: " . | |||
774 | "'ParserOpts' is deprecated, contact the author if you need it"; | |||
775 | } | |||
776 | } | |||
777 | else { | |||
778 | $opt->{parseropts} = [ ]; | |||
779 | } | |||
780 | ||||
781 | ||||
782 | # Special cleanup for {forcearray} which could be regex, arrayref or boolean | |||
783 | # or left to default to 0 | |||
784 | ||||
785 | if(exists($opt->{forcearray})) { | |||
786 | if(ref($opt->{forcearray}) eq 'Regexp') { | |||
787 | $opt->{forcearray} = [ $opt->{forcearray} ]; | |||
788 | } | |||
789 | ||||
790 | if(ref($opt->{forcearray}) eq 'ARRAY') { | |||
791 | my @force_list = @{$opt->{forcearray}}; | |||
792 | if(@force_list) { | |||
793 | $opt->{forcearray} = {}; | |||
794 | foreach my $tag (@force_list) { | |||
795 | if(ref($tag) eq 'Regexp') { | |||
796 | push @{$opt->{forcearray}->{_regex}}, $tag; | |||
797 | } | |||
798 | else { | |||
799 | $opt->{forcearray}->{$tag} = 1; | |||
800 | } | |||
801 | } | |||
802 | } | |||
803 | else { | |||
804 | $opt->{forcearray} = 0; | |||
805 | } | |||
806 | } | |||
807 | else { | |||
808 | $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); | |||
809 | } | |||
810 | } | |||
811 | else { | |||
812 | if($StrictMode and $dirn eq 'in') { | |||
813 | croak "No value specified for 'ForceArray' option in call to XML$dirn()"; | |||
814 | } | |||
815 | $opt->{forcearray} = 0; | |||
816 | } | |||
817 | ||||
818 | ||||
819 | # Special cleanup for {keyattr} which could be arrayref or hashref or left | |||
820 | # to default to arrayref | |||
821 | ||||
822 | if(exists($opt->{keyattr})) { | |||
823 | if(ref($opt->{keyattr})) { | |||
824 | if(ref($opt->{keyattr}) eq 'HASH') { | |||
825 | ||||
826 | # Make a copy so we can mess with it | |||
827 | ||||
828 | $opt->{keyattr} = { %{$opt->{keyattr}} }; | |||
829 | ||||
830 | ||||
831 | # Convert keyattr => { elem => '+attr' } | |||
832 | # to keyattr => { elem => [ 'attr', '+' ] } | |||
833 | ||||
834 | foreach my $el (keys(%{$opt->{keyattr}})) { | |||
835 | if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { | |||
836 | $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; | |||
837 | if($StrictMode and $dirn eq 'in') { | |||
838 | next if($opt->{forcearray} == 1); | |||
839 | next if(ref($opt->{forcearray}) eq 'HASH' | |||
840 | and $opt->{forcearray}->{$el}); | |||
841 | croak "<$el> set in KeyAttr but not in ForceArray"; | |||
842 | } | |||
843 | } | |||
844 | else { | |||
845 | delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) | |||
846 | } | |||
847 | } | |||
848 | } | |||
849 | else { | |||
850 | if(@{$opt->{keyattr}} == 0) { | |||
851 | delete($opt->{keyattr}); | |||
852 | } | |||
853 | } | |||
854 | } | |||
855 | else { | |||
856 | $opt->{keyattr} = [ $opt->{keyattr} ]; | |||
857 | } | |||
858 | } | |||
859 | else { | |||
860 | if($StrictMode) { | |||
861 | croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; | |||
862 | } | |||
863 | $opt->{keyattr} = [ @DefKeyAttr ]; | |||
864 | } | |||
865 | ||||
866 | ||||
867 | # Special cleanup for {valueattr} which could be arrayref or hashref | |||
868 | ||||
869 | if(exists($opt->{valueattr})) { | |||
870 | if(ref($opt->{valueattr}) eq 'ARRAY') { | |||
871 | $opt->{valueattrlist} = {}; | |||
872 | $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); | |||
873 | } | |||
874 | } | |||
875 | ||||
876 | # make sure there's nothing weird in {grouptags} | |||
877 | ||||
878 | if($opt->{grouptags}) { | |||
879 | croak "Illegal value for 'GroupTags' option - expected a hashref" | |||
880 | unless UNIVERSAL::isa($opt->{grouptags}, 'HASH'); | |||
881 | ||||
882 | while(my($key, $val) = each %{$opt->{grouptags}}) { | |||
883 | next if $key ne $val; | |||
884 | croak "Bad value in GroupTags: '$key' => '$val'"; | |||
885 | } | |||
886 | } | |||
887 | ||||
888 | ||||
889 | # Check the {variables} option is valid and initialise variables hash | |||
890 | ||||
891 | if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { | |||
892 | croak "Illegal value for 'Variables' option - expected a hashref"; | |||
893 | } | |||
894 | ||||
895 | if($opt->{variables}) { | |||
896 | $self->{_var_values} = { %{$opt->{variables}} }; | |||
897 | } | |||
898 | elsif($opt->{varattr}) { | |||
899 | $self->{_var_values} = {}; | |||
900 | } | |||
901 | ||||
902 | } | |||
903 | ||||
904 | ||||
905 | ############################################################################## | |||
906 | # Method: find_xml_file() | |||
907 | # | |||
908 | # Helper routine for XMLin(). | |||
909 | # Takes a filename, and a list of directories, attempts to locate the file in | |||
910 | # the directories listed. | |||
911 | # Returns a full pathname on success; croaks on failure. | |||
912 | # | |||
913 | ||||
914 | sub find_xml_file { | |||
915 | my $self = shift; | |||
916 | my $file = shift; | |||
917 | my @search_path = @_; | |||
918 | ||||
919 | ||||
920 | require File::Basename; | |||
921 | require File::Spec; | |||
922 | ||||
923 | my($filename, $filedir) = File::Basename::fileparse($file); | |||
924 | ||||
925 | if($filename ne $file) { # Ignore searchpath if dir component | |||
926 | return($file) if(-e $file); | |||
927 | } | |||
928 | else { | |||
929 | my($path); | |||
930 | foreach $path (@search_path) { | |||
931 | my $fullpath = File::Spec->catfile($path, $file); | |||
932 | return($fullpath) if(-e $fullpath); | |||
933 | } | |||
934 | } | |||
935 | ||||
936 | # If user did not supply a search path, default to current directory | |||
937 | ||||
938 | if(!@search_path) { | |||
939 | return($file) if(-e $file); | |||
940 | croak "File does not exist: $file"; | |||
941 | } | |||
942 | ||||
943 | croak "Could not find $file in ", join(':', @search_path); | |||
944 | } | |||
945 | ||||
946 | ||||
947 | ############################################################################## | |||
948 | # Method: collapse() | |||
949 | # | |||
950 | # Helper routine for XMLin(). This routine really comprises the 'smarts' (or | |||
951 | # value add) of this module. | |||
952 | # | |||
953 | # Takes the parse tree that XML::Parser produced from the supplied XML and | |||
954 | # recurses through it 'collapsing' unnecessary levels of indirection (nested | |||
955 | # arrays etc) to produce a data structure that is easier to work with. | |||
956 | # | |||
957 | # Elements in the original parser tree are represented as an element name | |||
958 | # followed by an arrayref. The first element of the array is a hashref | |||
959 | # containing the attributes. The rest of the array contains a list of any | |||
960 | # nested elements as name+arrayref pairs: | |||
961 | # | |||
962 | # <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ] | |||
963 | # | |||
964 | # The special element name '0' (zero) flags text content. | |||
965 | # | |||
966 | # This routine cuts down the noise by discarding any text content consisting of | |||
967 | # only whitespace and then moves the nested elements into the attribute hash | |||
968 | # using the name of the nested element as the hash key and the collapsed | |||
969 | # version of the nested element as the value. Multiple nested elements with | |||
970 | # the same name will initially be represented as an arrayref, but this may be | |||
971 | # 'folded' into a hashref depending on the value of the keyattr option. | |||
972 | # | |||
973 | ||||
974 | sub collapse { | |||
975 | my $self = shift; | |||
976 | ||||
977 | ||||
978 | # Start with the hash of attributes | |||
979 | ||||
980 | my $attr = shift; | |||
981 | if($self->{opt}->{noattr}) { # Discard if 'noattr' set | |||
982 | $attr = {}; | |||
983 | } | |||
984 | elsif($self->{opt}->{normalisespace} == 2) { | |||
985 | while(my($key, $value) = each %$attr) { | |||
986 | $attr->{$key} = $self->normalise_space($value) | |||
987 | } | |||
988 | } | |||
989 | ||||
990 | ||||
991 | # Do variable substitutions | |||
992 | ||||
993 | if(my $var = $self->{_var_values}) { | |||
994 | while(my($key, $val) = each(%$attr)) { | |||
995 | $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge; | |||
996 | $attr->{$key} = $val; | |||
997 | } | |||
998 | } | |||
999 | ||||
1000 | ||||
1001 | # Roll up 'value' attributes (but only if no nested elements) | |||
1002 | ||||
1003 | if(!@_ and keys %$attr == 1) { | |||
1004 | my($k) = keys %$attr; | |||
1005 | if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { | |||
1006 | return $attr->{$k}; | |||
1007 | } | |||
1008 | } | |||
1009 | ||||
1010 | ||||
1011 | # Add any nested elements | |||
1012 | ||||
1013 | my($key, $val); | |||
1014 | while(@_) { | |||
1015 | $key = shift; | |||
1016 | $val = shift; | |||
1017 | ||||
1018 | if(ref($val)) { | |||
1019 | $val = $self->collapse(@$val); | |||
1020 | next if(!defined($val) and $self->{opt}->{suppressempty}); | |||
1021 | } | |||
1022 | elsif($key eq '0') { | |||
1023 | next if($val =~ m{^\s*$}s); # Skip all whitespace content | |||
1024 | ||||
1025 | $val = $self->normalise_space($val) | |||
1026 | if($self->{opt}->{normalisespace} == 2); | |||
1027 | ||||
1028 | # do variable substitutions | |||
1029 | ||||
1030 | if(my $var = $self->{_var_values}) { | |||
1031 | $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; | |||
1032 | } | |||
1033 | ||||
1034 | ||||
1035 | # look for variable definitions | |||
1036 | ||||
1037 | if(my $var = $self->{opt}->{varattr}) { | |||
1038 | if(exists $attr->{$var}) { | |||
1039 | $self->set_var($attr->{$var}, $val); | |||
1040 | } | |||
1041 | } | |||
1042 | ||||
1043 | ||||
1044 | # Collapse text content in element with no attributes to a string | |||
1045 | ||||
1046 | if(!%$attr and !@_) { | |||
1047 | return($self->{opt}->{forcecontent} ? | |||
1048 | { $self->{opt}->{contentkey} => $val } : $val | |||
1049 | ); | |||
1050 | } | |||
1051 | $key = $self->{opt}->{contentkey}; | |||
1052 | } | |||
1053 | ||||
1054 | ||||
1055 | # Combine duplicate attributes into arrayref if required | |||
1056 | ||||
1057 | if(exists($attr->{$key})) { | |||
1058 | if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { | |||
1059 | push(@{$attr->{$key}}, $val); | |||
1060 | } | |||
1061 | else { | |||
1062 | $attr->{$key} = [ $attr->{$key}, $val ]; | |||
1063 | } | |||
1064 | } | |||
1065 | elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | |||
1066 | $attr->{$key} = [ $val ]; | |||
1067 | } | |||
1068 | else { | |||
1069 | if( $key ne $self->{opt}->{contentkey} | |||
1070 | and ( | |||
1071 | ($self->{opt}->{forcearray} == 1) | |||
1072 | or ( | |||
1073 | (ref($self->{opt}->{forcearray}) eq 'HASH') | |||
1074 | and ( | |||
1075 | $self->{opt}->{forcearray}->{$key} | |||
1076 | or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) | |||
1077 | ) | |||
1078 | ) | |||
1079 | ) | |||
1080 | ) { | |||
1081 | $attr->{$key} = [ $val ]; | |||
1082 | } | |||
1083 | else { | |||
1084 | $attr->{$key} = $val; | |||
1085 | } | |||
1086 | } | |||
1087 | ||||
1088 | } | |||
1089 | ||||
1090 | ||||
1091 | # Turn arrayrefs into hashrefs if key fields present | |||
1092 | ||||
1093 | if($self->{opt}->{keyattr}) { | |||
1094 | while(($key,$val) = each %$attr) { | |||
1095 | if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | |||
1096 | $attr->{$key} = $self->array_to_hash($key, $val); | |||
1097 | } | |||
1098 | } | |||
1099 | } | |||
1100 | ||||
1101 | ||||
1102 | # disintermediate grouped tags | |||
1103 | ||||
1104 | if($self->{opt}->{grouptags}) { | |||
1105 | while(my($key, $val) = each(%$attr)) { | |||
1106 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | |||
1107 | next unless(exists($self->{opt}->{grouptags}->{$key})); | |||
1108 | ||||
1109 | my($child_key, $child_val) = %$val; | |||
1110 | ||||
1111 | if($self->{opt}->{grouptags}->{$key} eq $child_key) { | |||
1112 | $attr->{$key}= $child_val; | |||
1113 | } | |||
1114 | } | |||
1115 | } | |||
1116 | ||||
1117 | ||||
1118 | # Fold hashes containing a single anonymous array up into just the array | |||
1119 | ||||
1120 | my $count = scalar keys %$attr; | |||
1121 | if($count == 1 | |||
1122 | and exists $attr->{anon} | |||
1123 | and UNIVERSAL::isa($attr->{anon}, 'ARRAY') | |||
1124 | ) { | |||
1125 | return($attr->{anon}); | |||
1126 | } | |||
1127 | ||||
1128 | ||||
1129 | # Do the right thing if hash is empty, otherwise just return it | |||
1130 | ||||
1131 | if(!%$attr and exists($self->{opt}->{suppressempty})) { | |||
1132 | if(defined($self->{opt}->{suppressempty}) and | |||
1133 | $self->{opt}->{suppressempty} eq '') { | |||
1134 | return(''); | |||
1135 | } | |||
1136 | return(undef); | |||
1137 | } | |||
1138 | ||||
1139 | ||||
1140 | # Roll up named elements with named nested 'value' attributes | |||
1141 | ||||
1142 | if($self->{opt}->{valueattr}) { | |||
1143 | while(my($key, $val) = each(%$attr)) { | |||
1144 | next unless($self->{opt}->{valueattr}->{$key}); | |||
1145 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | |||
1146 | my($k) = keys %$val; | |||
1147 | next unless($k eq $self->{opt}->{valueattr}->{$key}); | |||
1148 | $attr->{$key} = $val->{$k}; | |||
1149 | } | |||
1150 | } | |||
1151 | ||||
1152 | return($attr) | |||
1153 | ||||
1154 | } | |||
1155 | ||||
1156 | ||||
1157 | ############################################################################## | |||
1158 | # Method: set_var() | |||
1159 | # | |||
1160 | # Called when a variable definition is encountered in the XML. (A variable | |||
1161 | # definition looks like <element attrname="name">value</element> where attrname | |||
1162 | # matches the varattr setting). | |||
1163 | # | |||
1164 | ||||
1165 | sub set_var { | |||
1166 | my($self, $name, $value) = @_; | |||
1167 | ||||
1168 | $self->{_var_values}->{$name} = $value; | |||
1169 | } | |||
1170 | ||||
1171 | ||||
1172 | ############################################################################## | |||
1173 | # Method: get_var() | |||
1174 | # | |||
1175 | # Called during variable substitution to get the value for the named variable. | |||
1176 | # | |||
1177 | ||||
1178 | sub get_var { | |||
1179 | my($self, $name) = @_; | |||
1180 | ||||
1181 | my $value = $self->{_var_values}->{$name}; | |||
1182 | return $value if(defined($value)); | |||
1183 | ||||
1184 | return '${' . $name . '}'; | |||
1185 | } | |||
1186 | ||||
1187 | ||||
1188 | ############################################################################## | |||
1189 | # Method: normalise_space() | |||
1190 | # | |||
1191 | # Strips leading and trailing whitespace and collapses sequences of whitespace | |||
1192 | # characters to a single space. | |||
1193 | # | |||
1194 | ||||
1195 | sub normalise_space { | |||
1196 | my($self, $text) = @_; | |||
1197 | ||||
1198 | $text =~ s/^\s+//s; | |||
1199 | $text =~ s/\s+$//s; | |||
1200 | $text =~ s/\s\s+/ /sg; | |||
1201 | ||||
1202 | return $text; | |||
1203 | } | |||
1204 | ||||
1205 | ||||
1206 | ############################################################################## | |||
1207 | # Method: array_to_hash() | |||
1208 | # | |||
1209 | # Helper routine for collapse(). | |||
1210 | # Attempts to 'fold' an array of hashes into an hash of hashes. Returns a | |||
1211 | # reference to the hash on success or the original array if folding is | |||
1212 | # not possible. Behaviour is controlled by 'keyattr' option. | |||
1213 | # | |||
1214 | ||||
1215 | sub array_to_hash { | |||
1216 | my $self = shift; | |||
1217 | my $name = shift; | |||
1218 | my $arrayref = shift; | |||
1219 | ||||
1220 | my $hashref = $self->new_hashref; | |||
1221 | ||||
1222 | my($i, $key, $val, $flag); | |||
1223 | ||||
1224 | ||||
1225 | # Handle keyattr => { .... } | |||
1226 | ||||
1227 | if(ref($self->{opt}->{keyattr}) eq 'HASH') { | |||
1228 | return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); | |||
1229 | ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; | |||
1230 | for($i = 0; $i < @$arrayref; $i++) { | |||
1231 | if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and | |||
1232 | exists($arrayref->[$i]->{$key}) | |||
1233 | ) { | |||
1234 | $val = $arrayref->[$i]->{$key}; | |||
1235 | if(ref($val)) { | |||
1236 | $self->die_or_warn("<$name> element has non-scalar '$key' key attribute"); | |||
1237 | return($arrayref); | |||
1238 | } | |||
1239 | $val = $self->normalise_space($val) | |||
1240 | if($self->{opt}->{normalisespace} == 1); | |||
1241 | $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") | |||
1242 | if(exists($hashref->{$val})); | |||
1243 | $hashref->{$val} = { %{$arrayref->[$i]} }; | |||
1244 | $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); | |||
1245 | delete $hashref->{$val}->{$key} unless($flag eq '+'); | |||
1246 | } | |||
1247 | else { | |||
1248 | $self->die_or_warn("<$name> element has no '$key' key attribute"); | |||
1249 | return($arrayref); | |||
1250 | } | |||
1251 | } | |||
1252 | } | |||
1253 | ||||
1254 | ||||
1255 | # Or assume keyattr => [ .... ] | |||
1256 | ||||
1257 | else { | |||
1258 | my $default_keys = | |||
1259 | join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}}); | |||
1260 | ||||
1261 | ELEMENT: for($i = 0; $i < @$arrayref; $i++) { | |||
1262 | return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); | |||
1263 | ||||
1264 | foreach $key (@{$self->{opt}->{keyattr}}) { | |||
1265 | if(defined($arrayref->[$i]->{$key})) { | |||
1266 | $val = $arrayref->[$i]->{$key}; | |||
1267 | if(ref($val)) { | |||
1268 | $self->die_or_warn("<$name> element has non-scalar '$key' key attribute") | |||
1269 | if not $default_keys; | |||
1270 | return($arrayref); | |||
1271 | } | |||
1272 | $val = $self->normalise_space($val) | |||
1273 | if($self->{opt}->{normalisespace} == 1); | |||
1274 | $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") | |||
1275 | if(exists($hashref->{$val})); | |||
1276 | $hashref->{$val} = { %{$arrayref->[$i]} }; | |||
1277 | delete $hashref->{$val}->{$key}; | |||
1278 | next ELEMENT; | |||
1279 | } | |||
1280 | } | |||
1281 | ||||
1282 | return($arrayref); # No keyfield matched | |||
1283 | } | |||
1284 | } | |||
1285 | ||||
1286 | # collapse any hashes which now only have a 'content' key | |||
1287 | ||||
1288 | if($self->{opt}->{collapseagain}) { | |||
1289 | $hashref = $self->collapse_content($hashref); | |||
1290 | } | |||
1291 | ||||
1292 | return($hashref); | |||
1293 | } | |||
1294 | ||||
1295 | ||||
1296 | ############################################################################## | |||
1297 | # Method: die_or_warn() | |||
1298 | # | |||
1299 | # Takes a diagnostic message and does one of three things: | |||
1300 | # 1. dies if strict mode is enabled | |||
1301 | # 2. warns if warnings are enabled but strict mode is not | |||
1302 | # 3. ignores message and resturns silently if neither strict mode nor warnings | |||
1303 | # are enabled | |||
1304 | # | |||
1305 | ||||
1306 | sub die_or_warn { | |||
1307 | my $self = shift; | |||
1308 | my $msg = shift; | |||
1309 | ||||
1310 | croak $msg if($StrictMode); | |||
1311 | carp "Warning: $msg" if($^W); | |||
1312 | } | |||
1313 | ||||
1314 | ||||
1315 | ############################################################################## | |||
1316 | # Method: new_hashref() | |||
1317 | # | |||
1318 | # This is a hook routine for overriding in a sub-class. Some people believe | |||
1319 | # that using Tie::IxHash here will solve order-loss problems. | |||
1320 | # | |||
1321 | ||||
1322 | sub new_hashref { | |||
1323 | my $self = shift; | |||
1324 | ||||
1325 | return { @_ }; | |||
1326 | } | |||
1327 | ||||
1328 | ||||
1329 | ############################################################################## | |||
1330 | # Method: collapse_content() | |||
1331 | # | |||
1332 | # Helper routine for array_to_hash | |||
1333 | # | |||
1334 | # Arguments expected are: | |||
1335 | # - an XML::Simple object | |||
1336 | # - a hasref | |||
1337 | # the hashref is a former array, turned into a hash by array_to_hash because | |||
1338 | # of the presence of key attributes | |||
1339 | # at this point collapse_content avoids over-complicated structures like | |||
1340 | # dir => { libexecdir => { content => '$exec_prefix/libexec' }, | |||
1341 | # localstatedir => { content => '$prefix' }, | |||
1342 | # } | |||
1343 | # into | |||
1344 | # dir => { libexecdir => '$exec_prefix/libexec', | |||
1345 | # localstatedir => '$prefix', | |||
1346 | # } | |||
1347 | ||||
1348 | sub collapse_content { | |||
1349 | my $self = shift; | |||
1350 | my $hashref = shift; | |||
1351 | ||||
1352 | my $contentkey = $self->{opt}->{contentkey}; | |||
1353 | ||||
1354 | # first go through the values,checking that they are fit to collapse | |||
1355 | foreach my $val (values %$hashref) { | |||
1356 | return $hashref unless ( (ref($val) eq 'HASH') | |||
1357 | and (keys %$val == 1) | |||
1358 | and (exists $val->{$contentkey}) | |||
1359 | ); | |||
1360 | } | |||
1361 | ||||
1362 | # now collapse them | |||
1363 | foreach my $key (keys %$hashref) { | |||
1364 | $hashref->{$key}= $hashref->{$key}->{$contentkey}; | |||
1365 | } | |||
1366 | ||||
1367 | return $hashref; | |||
1368 | } | |||
1369 | ||||
1370 | ||||
1371 | ############################################################################## | |||
1372 | # Method: value_to_xml() | |||
1373 | # | |||
1374 | # Helper routine for XMLout() - recurses through a data structure building up | |||
1375 | # and returning an XML representation of that structure as a string. | |||
1376 | # | |||
1377 | # Arguments expected are: | |||
1378 | # - the data structure to be encoded (usually a reference) | |||
1379 | # - the XML tag name to use for this item | |||
1380 | # - a string of spaces for use as the current indent level | |||
1381 | # | |||
1382 | ||||
1383 | sub value_to_xml { | |||
1384 | my $self = shift;; | |||
1385 | ||||
1386 | ||||
1387 | # Grab the other arguments | |||
1388 | ||||
1389 | my($ref, $name, $indent) = @_; | |||
1390 | ||||
1391 | my $named = (defined($name) and $name ne '' ? 1 : 0); | |||
1392 | ||||
1393 | my $nl = "\n"; | |||
1394 | ||||
1395 | my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! | |||
1396 | if($self->{opt}->{noindent}) { | |||
1397 | $indent = ''; | |||
1398 | $nl = ''; | |||
1399 | } | |||
1400 | ||||
1401 | ||||
1402 | # Convert to XML | |||
1403 | ||||
1404 | if(ref($ref)) { | |||
1405 | croak "circular data structures not supported" | |||
1406 | if(grep($_ == $ref, @{$self->{_ancestors}})); | |||
1407 | push @{$self->{_ancestors}}, $ref; | |||
1408 | } | |||
1409 | else { | |||
1410 | if($named) { | |||
1411 | return(join('', | |||
1412 | $indent, '<', $name, '>', | |||
1413 | ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), | |||
1414 | '</', $name, ">", $nl | |||
1415 | )); | |||
1416 | } | |||
1417 | else { | |||
1418 | return("$ref$nl"); | |||
1419 | } | |||
1420 | } | |||
1421 | ||||
1422 | ||||
1423 | # Unfold hash to array if possible | |||
1424 | ||||
1425 | if(UNIVERSAL::isa($ref, 'HASH') # It is a hash | |||
1426 | and keys %$ref # and it's not empty | |||
1427 | and $self->{opt}->{keyattr} # and folding is enabled | |||
1428 | and !$is_root # and its not the root element | |||
1429 | ) { | |||
1430 | $ref = $self->hash_to_array($name, $ref); | |||
1431 | } | |||
1432 | ||||
1433 | ||||
1434 | my @result = (); | |||
1435 | my($key, $value); | |||
1436 | ||||
1437 | ||||
1438 | # Handle hashrefs | |||
1439 | ||||
1440 | if(UNIVERSAL::isa($ref, 'HASH')) { | |||
1441 | ||||
1442 | # Reintermediate grouped values if applicable | |||
1443 | ||||
1444 | if($self->{opt}->{grouptags}) { | |||
1445 | $ref = $self->copy_hash($ref); | |||
1446 | while(my($key, $val) = each %$ref) { | |||
1447 | if($self->{opt}->{grouptags}->{$key}) { | |||
1448 | $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val }; | |||
1449 | } | |||
1450 | } | |||
1451 | } | |||
1452 | ||||
1453 | ||||
1454 | # Scan for namespace declaration attributes | |||
1455 | ||||
1456 | my $nsdecls = ''; | |||
1457 | my $default_ns_uri; | |||
1458 | if($self->{nsup}) { | |||
1459 | $ref = $self->copy_hash($ref); | |||
1460 | $self->{nsup}->push_context(); | |||
1461 | ||||
1462 | # Look for default namespace declaration first | |||
1463 | ||||
1464 | if(exists($ref->{xmlns})) { | |||
1465 | $self->{nsup}->declare_prefix('', $ref->{xmlns}); | |||
1466 | $nsdecls .= qq( xmlns="$ref->{xmlns}"); | |||
1467 | delete($ref->{xmlns}); | |||
1468 | } | |||
1469 | $default_ns_uri = $self->{nsup}->get_uri(''); | |||
1470 | ||||
1471 | ||||
1472 | # Then check all the other keys | |||
1473 | ||||
1474 | foreach my $qname (keys(%$ref)) { | |||
1475 | my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); | |||
1476 | if($uri) { | |||
1477 | if($uri eq $xmlns_ns) { | |||
1478 | $self->{nsup}->declare_prefix($lname, $ref->{$qname}); | |||
1479 | $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); | |||
1480 | delete($ref->{$qname}); | |||
1481 | } | |||
1482 | } | |||
1483 | } | |||
1484 | ||||
1485 | # Translate any remaining Clarkian names | |||
1486 | ||||
1487 | foreach my $qname (keys(%$ref)) { | |||
1488 | my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); | |||
1489 | if($uri) { | |||
1490 | if($default_ns_uri and $uri eq $default_ns_uri) { | |||
1491 | $ref->{$lname} = $ref->{$qname}; | |||
1492 | delete($ref->{$qname}); | |||
1493 | } | |||
1494 | else { | |||
1495 | my $prefix = $self->{nsup}->get_prefix($uri); | |||
1496 | unless($prefix) { | |||
1497 | # $self->{nsup}->declare_prefix(undef, $uri); | |||
1498 | # $prefix = $self->{nsup}->get_prefix($uri); | |||
1499 | $prefix = $self->{ns_prefix}++; | |||
1500 | $self->{nsup}->declare_prefix($prefix, $uri); | |||
1501 | $nsdecls .= qq( xmlns:$prefix="$uri"); | |||
1502 | } | |||
1503 | $ref->{"$prefix:$lname"} = $ref->{$qname}; | |||
1504 | delete($ref->{$qname}); | |||
1505 | } | |||
1506 | } | |||
1507 | } | |||
1508 | } | |||
1509 | ||||
1510 | ||||
1511 | my @nested = (); | |||
1512 | my $text_content = undef; | |||
1513 | if($named) { | |||
1514 | push @result, $indent, '<', $name, $nsdecls; | |||
1515 | } | |||
1516 | ||||
1517 | if(keys %$ref) { | |||
1518 | my $first_arg = 1; | |||
1519 | foreach my $key ($self->sorted_keys($name, $ref)) { | |||
1520 | my $value = $ref->{$key}; | |||
1521 | next if(substr($key, 0, 1) eq '-'); | |||
1522 | if(!defined($value)) { | |||
1523 | next if $self->{opt}->{suppressempty}; | |||
1524 | unless(exists($self->{opt}->{suppressempty}) | |||
1525 | and !defined($self->{opt}->{suppressempty}) | |||
1526 | ) { | |||
1527 | carp 'Use of uninitialized value' if($^W); | |||
1528 | } | |||
1529 | if($key eq $self->{opt}->{contentkey}) { | |||
1530 | $text_content = ''; | |||
1531 | } | |||
1532 | else { | |||
1533 | $value = exists($self->{opt}->{suppressempty}) ? {} : ''; | |||
1534 | } | |||
1535 | } | |||
1536 | ||||
1537 | if(!ref($value) | |||
1538 | and $self->{opt}->{valueattr} | |||
1539 | and $self->{opt}->{valueattr}->{$key} | |||
1540 | ) { | |||
1541 | $value = { $self->{opt}->{valueattr}->{$key} => $value }; | |||
1542 | } | |||
1543 | ||||
1544 | if(ref($value) or $self->{opt}->{noattr}) { | |||
1545 | push @nested, | |||
1546 | $self->value_to_xml($value, $key, "$indent "); | |||
1547 | } | |||
1548 | else { | |||
1549 | $value = $self->escape_value($value) unless($self->{opt}->{noescape}); | |||
1550 | if($key eq $self->{opt}->{contentkey}) { | |||
1551 | $text_content = $value; | |||
1552 | } | |||
1553 | else { | |||
1554 | push @result, "\n$indent " . ' ' x length($name) | |||
1555 | if($self->{opt}->{attrindent} and !$first_arg); | |||
1556 | push @result, ' ', $key, '="', $value , '"'; | |||
1557 | $first_arg = 0; | |||
1558 | } | |||
1559 | } | |||
1560 | } | |||
1561 | } | |||
1562 | else { | |||
1563 | $text_content = ''; | |||
1564 | } | |||
1565 | ||||
1566 | if(@nested or defined($text_content)) { | |||
1567 | if($named) { | |||
1568 | push @result, ">"; | |||
1569 | if(defined($text_content)) { | |||
1570 | push @result, $text_content; | |||
1571 | $nested[0] =~ s/^\s+// if(@nested); | |||
1572 | } | |||
1573 | else { | |||
1574 | push @result, $nl; | |||
1575 | } | |||
1576 | if(@nested) { | |||
1577 | push @result, @nested, $indent; | |||
1578 | } | |||
1579 | push @result, '</', $name, ">", $nl; | |||
1580 | } | |||
1581 | else { | |||
1582 | push @result, @nested; # Special case if no root elements | |||
1583 | } | |||
1584 | } | |||
1585 | else { | |||
1586 | push @result, " />", $nl; | |||
1587 | } | |||
1588 | $self->{nsup}->pop_context() if($self->{nsup}); | |||
1589 | } | |||
1590 | ||||
1591 | ||||
1592 | # Handle arrayrefs | |||
1593 | ||||
1594 | elsif(UNIVERSAL::isa($ref, 'ARRAY')) { | |||
1595 | foreach $value (@$ref) { | |||
1596 | next if !defined($value) and $self->{opt}->{suppressempty}; | |||
1597 | if(!ref($value)) { | |||
1598 | push @result, | |||
1599 | $indent, '<', $name, '>', | |||
1600 | ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), | |||
1601 | '</', $name, ">$nl"; | |||
1602 | } | |||
1603 | elsif(UNIVERSAL::isa($value, 'HASH')) { | |||
1604 | push @result, $self->value_to_xml($value, $name, $indent); | |||
1605 | } | |||
1606 | else { | |||
1607 | push @result, | |||
1608 | $indent, '<', $name, ">$nl", | |||
1609 | $self->value_to_xml($value, 'anon', "$indent "), | |||
1610 | $indent, '</', $name, ">$nl"; | |||
1611 | } | |||
1612 | } | |||
1613 | } | |||
1614 | ||||
1615 | else { | |||
1616 | croak "Can't encode a value of type: " . ref($ref); | |||
1617 | } | |||
1618 | ||||
1619 | ||||
1620 | pop @{$self->{_ancestors}} if(ref($ref)); | |||
1621 | ||||
1622 | return(join('', @result)); | |||
1623 | } | |||
1624 | ||||
1625 | ||||
1626 | ############################################################################## | |||
1627 | # Method: sorted_keys() | |||
1628 | # | |||
1629 | # Returns the keys of the referenced hash sorted into alphabetical order, but | |||
1630 | # with the 'key' key (as in KeyAttr) first, if there is one. | |||
1631 | # | |||
1632 | ||||
1633 | sub sorted_keys { | |||
1634 | my($self, $name, $ref) = @_; | |||
1635 | ||||
1636 | return keys %$ref if $self->{opt}->{nosort}; | |||
1637 | ||||
1638 | my %hash = %$ref; | |||
1639 | my $keyattr = $self->{opt}->{keyattr}; | |||
1640 | ||||
1641 | my @key; | |||
1642 | ||||
1643 | if(ref $keyattr eq 'HASH') { | |||
1644 | if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { | |||
1645 | push @key, $keyattr->{$name}->[0]; | |||
1646 | delete $hash{$keyattr->{$name}->[0]}; | |||
1647 | } | |||
1648 | } | |||
1649 | elsif(ref $keyattr eq 'ARRAY') { | |||
1650 | foreach (@{$keyattr}) { | |||
1651 | if(exists $hash{$_}) { | |||
1652 | push @key, $_; | |||
1653 | delete $hash{$_}; | |||
1654 | last; | |||
1655 | } | |||
1656 | } | |||
1657 | } | |||
1658 | ||||
1659 | return(@key, sort keys %hash); | |||
1660 | } | |||
1661 | ||||
1662 | ############################################################################## | |||
1663 | # Method: escape_value() | |||
1664 | # | |||
1665 | # Helper routine for automatically escaping values for XMLout(). | |||
1666 | # Expects a scalar data value. Returns escaped version. | |||
1667 | # | |||
1668 | ||||
1669 | sub escape_value { | |||
1670 | my($self, $data) = @_; | |||
1671 | ||||
1672 | return '' unless(defined($data)); | |||
1673 | ||||
1674 | $data =~ s/&/&/sg; | |||
1675 | $data =~ s/</</sg; | |||
1676 | $data =~ s/>/>/sg; | |||
1677 | $data =~ s/"/"/sg; | |||
1678 | ||||
1679 | my $level = $self->{opt}->{numericescape} or return $data; | |||
1680 | ||||
1681 | return $self->numeric_escape($data, $level); | |||
1682 | } | |||
1683 | ||||
1684 | sub numeric_escape { | |||
1685 | my($self, $data, $level) = @_; | |||
1686 | ||||
1687 | 3 | 1.17ms | 390µs | use utf8; # required for 5.6 # spent 12µs making 1 call to utf8::import |
1688 | ||||
1689 | if($self->{opt}->{numericescape} eq '2') { | |||
1690 | $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; | |||
1691 | } | |||
1692 | else { | |||
1693 | $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; | |||
1694 | } | |||
1695 | ||||
1696 | return $data; | |||
1697 | } | |||
1698 | ||||
1699 | ||||
1700 | ############################################################################## | |||
1701 | # Method: hash_to_array() | |||
1702 | # | |||
1703 | # Helper routine for value_to_xml(). | |||
1704 | # Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a | |||
1705 | # reference to the array on success or the original hash if unfolding is | |||
1706 | # not possible. | |||
1707 | # | |||
1708 | ||||
1709 | sub hash_to_array { | |||
1710 | my $self = shift; | |||
1711 | my $parent = shift; | |||
1712 | my $hashref = shift; | |||
1713 | ||||
1714 | my $arrayref = []; | |||
1715 | ||||
1716 | my($key, $value); | |||
1717 | ||||
1718 | my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; | |||
1719 | foreach $key (@keys) { | |||
1720 | $value = $hashref->{$key}; | |||
1721 | return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); | |||
1722 | ||||
1723 | if(ref($self->{opt}->{keyattr}) eq 'HASH') { | |||
1724 | return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); | |||
1725 | push @$arrayref, $self->copy_hash( | |||
1726 | $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key | |||
1727 | ); | |||
1728 | } | |||
1729 | else { | |||
1730 | push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); | |||
1731 | } | |||
1732 | } | |||
1733 | ||||
1734 | return($arrayref); | |||
1735 | } | |||
1736 | ||||
1737 | ||||
1738 | ############################################################################## | |||
1739 | # Method: copy_hash() | |||
1740 | # | |||
1741 | # Helper routine for hash_to_array(). When unfolding a hash of hashes into | |||
1742 | # an array of hashes, we need to copy the key from the outer hash into the | |||
1743 | # inner hash. This routine makes a copy of the original hash so we don't | |||
1744 | # destroy the original data structure. You might wish to override this | |||
1745 | # method if you're using tied hashes and don't want them to get untied. | |||
1746 | # | |||
1747 | ||||
1748 | sub copy_hash { | |||
1749 | my($self, $orig, @extra) = @_; | |||
1750 | ||||
1751 | return { @extra, %$orig }; | |||
1752 | } | |||
1753 | ||||
1754 | ############################################################################## | |||
1755 | # Methods required for building trees from SAX events | |||
1756 | ############################################################################## | |||
1757 | ||||
1758 | sub start_document { | |||
1759 | my $self = shift; | |||
1760 | ||||
1761 | $self->handle_options('in') unless($self->{opt}); | |||
1762 | ||||
1763 | $self->{lists} = []; | |||
1764 | $self->{curlist} = $self->{tree} = []; | |||
1765 | } | |||
1766 | ||||
1767 | ||||
1768 | sub start_element { | |||
1769 | my $self = shift; | |||
1770 | my $element = shift; | |||
1771 | ||||
1772 | my $name = $element->{Name}; | |||
1773 | if($self->{opt}->{nsexpand}) { | |||
1774 | $name = $element->{LocalName} || ''; | |||
1775 | if($element->{NamespaceURI}) { | |||
1776 | $name = '{' . $element->{NamespaceURI} . '}' . $name; | |||
1777 | } | |||
1778 | } | |||
1779 | my $attributes = {}; | |||
1780 | if($element->{Attributes}) { # Might be undef | |||
1781 | foreach my $attr (values %{$element->{Attributes}}) { | |||
1782 | if($self->{opt}->{nsexpand}) { | |||
1783 | my $name = $attr->{LocalName} || ''; | |||
1784 | if($attr->{NamespaceURI}) { | |||
1785 | $name = '{' . $attr->{NamespaceURI} . '}' . $name | |||
1786 | } | |||
1787 | $name = 'xmlns' if($name eq $bad_def_ns_jcn); | |||
1788 | $attributes->{$name} = $attr->{Value}; | |||
1789 | } | |||
1790 | else { | |||
1791 | $attributes->{$attr->{Name}} = $attr->{Value}; | |||
1792 | } | |||
1793 | } | |||
1794 | } | |||
1795 | my $newlist = [ $attributes ]; | |||
1796 | push @{ $self->{lists} }, $self->{curlist}; | |||
1797 | push @{ $self->{curlist} }, $name => $newlist; | |||
1798 | $self->{curlist} = $newlist; | |||
1799 | } | |||
1800 | ||||
1801 | ||||
1802 | sub characters { | |||
1803 | my $self = shift; | |||
1804 | my $chars = shift; | |||
1805 | ||||
1806 | my $text = $chars->{Data}; | |||
1807 | my $clist = $self->{curlist}; | |||
1808 | my $pos = $#$clist; | |||
1809 | ||||
1810 | if ($pos > 0 and $clist->[$pos - 1] eq '0') { | |||
1811 | $clist->[$pos] .= $text; | |||
1812 | } | |||
1813 | else { | |||
1814 | push @$clist, 0 => $text; | |||
1815 | } | |||
1816 | } | |||
1817 | ||||
1818 | ||||
1819 | sub end_element { | |||
1820 | my $self = shift; | |||
1821 | ||||
1822 | $self->{curlist} = pop @{ $self->{lists} }; | |||
1823 | } | |||
1824 | ||||
1825 | ||||
1826 | sub end_document { | |||
1827 | my $self = shift; | |||
1828 | ||||
1829 | delete($self->{curlist}); | |||
1830 | delete($self->{lists}); | |||
1831 | ||||
1832 | my $tree = $self->{tree}; | |||
1833 | delete($self->{tree}); | |||
1834 | ||||
1835 | ||||
1836 | # Return tree as-is to XMLin() | |||
1837 | ||||
1838 | return($tree) if($self->{nocollapse}); | |||
1839 | ||||
1840 | ||||
1841 | # Or collapse it before returning it to SAX parser class | |||
1842 | ||||
1843 | if($self->{opt}->{keeproot}) { | |||
1844 | $tree = $self->collapse({}, @$tree); | |||
1845 | } | |||
1846 | else { | |||
1847 | $tree = $self->collapse(@{$tree->[1]}); | |||
1848 | } | |||
1849 | ||||
1850 | if($self->{opt}->{datahandler}) { | |||
1851 | return($self->{opt}->{datahandler}->($self, $tree)); | |||
1852 | } | |||
1853 | ||||
1854 | return($tree); | |||
1855 | } | |||
1856 | ||||
1857 | 1 | 2µs | 2µs | *xml_in = \&XMLin; |
1858 | 1 | 800ns | 800ns | *xml_out = \&XMLout; |
1859 | ||||
1860 | 1 | 34µs | 34µs | 1; |
1861 | ||||
1862 | __END__ | |||
1863 | ||||
1864 | =head1 QUICK START | |||
1865 | ||||
1866 | Say you have a script called B<foo> and a file of configuration options | |||
1867 | called B<foo.xml> containing this: | |||
1868 | ||||
1869 | <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> | |||
1870 | <server name="sahara" osname="solaris" osversion="2.6"> | |||
1871 | <address>10.0.0.101</address> | |||
1872 | <address>10.0.1.101</address> | |||
1873 | </server> | |||
1874 | <server name="gobi" osname="irix" osversion="6.5"> | |||
1875 | <address>10.0.0.102</address> | |||
1876 | </server> | |||
1877 | <server name="kalahari" osname="linux" osversion="2.0.34"> | |||
1878 | <address>10.0.0.103</address> | |||
1879 | <address>10.0.1.103</address> | |||
1880 | </server> | |||
1881 | </config> | |||
1882 | ||||
1883 | The following lines of code in B<foo>: | |||
1884 | ||||
1885 | use XML::Simple; | |||
1886 | ||||
1887 | my $config = XMLin(); | |||
1888 | ||||
1889 | will 'slurp' the configuration options into the hashref $config (because no | |||
1890 | arguments are passed to C<XMLin()> the name and location of the XML file will | |||
1891 | be inferred from name and location of the script). You can dump out the | |||
1892 | contents of the hashref using Data::Dumper: | |||
1893 | ||||
1894 | use Data::Dumper; | |||
1895 | ||||
1896 | print Dumper($config); | |||
1897 | ||||
1898 | which will produce something like this (formatting has been adjusted for | |||
1899 | brevity): | |||
1900 | ||||
1901 | { | |||
1902 | 'logdir' => '/var/log/foo/', | |||
1903 | 'debugfile' => '/tmp/foo.debug', | |||
1904 | 'server' => { | |||
1905 | 'sahara' => { | |||
1906 | 'osversion' => '2.6', | |||
1907 | 'osname' => 'solaris', | |||
1908 | 'address' => [ '10.0.0.101', '10.0.1.101' ] | |||
1909 | }, | |||
1910 | 'gobi' => { | |||
1911 | 'osversion' => '6.5', | |||
1912 | 'osname' => 'irix', | |||
1913 | 'address' => '10.0.0.102' | |||
1914 | }, | |||
1915 | 'kalahari' => { | |||
1916 | 'osversion' => '2.0.34', | |||
1917 | 'osname' => 'linux', | |||
1918 | 'address' => [ '10.0.0.103', '10.0.1.103' ] | |||
1919 | } | |||
1920 | } | |||
1921 | } | |||
1922 | ||||
1923 | Your script could then access the name of the log directory like this: | |||
1924 | ||||
1925 | print $config->{logdir}; | |||
1926 | ||||
1927 | similarly, the second address on the server 'kalahari' could be referenced as: | |||
1928 | ||||
1929 | print $config->{server}->{kalahari}->{address}->[1]; | |||
1930 | ||||
1931 | What could be simpler? (Rhetorical). | |||
1932 | ||||
1933 | For simple requirements, that's really all there is to it. If you want to | |||
1934 | store your XML in a different directory or file, or pass it in as a string or | |||
1935 | even pass it in via some derivative of an IO::Handle, you'll need to check out | |||
1936 | L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that | |||
1937 | neat little transformation that produced $config->{server}) you'll find options | |||
1938 | for that as well. | |||
1939 | ||||
1940 | If you want to generate XML (for example to write a modified version of | |||
1941 | $config back out as XML), check out C<XMLout()>. | |||
1942 | ||||
1943 | If your needs are not so simple, this may not be the module for you. In that | |||
1944 | case, you might want to read L<"WHERE TO FROM HERE?">. | |||
1945 | ||||
1946 | =head1 DESCRIPTION | |||
1947 | ||||
1948 | The XML::Simple module provides a simple API layer on top of an underlying XML | |||
1949 | parsing module (either XML::Parser or one of the SAX2 parser modules). Two | |||
1950 | functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity | |||
1951 | request the lower case versions of the function names: C<xml_in()> and | |||
1952 | C<xml_out()>. | |||
1953 | ||||
1954 | The simplest approach is to call these two functions directly, but an | |||
1955 | optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) | |||
1956 | allows them to be called as methods of an B<XML::Simple> object. The object | |||
1957 | interface can also be used at either end of a SAX pipeline. | |||
1958 | ||||
1959 | =head2 XMLin() | |||
1960 | ||||
1961 | Parses XML formatted data and returns a reference to a data structure which | |||
1962 | contains the same information in a more readily accessible form. (Skip | |||
1963 | down to L<"EXAMPLES"> below, for more sample code). | |||
1964 | ||||
1965 | C<XMLin()> accepts an optional XML specifier followed by zero or more 'name => | |||
1966 | value' option pairs. The XML specifier can be one of the following: | |||
1967 | ||||
1968 | =over 4 | |||
1969 | ||||
1970 | =item A filename | |||
1971 | ||||
1972 | If the filename contains no directory components C<XMLin()> will look for the | |||
1973 | file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the | |||
1974 | current directory if the SearchPath option is not defined. eg: | |||
1975 | ||||
1976 | $ref = XMLin('/etc/params.xml'); | |||
1977 | ||||
1978 | Note, the filename '-' can be used to parse from STDIN. | |||
1979 | ||||
1980 | =item undef | |||
1981 | ||||
1982 | If there is no XML specifier, C<XMLin()> will check the script directory and | |||
1983 | each of the SearchPath directories for a file with the same name as the script | |||
1984 | but with the extension '.xml'. Note: if you wish to specify options, you | |||
1985 | must specify the value 'undef'. eg: | |||
1986 | ||||
1987 | $ref = XMLin(undef, ForceArray => 1); | |||
1988 | ||||
1989 | =item A string of XML | |||
1990 | ||||
1991 | A string containing XML (recognised by the presence of '<' and '>' characters) | |||
1992 | will be parsed directly. eg: | |||
1993 | ||||
1994 | $ref = XMLin('<opt username="bob" password="flurp" />'); | |||
1995 | ||||
1996 | =item An IO::Handle object | |||
1997 | ||||
1998 | An IO::Handle object will be read to EOF and its contents parsed. eg: | |||
1999 | ||||
2000 | $fh = IO::File->new('/etc/params.xml'); | |||
2001 | $ref = XMLin($fh); | |||
2002 | ||||
2003 | =back | |||
2004 | ||||
2005 | =head2 XMLout() | |||
2006 | ||||
2007 | Takes a data structure (generally a hashref) and returns an XML encoding of | |||
2008 | that structure. If the resulting XML is parsed using C<XMLin()>, it should | |||
2009 | return a data structure equivalent to the original (see caveats below). | |||
2010 | ||||
2011 | The C<XMLout()> function can also be used to output the XML as SAX events | |||
2012 | see the C<Handler> option and L<"SAX SUPPORT"> for more details). | |||
2013 | ||||
2014 | When translating hashes to XML, hash keys which have a leading '-' will be | |||
2015 | silently skipped. This is the approved method for marking elements of a | |||
2016 | data structure which should be ignored by C<XMLout>. (Note: If these items | |||
2017 | were not skipped the key names would be emitted as element or attribute names | |||
2018 | with a leading '-' which would not be valid XML). | |||
2019 | ||||
2020 | =head2 Caveats | |||
2021 | ||||
2022 | Some care is required in creating data structures which will be passed to | |||
2023 | C<XMLout()>. Hash keys from the data structure will be encoded as either XML | |||
2024 | element names or attribute names. Therefore, you should use hash key names | |||
2025 | which conform to the relatively strict XML naming rules: | |||
2026 | ||||
2027 | Names in XML must begin with a letter. The remaining characters may be | |||
2028 | letters, digits, hyphens (-), underscores (_) or full stops (.). It is also | |||
2029 | allowable to include one colon (:) in an element name but this should only be | |||
2030 | used when working with namespaces (B<XML::Simple> can only usefully work with | |||
2031 | namespaces when teamed with a SAX Parser). | |||
2032 | ||||
2033 | You can use other punctuation characters in hash values (just not in hash | |||
2034 | keys) however B<XML::Simple> does not support dumping binary data. | |||
2035 | ||||
2036 | If you break these rules, the current implementation of C<XMLout()> will | |||
2037 | simply emit non-compliant XML which will be rejected if you try to read it | |||
2038 | back in. (A later version of B<XML::Simple> might take a more proactive | |||
2039 | approach). | |||
2040 | ||||
2041 | Note also that although you can nest hashes and arrays to arbitrary levels, | |||
2042 | circular data structures are not supported and will cause C<XMLout()> to die. | |||
2043 | ||||
2044 | If you wish to 'round-trip' arbitrary data structures from Perl to XML and back | |||
2045 | to Perl, then you should probably disable array folding (using the KeyAttr | |||
2046 | option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the | |||
2047 | expected results, you may prefer to use L<XML::Dumper> which is designed for | |||
2048 | exactly that purpose. | |||
2049 | ||||
2050 | Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs. | |||
2051 | ||||
2052 | ||||
2053 | =head1 OPTIONS | |||
2054 | ||||
2055 | B<XML::Simple> supports a number of options (in fact as each release of | |||
2056 | B<XML::Simple> adds more options, the module's claim to the name 'Simple' | |||
2057 | becomes increasingly tenuous). If you find yourself repeatedly having to | |||
2058 | specify the same options, you might like to investigate L<"OPTIONAL OO | |||
2059 | INTERFACE"> below. | |||
2060 | ||||
2061 | If you can't be bothered reading the documentation, refer to | |||
2062 | L<"STRICT MODE"> to automatically catch common mistakes. | |||
2063 | ||||
2064 | Because there are so many options, it's hard for new users to know which ones | |||
2065 | are important, so here are the two you really need to know about: | |||
2066 | ||||
2067 | =over 4 | |||
2068 | ||||
2069 | =item * | |||
2070 | ||||
2071 | check out C<ForceArray> because you'll almost certainly want to turn it on | |||
2072 | ||||
2073 | =item * | |||
2074 | ||||
2075 | make sure you know what the C<KeyAttr> option does and what its default value is | |||
2076 | because it may surprise you otherwise (note in particular that 'KeyAttr' | |||
2077 | affects both C<XMLin> and C<XMLout>) | |||
2078 | ||||
2079 | =back | |||
2080 | ||||
2081 | The option name headings below have a trailing 'comment' - a hash followed by | |||
2082 | two pieces of metadata: | |||
2083 | ||||
2084 | =over 4 | |||
2085 | ||||
2086 | =item * | |||
2087 | ||||
2088 | Options are marked with 'I<in>' if they are recognised by C<XMLin()> and | |||
2089 | 'I<out>' if they are recognised by C<XMLout()>. | |||
2090 | ||||
2091 | =item * | |||
2092 | ||||
2093 | Each option is also flagged to indicate whether it is: | |||
2094 | ||||
2095 | 'important' - don't use the module until you understand this one | |||
2096 | 'handy' - you can skip this on the first time through | |||
2097 | 'advanced' - you can skip this on the second time through | |||
2098 | 'SAX only' - don't worry about this unless you're using SAX (or | |||
2099 | alternatively if you need this, you also need SAX) | |||
2100 | 'seldom used' - you'll probably never use this unless you were the | |||
2101 | person that requested the feature | |||
2102 | ||||
2103 | =back | |||
2104 | ||||
2105 | The options are listed alphabetically: | |||
2106 | ||||
2107 | Note: option names are no longer case sensitive so you can use the mixed case | |||
2108 | versions shown here; all lower case as required by versions 2.03 and earlier; | |||
2109 | or you can add underscores between the words (eg: key_attr). | |||
2110 | ||||
2111 | ||||
2112 | =head2 AttrIndent => 1 I<# out - handy> | |||
2113 | ||||
2114 | When you are using C<XMLout()>, enable this option to have attributes printed | |||
2115 | one-per-line with sensible indentation rather than all on one line. | |||
2116 | ||||
2117 | =head2 Cache => [ cache schemes ] I<# in - advanced> | |||
2118 | ||||
2119 | Because loading the B<XML::Parser> module and parsing an XML file can consume a | |||
2120 | significant number of CPU cycles, it is often desirable to cache the output of | |||
2121 | C<XMLin()> for later reuse. | |||
2122 | ||||
2123 | When parsing from a named file, B<XML::Simple> supports a number of caching | |||
2124 | schemes. The 'Cache' option may be used to specify one or more schemes (using | |||
2125 | an anonymous array). Each scheme will be tried in turn in the hope of finding | |||
2126 | a cached pre-parsed representation of the XML file. If no cached copy is | |||
2127 | found, the file will be parsed and the first cache scheme in the list will be | |||
2128 | used to save a copy of the results. The following cache schemes have been | |||
2129 | implemented: | |||
2130 | ||||
2131 | =over 4 | |||
2132 | ||||
2133 | =item storable | |||
2134 | ||||
2135 | Utilises B<Storable.pm> to read/write a cache file with the same name as the | |||
2136 | XML file but with the extension .stor | |||
2137 | ||||
2138 | =item memshare | |||
2139 | ||||
2140 | When a file is first parsed, a copy of the resulting data structure is retained | |||
2141 | in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse | |||
2142 | the same file will return a reference to this structure. This cached version | |||
2143 | will persist only for the life of the Perl interpreter (which in the case of | |||
2144 | mod_perl for example, may be some significant time). | |||
2145 | ||||
2146 | Because each caller receives a reference to the same data structure, a change | |||
2147 | made by one caller will be visible to all. For this reason, the reference | |||
2148 | returned should be treated as read-only. | |||
2149 | ||||
2150 | =item memcopy | |||
2151 | ||||
2152 | This scheme works identically to 'memshare' (above) except that each caller | |||
2153 | receives a reference to a new data structure which is a copy of the cached | |||
2154 | version. Copying the data structure will add a little processing overhead, | |||
2155 | therefore this scheme should only be used where the caller intends to modify | |||
2156 | the data structure (or wishes to protect itself from others who might). This | |||
2157 | scheme uses B<Storable.pm> to perform the copy. | |||
2158 | ||||
2159 | =back | |||
2160 | ||||
2161 | Warning! The memory-based caching schemes compare the timestamp on the file to | |||
2162 | the time when it was last parsed. If the file is stored on an NFS filesystem | |||
2163 | (or other network share) and the clock on the file server is not exactly | |||
2164 | synchronised with the clock where your script is run, updates to the source XML | |||
2165 | file may appear to be ignored. | |||
2166 | ||||
2167 | =head2 ContentKey => 'keyname' I<# in+out - seldom used> | |||
2168 | ||||
2169 | When text content is parsed to a hash value, this option let's you specify a | |||
2170 | name for the hash key to override the default 'content'. So for example: | |||
2171 | ||||
2172 | XMLin('<opt one="1">Text</opt>', ContentKey => 'text') | |||
2173 | ||||
2174 | will parse to: | |||
2175 | ||||
2176 | { 'one' => 1, 'text' => 'Text' } | |||
2177 | ||||
2178 | instead of: | |||
2179 | ||||
2180 | { 'one' => 1, 'content' => 'Text' } | |||
2181 | ||||
2182 | C<XMLout()> will also honour the value of this option when converting a hashref | |||
2183 | to XML. | |||
2184 | ||||
2185 | You can also prefix your selected key name with a '-' character to have | |||
2186 | C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after | |||
2187 | array folding. For example: | |||
2188 | ||||
2189 | XMLin( | |||
2190 | '<opt><item name="one">First</item><item name="two">Second</item></opt>', | |||
2191 | KeyAttr => {item => 'name'}, | |||
2192 | ForceArray => [ 'item' ], | |||
2193 | ContentKey => '-content' | |||
2194 | ) | |||
2195 | ||||
2196 | will parse to: | |||
2197 | ||||
2198 | { | |||
2199 | 'item' => { | |||
2200 | 'one' => 'First' | |||
2201 | 'two' => 'Second' | |||
2202 | } | |||
2203 | } | |||
2204 | ||||
2205 | rather than this (without the '-'): | |||
2206 | ||||
2207 | { | |||
2208 | 'item' => { | |||
2209 | 'one' => { 'content' => 'First' } | |||
2210 | 'two' => { 'content' => 'Second' } | |||
2211 | } | |||
2212 | } | |||
2213 | ||||
2214 | =head2 DataHandler => code_ref I<# in - SAX only> | |||
2215 | ||||
2216 | When you use an B<XML::Simple> object as a SAX handler, it will return a | |||
2217 | 'simple tree' data structure in the same format as C<XMLin()> would return. If | |||
2218 | this option is set (to a subroutine reference), then when the tree is built the | |||
2219 | subroutine will be called and passed two arguments: a reference to the | |||
2220 | B<XML::Simple> object and a reference to the data tree. The return value from | |||
2221 | the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for | |||
2222 | more details). | |||
2223 | ||||
2224 | =head2 ForceArray => 1 I<# in - important> | |||
2225 | ||||
2226 | This option should be set to '1' to force nested elements to be represented | |||
2227 | as arrays even when there is only one. Eg, with ForceArray enabled, this | |||
2228 | XML: | |||
2229 | ||||
2230 | <opt> | |||
2231 | <name>value</name> | |||
2232 | </opt> | |||
2233 | ||||
2234 | would parse to this: | |||
2235 | ||||
2236 | { | |||
2237 | 'name' => [ | |||
2238 | 'value' | |||
2239 | ] | |||
2240 | } | |||
2241 | ||||
2242 | instead of this (the default): | |||
2243 | ||||
2244 | { | |||
2245 | 'name' => 'value' | |||
2246 | } | |||
2247 | ||||
2248 | This option is especially useful if the data structure is likely to be written | |||
2249 | back out as XML and the default behaviour of rolling single nested elements up | |||
2250 | into attributes is not desirable. | |||
2251 | ||||
2252 | If you are using the array folding feature, you should almost certainly enable | |||
2253 | this option. If you do not, single nested elements will not be parsed to | |||
2254 | arrays and therefore will not be candidates for folding to a hash. (Given that | |||
2255 | the default value of 'KeyAttr' enables array folding, the default value of this | |||
2256 | option should probably also have been enabled too - sorry). | |||
2257 | ||||
2258 | =head2 ForceArray => [ names ] I<# in - important> | |||
2259 | ||||
2260 | This alternative (and preferred) form of the 'ForceArray' option allows you to | |||
2261 | specify a list of element names which should always be forced into an array | |||
2262 | representation, rather than the 'all or nothing' approach above. | |||
2263 | ||||
2264 | It is also possible (since version 2.05) to include compiled regular | |||
2265 | expressions in the list - any element names which match the pattern will be | |||
2266 | forced to arrays. If the list contains only a single regex, then it is not | |||
2267 | necessary to enclose it in an arrayref. Eg: | |||
2268 | ||||
2269 | ForceArray => qr/_list$/ | |||
2270 | ||||
2271 | =head2 ForceContent => 1 I<# in - seldom used> | |||
2272 | ||||
2273 | When C<XMLin()> parses elements which have text content as well as attributes, | |||
2274 | the text content must be represented as a hash value rather than a simple | |||
2275 | scalar. This option allows you to force text content to always parse to | |||
2276 | a hash value even when there are no attributes. So for example: | |||
2277 | ||||
2278 | XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | |||
2279 | ||||
2280 | will parse to: | |||
2281 | ||||
2282 | { | |||
2283 | 'x' => { 'content' => 'text1' }, | |||
2284 | 'y' => { 'a' => 2, 'content' => 'text2' } | |||
2285 | } | |||
2286 | ||||
2287 | instead of: | |||
2288 | ||||
2289 | { | |||
2290 | 'x' => 'text1', | |||
2291 | 'y' => { 'a' => 2, 'content' => 'text2' } | |||
2292 | } | |||
2293 | ||||
2294 | =head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> | |||
2295 | ||||
2296 | You can use this option to eliminate extra levels of indirection in your Perl | |||
2297 | data structure. For example this XML: | |||
2298 | ||||
2299 | <opt> | |||
2300 | <searchpath> | |||
2301 | <dir>/usr/bin</dir> | |||
2302 | <dir>/usr/local/bin</dir> | |||
2303 | <dir>/usr/X11/bin</dir> | |||
2304 | </searchpath> | |||
2305 | </opt> | |||
2306 | ||||
2307 | Would normally be read into a structure like this: | |||
2308 | ||||
2309 | { | |||
2310 | searchpath => { | |||
2311 | dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |||
2312 | } | |||
2313 | } | |||
2314 | ||||
2315 | But when read in with the appropriate value for 'GroupTags': | |||
2316 | ||||
2317 | my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | |||
2318 | ||||
2319 | It will return this simpler structure: | |||
2320 | ||||
2321 | { | |||
2322 | searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | |||
2323 | } | |||
2324 | ||||
2325 | The grouping element (C<< <searchpath> >> in the example) must not contain any | |||
2326 | attributes or elements other than the grouped element. | |||
2327 | ||||
2328 | You can specify multiple 'grouping element' to 'grouped element' mappings in | |||
2329 | the same hashref. If this option is combined with C<KeyAttr>, the array | |||
2330 | folding will occur first and then the grouped element names will be eliminated. | |||
2331 | ||||
2332 | C<XMLout> will also use the grouptag mappings to re-introduce the tags around | |||
2333 | the grouped elements. Beware though that this will occur in all places that | |||
2334 | the 'grouping tag' name occurs - you probably don't want to use the same name | |||
2335 | for elements as well as attributes. | |||
2336 | ||||
2337 | =head2 Handler => object_ref I<# out - SAX only> | |||
2338 | ||||
2339 | Use the 'Handler' option to have C<XMLout()> generate SAX events rather than | |||
2340 | returning a string of XML. For more details see L<"SAX SUPPORT"> below. | |||
2341 | ||||
2342 | Note: the current implementation of this option generates a string of XML | |||
2343 | and uses a SAX parser to translate it into SAX events. The normal encoding | |||
2344 | rules apply here - your data must be UTF8 encoded unless you specify an | |||
2345 | alternative encoding via the 'XMLDecl' option; and by the time the data reaches | |||
2346 | the handler object, it will be in UTF8 form regardless of the encoding you | |||
2347 | supply. A future implementation of this option may generate the events | |||
2348 | directly. | |||
2349 | ||||
2350 | =head2 KeepRoot => 1 I<# in+out - handy> | |||
2351 | ||||
2352 | In its attempt to return a data structure free of superfluous detail and | |||
2353 | unnecessary levels of indirection, C<XMLin()> normally discards the root | |||
2354 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | |||
2355 | name to be retained. So after executing this code: | |||
2356 | ||||
2357 | $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | |||
2358 | ||||
2359 | You'll be able to reference the tempdir as | |||
2360 | C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default | |||
2361 | C<$config-E<gt>{tempdir}>. | |||
2362 | ||||
2363 | Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the | |||
2364 | data structure already contains a root element name and it is not necessary to | |||
2365 | add another. | |||
2366 | ||||
2367 | =head2 KeyAttr => [ list ] I<# in+out - important> | |||
2368 | ||||
2369 | This option controls the 'array folding' feature which translates nested | |||
2370 | elements from an array to a hash. It also controls the 'unfolding' of hashes | |||
2371 | to arrays. | |||
2372 | ||||
2373 | For example, this XML: | |||
2374 | ||||
2375 | <opt> | |||
2376 | <user login="grep" fullname="Gary R Epstein" /> | |||
2377 | <user login="stty" fullname="Simon T Tyson" /> | |||
2378 | </opt> | |||
2379 | ||||
2380 | would, by default, parse to this: | |||
2381 | ||||
2382 | { | |||
2383 | 'user' => [ | |||
2384 | { | |||
2385 | 'login' => 'grep', | |||
2386 | 'fullname' => 'Gary R Epstein' | |||
2387 | }, | |||
2388 | { | |||
2389 | 'login' => 'stty', | |||
2390 | 'fullname' => 'Simon T Tyson' | |||
2391 | } | |||
2392 | ] | |||
2393 | } | |||
2394 | ||||
2395 | If the option 'KeyAttr => "login"' were used to specify that the 'login' | |||
2396 | attribute is a key, the same XML would parse to: | |||
2397 | ||||
2398 | { | |||
2399 | 'user' => { | |||
2400 | 'stty' => { | |||
2401 | 'fullname' => 'Simon T Tyson' | |||
2402 | }, | |||
2403 | 'grep' => { | |||
2404 | 'fullname' => 'Gary R Epstein' | |||
2405 | } | |||
2406 | } | |||
2407 | } | |||
2408 | ||||
2409 | The key attribute names should be supplied in an arrayref if there is more | |||
2410 | than one. C<XMLin()> will attempt to match attribute names in the order | |||
2411 | supplied. C<XMLout()> will use the first attribute name supplied when | |||
2412 | 'unfolding' a hash into an array. | |||
2413 | ||||
2414 | Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do | |||
2415 | not want folding on input or unfolding on output you must setting this option | |||
2416 | to an empty list to disable the feature. | |||
2417 | ||||
2418 | Note 2: If you wish to use this option, you should also enable the | |||
2419 | C<ForceArray> option. Without 'ForceArray', a single nested element will be | |||
2420 | rolled up into a scalar rather than an array and therefore will not be folded | |||
2421 | (since only arrays get folded). | |||
2422 | ||||
2423 | =head2 KeyAttr => { list } I<# in+out - important> | |||
2424 | ||||
2425 | This alternative (and preferred) method of specifiying the key attributes | |||
2426 | allows more fine grained control over which elements are folded and on which | |||
2427 | attributes. For example the option 'KeyAttr => { package => 'id' } will cause | |||
2428 | any package elements to be folded on the 'id' attribute. No other elements | |||
2429 | which have an 'id' attribute will be folded at all. | |||
2430 | ||||
2431 | Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">) | |||
2432 | if this syntax is used and an element which does not have the specified key | |||
2433 | attribute is encountered (eg: a 'package' element without an 'id' attribute, to | |||
2434 | use the example above). Warnings will only be generated if B<-w> is in force. | |||
2435 | ||||
2436 | Two further variations are made possible by prefixing a '+' or a '-' character | |||
2437 | to the attribute name: | |||
2438 | ||||
2439 | The option 'KeyAttr => { user => "+login" }' will cause this XML: | |||
2440 | ||||
2441 | <opt> | |||
2442 | <user login="grep" fullname="Gary R Epstein" /> | |||
2443 | <user login="stty" fullname="Simon T Tyson" /> | |||
2444 | </opt> | |||
2445 | ||||
2446 | to parse to this data structure: | |||
2447 | ||||
2448 | { | |||
2449 | 'user' => { | |||
2450 | 'stty' => { | |||
2451 | 'fullname' => 'Simon T Tyson', | |||
2452 | 'login' => 'stty' | |||
2453 | }, | |||
2454 | 'grep' => { | |||
2455 | 'fullname' => 'Gary R Epstein', | |||
2456 | 'login' => 'grep' | |||
2457 | } | |||
2458 | } | |||
2459 | } | |||
2460 | ||||
2461 | The '+' indicates that the value of the key attribute should be copied rather | |||
2462 | than moved to the folded hash key. | |||
2463 | ||||
2464 | A '-' prefix would produce this result: | |||
2465 | ||||
2466 | { | |||
2467 | 'user' => { | |||
2468 | 'stty' => { | |||
2469 | 'fullname' => 'Simon T Tyson', | |||
2470 | '-login' => 'stty' | |||
2471 | }, | |||
2472 | 'grep' => { | |||
2473 | 'fullname' => 'Gary R Epstein', | |||
2474 | '-login' => 'grep' | |||
2475 | } | |||
2476 | } | |||
2477 | } | |||
2478 | ||||
2479 | As described earlier, C<XMLout> will ignore hash keys starting with a '-'. | |||
2480 | ||||
2481 | =head2 NoAttr => 1 I<# in+out - handy> | |||
2482 | ||||
2483 | When used with C<XMLout()>, the generated XML will contain no attributes. | |||
2484 | All hash key/values will be represented as nested elements instead. | |||
2485 | ||||
2486 | When used with C<XMLin()>, any attributes in the XML will be ignored. | |||
2487 | ||||
2488 | =head2 NoEscape => 1 I<# out - seldom used> | |||
2489 | ||||
2490 | By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and | |||
2491 | '"' to '<', '>', '&' and '"' respectively. Use this option to | |||
2492 | suppress escaping (presumably because you've already escaped the data in some | |||
2493 | more sophisticated manner). | |||
2494 | ||||
2495 | =head2 NoIndent => 1 I<# out - seldom used> | |||
2496 | ||||
2497 | Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode. | |||
2498 | With this option enabled, the XML output will all be on one line (unless there | |||
2499 | are newlines in the data) - this may be easier for downstream processing. | |||
2500 | ||||
2501 | =head2 NoSort => 1 I<# out - seldom used> | |||
2502 | ||||
2503 | Newer versions of XML::Simple sort elements and attributes alphabetically (*), | |||
2504 | by default. Enable this option to suppress the sorting - possibly for | |||
2505 | backwards compatibility. | |||
2506 | ||||
2507 | * Actually, sorting is alphabetical but 'key' attribute or element names (as in | |||
2508 | 'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements | |||
2509 | are sorted alphabetically by the value of the key field. | |||
2510 | ||||
2511 | =head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> | |||
2512 | ||||
2513 | This option controls how whitespace in text content is handled. Recognised | |||
2514 | values for the option are: | |||
2515 | ||||
2516 | =over 4 | |||
2517 | ||||
2518 | =item * | |||
2519 | ||||
2520 | 0 = (default) whitespace is passed through unaltered (except of course for the | |||
2521 | normalisation of whitespace in attribute values which is mandated by the XML | |||
2522 | recommendation) | |||
2523 | ||||
2524 | =item * | |||
2525 | ||||
2526 | 1 = whitespace is normalised in any value used as a hash key (normalising means | |||
2527 | removing leading and trailing whitespace and collapsing sequences of whitespace | |||
2528 | characters to a single space) | |||
2529 | ||||
2530 | =item * | |||
2531 | ||||
2532 | 2 = whitespace is normalised in all text content | |||
2533 | ||||
2534 | =back | |||
2535 | ||||
2536 | Note: you can spell this option with a 'z' if that is more natural for you. | |||
2537 | ||||
2538 | =head2 NSExpand => 1 I<# in+out handy - SAX only> | |||
2539 | ||||
2540 | This option controls namespace expansion - the translation of element and | |||
2541 | attribute names of the form 'prefix:name' to '{uri}name'. For example the | |||
2542 | element name 'xsl:template' might be expanded to: | |||
2543 | '{http://www.w3.org/1999/XSL/Transform}template'. | |||
2544 | ||||
2545 | By default, C<XMLin()> will return element names and attribute names exactly as | |||
2546 | they appear in the XML. Setting this option to 1 will cause all element and | |||
2547 | attribute names to be expanded to include their namespace prefix. | |||
2548 | ||||
2549 | I<Note: You must be using a SAX parser for this option to work (ie: it does not | |||
2550 | work with XML::Parser)>. | |||
2551 | ||||
2552 | This option also controls whether C<XMLout()> performs the reverse translation | |||
2553 | from '{uri}name' back to 'prefix:name'. The default is no translation. If | |||
2554 | your data contains expanded names, you should set this option to 1 otherwise | |||
2555 | C<XMLout> will emit XML which is not well formed. | |||
2556 | ||||
2557 | I<Note: You must have the XML::NamespaceSupport module installed if you want | |||
2558 | C<XMLout()> to translate URIs back to prefixes>. | |||
2559 | ||||
2560 | =head2 NumericEscape => 0 | 1 | 2 I<# out - handy> | |||
2561 | ||||
2562 | Use this option to have 'high' (non-ASCII) characters in your Perl data | |||
2563 | structure converted to numeric entities (eg: €) in the XML output. Three | |||
2564 | levels are possible: | |||
2565 | ||||
2566 | 0 - default: no numeric escaping (OK if you're writing out UTF8) | |||
2567 | ||||
2568 | 1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output | |||
2569 | ||||
2570 | 2 - all characters above 0x7F are escaped (good for plain ASCII output) | |||
2571 | ||||
2572 | =head2 OutputFile => <file specifier> I<# out - handy> | |||
2573 | ||||
2574 | The default behaviour of C<XMLout()> is to return the XML as a string. If you | |||
2575 | wish to write the XML to a file, simply supply the filename using the | |||
2576 | 'OutputFile' option. | |||
2577 | ||||
2578 | This option also accepts an IO handle object - especially useful in Perl 5.8.0 | |||
2579 | and later for output using an encoding other than UTF-8, eg: | |||
2580 | ||||
2581 | open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; | |||
2582 | XMLout($ref, OutputFile => $fh); | |||
2583 | ||||
2584 | Note, XML::Simple does not require that the object you pass in to the | |||
2585 | OutputFile option inherits from L<IO::Handle> - it simply assumes the object | |||
2586 | supports a C<print> method. | |||
2587 | ||||
2588 | =head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> | |||
2589 | ||||
2590 | I<Note: This option is now officially deprecated. If you find it useful, email | |||
2591 | the author with an example of what you use it for. Do not use this option to | |||
2592 | set the ProtocolEncoding, that's just plain wrong - fix the XML>. | |||
2593 | ||||
2594 | This option allows you to pass parameters to the constructor of the underlying | |||
2595 | XML::Parser object (which of course assumes you're not using SAX). | |||
2596 | ||||
2597 | =head2 RootName => 'string' I<# out - handy> | |||
2598 | ||||
2599 | By default, when C<XMLout()> generates XML, the root element will be named | |||
2600 | 'opt'. This option allows you to specify an alternative name. | |||
2601 | ||||
2602 | Specifying either undef or the empty string for the RootName option will | |||
2603 | produce XML with no root elements. In most cases the resulting XML fragment | |||
2604 | will not be 'well formed' and therefore could not be read back in by C<XMLin()>. | |||
2605 | Nevertheless, the option has been found to be useful in certain circumstances. | |||
2606 | ||||
2607 | =head2 SearchPath => [ list ] I<# in - handy> | |||
2608 | ||||
2609 | If you pass C<XMLin()> a filename, but the filename include no directory | |||
2610 | component, you can use this option to specify which directories should be | |||
2611 | searched to locate the file. You might use this option to search first in the | |||
2612 | user's home directory, then in a global directory such as /etc. | |||
2613 | ||||
2614 | If a filename is provided to C<XMLin()> but SearchPath is not defined, the | |||
2615 | file is assumed to be in the current directory. | |||
2616 | ||||
2617 | If the first parameter to C<XMLin()> is undefined, the default SearchPath | |||
2618 | will contain only the directory in which the script itself is located. | |||
2619 | Otherwise the default SearchPath will be empty. | |||
2620 | ||||
2621 | =head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> | |||
2622 | ||||
2623 | This option controls what C<XMLin()> should do with empty elements (no | |||
2624 | attributes and no content). The default behaviour is to represent them as | |||
2625 | empty hashes. Setting this option to a true value (eg: 1) will cause empty | |||
2626 | elements to be skipped altogether. Setting the option to 'undef' or the empty | |||
2627 | string will cause empty elements to be represented as the undefined value or | |||
2628 | the empty string respectively. The latter two alternatives are a little | |||
2629 | easier to test for in your code than a hash with no keys. | |||
2630 | ||||
2631 | The option also controls what C<XMLout()> does with undefined values. Setting | |||
2632 | the option to undef causes undefined values to be output as empty elements | |||
2633 | (rather than empty attributes), it also suppresses the generation of warnings | |||
2634 | about undefined values. Setting the option to a true value (eg: 1) causes | |||
2635 | undefined values to be skipped altogether on output. | |||
2636 | ||||
2637 | =head2 ValueAttr => [ names ] I<# in - handy> | |||
2638 | ||||
2639 | Use this option to deal elements which always have a single attribute and no | |||
2640 | content. Eg: | |||
2641 | ||||
2642 | <opt> | |||
2643 | <colour value="red" /> | |||
2644 | <size value="XXL" /> | |||
2645 | </opt> | |||
2646 | ||||
2647 | Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: | |||
2648 | ||||
2649 | { | |||
2650 | colour => 'red', | |||
2651 | size => 'XXL' | |||
2652 | } | |||
2653 | ||||
2654 | instead of this (the default): | |||
2655 | ||||
2656 | { | |||
2657 | colour => { value => 'red' }, | |||
2658 | size => { value => 'XXL' } | |||
2659 | } | |||
2660 | ||||
2661 | Note: This form of the ValueAttr option is not compatible with C<XMLout()> - | |||
2662 | since the attribute name is discarded at parse time, the original XML cannot be | |||
2663 | reconstructed. | |||
2664 | ||||
2665 | =head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> | |||
2666 | ||||
2667 | This (preferred) form of the ValueAttr option requires you to specify both | |||
2668 | the element and the attribute names. This is not only safer, it also allows | |||
2669 | the original XML to be reconstructed by C<XMLout()>. | |||
2670 | ||||
2671 | Note: You probably don't want to use this option and the NoAttr option at the | |||
2672 | same time. | |||
2673 | ||||
2674 | =head2 Variables => { name => value } I<# in - handy> | |||
2675 | ||||
2676 | This option allows variables in the XML to be expanded when the file is read. | |||
2677 | (there is no facility for putting the variable names back if you regenerate | |||
2678 | XML using C<XMLout>). | |||
2679 | ||||
2680 | A 'variable' is any text of the form C<${name}> which occurs in an attribute | |||
2681 | value or in the text content of an element. If 'name' matches a key in the | |||
2682 | supplied hashref, C<${name}> will be replaced with the corresponding value from | |||
2683 | the hashref. If no matching key is found, the variable will not be replaced. | |||
2684 | Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are | |||
2685 | allowed). | |||
2686 | ||||
2687 | =head2 VarAttr => 'attr_name' I<# in - handy> | |||
2688 | ||||
2689 | In addition to the variables defined using C<Variables>, this option allows | |||
2690 | variables to be defined in the XML. A variable definition consists of an | |||
2691 | element with an attribute called 'attr_name' (the value of the C<VarAttr> | |||
2692 | option). The value of the attribute will be used as the variable name and the | |||
2693 | text content of the element will be used as the value. A variable defined in | |||
2694 | this way will override a variable defined using the C<Variables> option. For | |||
2695 | example: | |||
2696 | ||||
2697 | XMLin( '<opt> | |||
2698 | <dir name="prefix">/usr/local/apache</dir> | |||
2699 | <dir name="exec_prefix">${prefix}</dir> | |||
2700 | <dir name="bindir">${exec_prefix}/bin</dir> | |||
2701 | </opt>', | |||
2702 | VarAttr => 'name', ContentKey => '-content' | |||
2703 | ); | |||
2704 | ||||
2705 | produces the following data structure: | |||
2706 | ||||
2707 | { | |||
2708 | dir => { | |||
2709 | prefix => '/usr/local/apache', | |||
2710 | exec_prefix => '/usr/local/apache', | |||
2711 | bindir => '/usr/local/apache/bin', | |||
2712 | } | |||
2713 | } | |||
2714 | ||||
2715 | =head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> | |||
2716 | ||||
2717 | If you want the output from C<XMLout()> to start with the optional XML | |||
2718 | declaration, simply set the option to '1'. The default XML declaration is: | |||
2719 | ||||
2720 | <?xml version='1.0' standalone='yes'?> | |||
2721 | ||||
2722 | If you want some other string (for example to declare an encoding value), set | |||
2723 | the value of this option to the complete string you require. | |||
2724 | ||||
2725 | ||||
2726 | =head1 OPTIONAL OO INTERFACE | |||
2727 | ||||
2728 | The procedural interface is both simple and convenient however there are a | |||
2729 | couple of reasons why you might prefer to use the object oriented (OO) | |||
2730 | interface: | |||
2731 | ||||
2732 | =over 4 | |||
2733 | ||||
2734 | =item * | |||
2735 | ||||
2736 | to define a set of default values which should be used on all subsequent calls | |||
2737 | to C<XMLin()> or C<XMLout()> | |||
2738 | ||||
2739 | =item * | |||
2740 | ||||
2741 | to override methods in B<XML::Simple> to provide customised behaviour | |||
2742 | ||||
2743 | =back | |||
2744 | ||||
2745 | The default values for the options described above are unlikely to suit | |||
2746 | everyone. The OO interface allows you to effectively override B<XML::Simple>'s | |||
2747 | defaults with your preferred values. It works like this: | |||
2748 | ||||
2749 | First create an XML::Simple parser object with your preferred defaults: | |||
2750 | ||||
2751 | my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); | |||
2752 | ||||
2753 | then call C<XMLin()> or C<XMLout()> as a method of that object: | |||
2754 | ||||
2755 | my $ref = $xs->XMLin($xml); | |||
2756 | my $xml = $xs->XMLout($ref); | |||
2757 | ||||
2758 | You can also specify options when you make the method calls and these values | |||
2759 | will be merged with the values specified when the object was created. Values | |||
2760 | specified in a method call take precedence. | |||
2761 | ||||
2762 | Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be | |||
2763 | called as C<xml_in()> or C<xml_out()>. The method names are aliased so the | |||
2764 | only difference is the aesthetics. | |||
2765 | ||||
2766 | =head2 Parsing Methods | |||
2767 | ||||
2768 | You can explicitly call one of the following methods rather than rely on the | |||
2769 | C<xml_in()> method automatically determining whether the target to be parsed is | |||
2770 | a string, a file or a filehandle: | |||
2771 | ||||
2772 | =over 4 | |||
2773 | ||||
2774 | =item parse_string(text) | |||
2775 | ||||
2776 | Works exactly like the C<xml_in()> method but assumes the first argument is | |||
2777 | a string of XML (or a reference to a scalar containing a string of XML). | |||
2778 | ||||
2779 | =item parse_file(filename) | |||
2780 | ||||
2781 | Works exactly like the C<xml_in()> method but assumes the first argument is | |||
2782 | the name of a file containing XML. | |||
2783 | ||||
2784 | =item parse_fh(file_handle) | |||
2785 | ||||
2786 | Works exactly like the C<xml_in()> method but assumes the first argument is | |||
2787 | a filehandle which can be read to get XML. | |||
2788 | ||||
2789 | =back | |||
2790 | ||||
2791 | =head2 Hook Methods | |||
2792 | ||||
2793 | You can make your own class which inherits from XML::Simple and overrides | |||
2794 | certain behaviours. The following methods may provide useful 'hooks' upon | |||
2795 | which to hang your modified behaviour. You may find other undocumented methods | |||
2796 | by examining the source, but those may be subject to change in future releases. | |||
2797 | ||||
2798 | =over 4 | |||
2799 | ||||
2800 | =item handle_options(direction, name => value ...) | |||
2801 | ||||
2802 | This method will be called when one of the parsing methods or the C<XMLout()> | |||
2803 | method is called. The initial argument will be a string (either 'in' or 'out') | |||
2804 | and the remaining arguments will be name value pairs. | |||
2805 | ||||
2806 | =item default_config_file() | |||
2807 | ||||
2808 | Calculates and returns the name of the file which should be parsed if no | |||
2809 | filename is passed to C<XMLin()> (default: C<$0.xml>). | |||
2810 | ||||
2811 | =item build_simple_tree(filename, string) | |||
2812 | ||||
2813 | Called from C<XMLin()> or any of the parsing methods. Takes either a file name | |||
2814 | as the first argument or C<undef> followed by a 'string' as the second | |||
2815 | argument. Returns a simple tree data structure. You could override this | |||
2816 | method to apply your own transformations before the data structure is returned | |||
2817 | to the caller. | |||
2818 | ||||
2819 | =item new_hashref() | |||
2820 | ||||
2821 | When the 'simple tree' data structure is being built, this method will be | |||
2822 | called to create any required anonymous hashrefs. | |||
2823 | ||||
2824 | =item sorted_keys(name, hashref) | |||
2825 | ||||
2826 | Called when C<XMLout()> is translating a hashref to XML. This routine returns | |||
2827 | a list of hash keys in the order that the corresponding attributes/elements | |||
2828 | should appear in the output. | |||
2829 | ||||
2830 | =item escape_value(string) | |||
2831 | ||||
2832 | Called from C<XMLout()>, takes a string and returns a copy of the string with | |||
2833 | XML character escaping rules applied. | |||
2834 | ||||
2835 | =item numeric_escape(string) | |||
2836 | ||||
2837 | Called from C<escape_value()>, to handle non-ASCII characters (depending on the | |||
2838 | value of the NumericEscape option). | |||
2839 | ||||
2840 | =item copy_hash(hashref, extra_key => value, ...) | |||
2841 | ||||
2842 | Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of | |||
2843 | hashes. You might wish to override this method if you're using tied hashes and | |||
2844 | don't want them to get untied. | |||
2845 | ||||
2846 | =back | |||
2847 | ||||
2848 | =head2 Cache Methods | |||
2849 | ||||
2850 | XML::Simple implements three caching schemes ('storable', 'memshare' and | |||
2851 | 'memcopy'). You can implement a custom caching scheme by implementing | |||
2852 | two methods - one for reading from the cache and one for writing to it. | |||
2853 | ||||
2854 | For example, you might implement a new 'dbm' scheme that stores cached data | |||
2855 | structures using the L<MLDBM> module. First, you would add a | |||
2856 | C<cache_read_dbm()> method which accepted a filename for use as a lookup key | |||
2857 | and returned a data structure on success, or undef on failure. Then, you would | |||
2858 | implement a C<cache_read_dbm()> method which accepted a data structure and a | |||
2859 | filename. | |||
2860 | ||||
2861 | You would use this caching scheme by specifying the option: | |||
2862 | ||||
2863 | Cache => [ 'dbm' ] | |||
2864 | ||||
2865 | =head1 STRICT MODE | |||
2866 | ||||
2867 | If you import the B<XML::Simple> routines like this: | |||
2868 | ||||
2869 | use XML::Simple qw(:strict); | |||
2870 | ||||
2871 | the following common mistakes will be detected and treated as fatal errors | |||
2872 | ||||
2873 | =over 4 | |||
2874 | ||||
2875 | =item * | |||
2876 | ||||
2877 | Failing to explicitly set the C<KeyAttr> option - if you can't be bothered | |||
2878 | reading about this option, turn it off with: KeyAttr => [ ] | |||
2879 | ||||
2880 | =item * | |||
2881 | ||||
2882 | Failing to explicitly set the C<ForceArray> option - if you can't be bothered | |||
2883 | reading about this option, set it to the safest mode with: ForceArray => 1 | |||
2884 | ||||
2885 | =item * | |||
2886 | ||||
2887 | Setting ForceArray to an array, but failing to list all the elements from the | |||
2888 | KeyAttr hash. | |||
2889 | ||||
2890 | =item * | |||
2891 | ||||
2892 | Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains | |||
2893 | one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested | |||
2894 | element). Note: if strict mode is not set but -w is, this condition triggers a | |||
2895 | warning. | |||
2896 | ||||
2897 | =item * | |||
2898 | ||||
2899 | Data error - as above, but non-unique values are present in the key attribute | |||
2900 | (eg: more than one E<lt>partE<gt> element with the same partnum). This will | |||
2901 | also trigger a warning if strict mode is not enabled. | |||
2902 | ||||
2903 | =item * | |||
2904 | ||||
2905 | Data error - as above, but value of key attribute (eg: partnum) is not a | |||
2906 | scalar string (due to nested elements etc). This will also trigger a warning | |||
2907 | if strict mode is not enabled. | |||
2908 | ||||
2909 | =back | |||
2910 | ||||
2911 | =head1 SAX SUPPORT | |||
2912 | ||||
2913 | From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API | |||
2914 | for XML) - specifically SAX2. | |||
2915 | ||||
2916 | In a typical SAX application, an XML parser (or SAX 'driver') module generates | |||
2917 | SAX events (start of element, character data, end of element, etc) as it parses | |||
2918 | an XML document and a 'handler' module processes the events to extract the | |||
2919 | required data. This simple model allows for some interesting and powerful | |||
2920 | possibilities: | |||
2921 | ||||
2922 | =over 4 | |||
2923 | ||||
2924 | =item * | |||
2925 | ||||
2926 | Applications written to the SAX API can extract data from huge XML documents | |||
2927 | without the memory overheads of a DOM or tree API. | |||
2928 | ||||
2929 | =item * | |||
2930 | ||||
2931 | The SAX API allows for plug and play interchange of parser modules without | |||
2932 | having to change your code to fit a new module's API. A number of SAX parsers | |||
2933 | are available with capabilities ranging from extreme portability to blazing | |||
2934 | performance. | |||
2935 | ||||
2936 | =item * | |||
2937 | ||||
2938 | A SAX 'filter' module can implement both a handler interface for receiving | |||
2939 | data and a generator interface for passing modified data on to a downstream | |||
2940 | handler. Filters can be chained together in 'pipelines'. | |||
2941 | ||||
2942 | =item * | |||
2943 | ||||
2944 | One filter module might split a data stream to direct data to two or more | |||
2945 | downstream handlers. | |||
2946 | ||||
2947 | =item * | |||
2948 | ||||
2949 | Generating SAX events is not the exclusive preserve of XML parsing modules. | |||
2950 | For example, a module might extract data from a relational database using DBI | |||
2951 | and pass it on to a SAX pipeline for filtering and formatting. | |||
2952 | ||||
2953 | =back | |||
2954 | ||||
2955 | B<XML::Simple> can operate at either end of a SAX pipeline. For example, | |||
2956 | you can take a data structure in the form of a hashref and pass it into a | |||
2957 | SAX pipeline using the 'Handler' option on C<XMLout()>: | |||
2958 | ||||
2959 | use XML::Simple; | |||
2960 | use Some::SAX::Filter; | |||
2961 | use XML::SAX::Writer; | |||
2962 | ||||
2963 | my $ref = { | |||
2964 | .... # your data here | |||
2965 | }; | |||
2966 | ||||
2967 | my $writer = XML::SAX::Writer->new(); | |||
2968 | my $filter = Some::SAX::Filter->new(Handler => $writer); | |||
2969 | my $simple = XML::Simple->new(Handler => $filter); | |||
2970 | $simple->XMLout($ref); | |||
2971 | ||||
2972 | You can also put B<XML::Simple> at the opposite end of the pipeline to take | |||
2973 | advantage of the simple 'tree' data structure once the relevant data has been | |||
2974 | isolated through filtering: | |||
2975 | ||||
2976 | use XML::SAX; | |||
2977 | use Some::SAX::Filter; | |||
2978 | use XML::Simple; | |||
2979 | ||||
2980 | my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); | |||
2981 | my $filter = Some::SAX::Filter->new(Handler => $simple); | |||
2982 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | |||
2983 | ||||
2984 | my $ref = $parser->parse_uri('some_huge_file.xml'); | |||
2985 | ||||
2986 | print $ref->{part}->{'555-1234'}; | |||
2987 | ||||
2988 | You can build a filter by using an XML::Simple object as a handler and setting | |||
2989 | its DataHandler option to point to a routine which takes the resulting tree, | |||
2990 | modifies it and sends it off as SAX events to a downstream handler: | |||
2991 | ||||
2992 | my $writer = XML::SAX::Writer->new(); | |||
2993 | my $filter = XML::Simple->new( | |||
2994 | DataHandler => sub { | |||
2995 | my $simple = shift; | |||
2996 | my $data = shift; | |||
2997 | ||||
2998 | # Modify $data here | |||
2999 | ||||
3000 | $simple->XMLout($data, Handler => $writer); | |||
3001 | } | |||
3002 | ); | |||
3003 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | |||
3004 | ||||
3005 | $parser->parse_uri($filename); | |||
3006 | ||||
3007 | I<Note: In this last example, the 'Handler' option was specified in the call to | |||
3008 | C<XMLout()> but it could also have been specified in the constructor>. | |||
3009 | ||||
3010 | =head1 ENVIRONMENT | |||
3011 | ||||
3012 | If you don't care which parser module B<XML::Simple> uses then skip this | |||
3013 | section entirely (it looks more complicated than it really is). | |||
3014 | ||||
3015 | B<XML::Simple> will default to using a B<SAX> parser if one is available or | |||
3016 | B<XML::Parser> if SAX is not available. | |||
3017 | ||||
3018 | You can dictate which parser module is used by setting either the environment | |||
3019 | variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable | |||
3020 | $XML::Simple::PREFERRED_PARSER to contain the module name. The following rules | |||
3021 | are used: | |||
3022 | ||||
3023 | =over 4 | |||
3024 | ||||
3025 | =item * | |||
3026 | ||||
3027 | The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use | |||
3028 | its default rules, you can set the package variable to an empty string. | |||
3029 | ||||
3030 | =item * | |||
3031 | ||||
3032 | If the 'preferred parser' is set to the string 'XML::Parser', then | |||
3033 | L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not | |||
3034 | installed). | |||
3035 | ||||
3036 | =item * | |||
3037 | ||||
3038 | If the 'preferred parser' is set to some other value, then it is assumed to be | |||
3039 | the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.> | |||
3040 | If L<XML::SAX> is not installed, or the requested parser module is not | |||
3041 | installed, then C<XMLin()> will die. | |||
3042 | ||||
3043 | =item * | |||
3044 | ||||
3045 | If the 'preferred parser' is not defined at all (the normal default | |||
3046 | state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is | |||
3047 | installed, then a parser module will be selected according to | |||
3048 | L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX | |||
3049 | parser installed). | |||
3050 | ||||
3051 | =item * | |||
3052 | ||||
3053 | if the 'preferred parser' is not defined and B<XML::SAX> is not | |||
3054 | installed, then B<XML::Parser> will be used. C<XMLin()> will die if | |||
3055 | L<XML::Parser> is not installed. | |||
3056 | ||||
3057 | =back | |||
3058 | ||||
3059 | Note: The B<XML::SAX> distribution includes an XML parser written entirely in | |||
3060 | Perl. It is very portable but it is not very fast. You should consider | |||
3061 | installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your | |||
3062 | platform. | |||
3063 | ||||
3064 | =head1 ERROR HANDLING | |||
3065 | ||||
3066 | The XML standard is very clear on the issue of non-compliant documents. An | |||
3067 | error in parsing any single element (for example a missing end tag) must cause | |||
3068 | the whole document to be rejected. B<XML::Simple> will die with an appropriate | |||
3069 | message if it encounters a parsing error. | |||
3070 | ||||
3071 | If dying is not appropriate for your application, you should arrange to call | |||
3072 | C<XMLin()> in an eval block and look for errors in $@. eg: | |||
3073 | ||||
3074 | my $config = eval { XMLin() }; | |||
3075 | PopUpMessage($@) if($@); | |||
3076 | ||||
3077 | Note, there is a common misconception that use of B<eval> will significantly | |||
3078 | slow down a script. While that may be true when the code being eval'd is in a | |||
3079 | string, it is not true of code like the sample above. | |||
3080 | ||||
3081 | =head1 EXAMPLES | |||
3082 | ||||
3083 | When C<XMLin()> reads the following very simple piece of XML: | |||
3084 | ||||
3085 | <opt username="testuser" password="frodo"></opt> | |||
3086 | ||||
3087 | it returns the following data structure: | |||
3088 | ||||
3089 | { | |||
3090 | 'username' => 'testuser', | |||
3091 | 'password' => 'frodo' | |||
3092 | } | |||
3093 | ||||
3094 | The identical result could have been produced with this alternative XML: | |||
3095 | ||||
3096 | <opt username="testuser" password="frodo" /> | |||
3097 | ||||
3098 | Or this (although see 'ForceArray' option for variations): | |||
3099 | ||||
3100 | <opt> | |||
3101 | <username>testuser</username> | |||
3102 | <password>frodo</password> | |||
3103 | </opt> | |||
3104 | ||||
3105 | Repeated nested elements are represented as anonymous arrays: | |||
3106 | ||||
3107 | <opt> | |||
3108 | <person firstname="Joe" lastname="Smith"> | |||
3109 | <email>joe@smith.com</email> | |||
3110 | <email>jsmith@yahoo.com</email> | |||
3111 | </person> | |||
3112 | <person firstname="Bob" lastname="Smith"> | |||
3113 | <email>bob@smith.com</email> | |||
3114 | </person> | |||
3115 | </opt> | |||
3116 | ||||
3117 | { | |||
3118 | 'person' => [ | |||
3119 | { | |||
3120 | 'email' => [ | |||
3121 | 'joe@smith.com', | |||
3122 | 'jsmith@yahoo.com' | |||
3123 | ], | |||
3124 | 'firstname' => 'Joe', | |||
3125 | 'lastname' => 'Smith' | |||
3126 | }, | |||
3127 | { | |||
3128 | 'email' => 'bob@smith.com', | |||
3129 | 'firstname' => 'Bob', | |||
3130 | 'lastname' => 'Smith' | |||
3131 | } | |||
3132 | ] | |||
3133 | } | |||
3134 | ||||
3135 | Nested elements with a recognised key attribute are transformed (folded) from | |||
3136 | an array into a hash keyed on the value of that attribute (see the C<KeyAttr> | |||
3137 | option): | |||
3138 | ||||
3139 | <opt> | |||
3140 | <person key="jsmith" firstname="Joe" lastname="Smith" /> | |||
3141 | <person key="tsmith" firstname="Tom" lastname="Smith" /> | |||
3142 | <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | |||
3143 | </opt> | |||
3144 | ||||
3145 | { | |||
3146 | 'person' => { | |||
3147 | 'jbloggs' => { | |||
3148 | 'firstname' => 'Joe', | |||
3149 | 'lastname' => 'Bloggs' | |||
3150 | }, | |||
3151 | 'tsmith' => { | |||
3152 | 'firstname' => 'Tom', | |||
3153 | 'lastname' => 'Smith' | |||
3154 | }, | |||
3155 | 'jsmith' => { | |||
3156 | 'firstname' => 'Joe', | |||
3157 | 'lastname' => 'Smith' | |||
3158 | } | |||
3159 | } | |||
3160 | } | |||
3161 | ||||
3162 | ||||
3163 | The <anon> tag can be used to form anonymous arrays: | |||
3164 | ||||
3165 | <opt> | |||
3166 | <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | |||
3167 | <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | |||
3168 | <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | |||
3169 | <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | |||
3170 | </opt> | |||
3171 | ||||
3172 | { | |||
3173 | 'head' => [ | |||
3174 | [ 'Col 1', 'Col 2', 'Col 3' ] | |||
3175 | ], | |||
3176 | 'data' => [ | |||
3177 | [ 'R1C1', 'R1C2', 'R1C3' ], | |||
3178 | [ 'R2C1', 'R2C2', 'R2C3' ], | |||
3179 | [ 'R3C1', 'R3C2', 'R3C3' ] | |||
3180 | ] | |||
3181 | } | |||
3182 | ||||
3183 | Anonymous arrays can be nested to arbirtrary levels and as a special case, if | |||
3184 | the surrounding tags for an XML document contain only an anonymous array the | |||
3185 | arrayref will be returned directly rather than the usual hashref: | |||
3186 | ||||
3187 | <opt> | |||
3188 | <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | |||
3189 | <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | |||
3190 | <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | |||
3191 | </opt> | |||
3192 | ||||
3193 | [ | |||
3194 | [ 'Col 1', 'Col 2' ], | |||
3195 | [ 'R1C1', 'R1C2' ], | |||
3196 | [ 'R2C1', 'R2C2' ] | |||
3197 | ] | |||
3198 | ||||
3199 | Elements which only contain text content will simply be represented as a | |||
3200 | scalar. Where an element has both attributes and text content, the element | |||
3201 | will be represented as a hashref with the text content in the 'content' key | |||
3202 | (see the C<ContentKey> option): | |||
3203 | ||||
3204 | <opt> | |||
3205 | <one>first</one> | |||
3206 | <two attr="value">second</two> | |||
3207 | </opt> | |||
3208 | ||||
3209 | { | |||
3210 | 'one' => 'first', | |||
3211 | 'two' => { 'attr' => 'value', 'content' => 'second' } | |||
3212 | } | |||
3213 | ||||
3214 | Mixed content (elements which contain both text content and nested elements) | |||
3215 | will be not be represented in a useful way - element order and significant | |||
3216 | whitespace will be lost. If you need to work with mixed content, then | |||
3217 | XML::Simple is not the right tool for your job - check out the next section. | |||
3218 | ||||
3219 | =head1 WHERE TO FROM HERE? | |||
3220 | ||||
3221 | B<XML::Simple> is able to present a simple API because it makes some | |||
3222 | assumptions on your behalf. These include: | |||
3223 | ||||
3224 | =over 4 | |||
3225 | ||||
3226 | =item * | |||
3227 | ||||
3228 | You're not interested in text content consisting only of whitespace | |||
3229 | ||||
3230 | =item * | |||
3231 | ||||
3232 | You don't mind that when things get slurped into a hash the order is lost | |||
3233 | ||||
3234 | =item * | |||
3235 | ||||
3236 | You don't want fine-grained control of the formatting of generated XML | |||
3237 | ||||
3238 | =item * | |||
3239 | ||||
3240 | You would never use a hash key that was not a legal XML element name | |||
3241 | ||||
3242 | =item * | |||
3243 | ||||
3244 | You don't need help converting between different encodings | |||
3245 | ||||
3246 | =back | |||
3247 | ||||
3248 | In a serious XML project, you'll probably outgrow these assumptions fairly | |||
3249 | quickly. This section of the document used to offer some advice on chosing a | |||
3250 | more powerful option. That advice has now grown into the 'Perl-XML FAQ' | |||
3251 | document which you can find at: L<http://perl-xml.sourceforge.net/faq/> | |||
3252 | ||||
3253 | The advice in the FAQ boils down to a quick explanation of tree versus | |||
3254 | event based parsers and then recommends: | |||
3255 | ||||
3256 | For event based parsing, use SAX (do not set out to write any new code for | |||
3257 | XML::Parser's handler API - it is obselete). | |||
3258 | ||||
3259 | For tree-based parsing, you could choose between the 'Perlish' approach of | |||
3260 | L<XML::Twig> and more standards based DOM implementations - preferably one with | |||
3261 | XPath support. | |||
3262 | ||||
3263 | ||||
3264 | =head1 SEE ALSO | |||
3265 | ||||
3266 | B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>. | |||
3267 | ||||
3268 | To generate documents with namespaces, L<XML::NamespaceSupport> is required. | |||
3269 | ||||
3270 | The optional caching functions require L<Storable>. | |||
3271 | ||||
3272 | Answers to Frequently Asked Questions about XML::Simple are bundled with this | |||
3273 | distribution as: L<XML::Simple::FAQ> | |||
3274 | ||||
3275 | =head1 COPYRIGHT | |||
3276 | ||||
3277 | Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt> | |||
3278 | ||||
3279 | This library is free software; you can redistribute it and/or modify it | |||
3280 | under the same terms as Perl itself. | |||
3281 | ||||
3282 | =cut | |||
3283 | ||||
3284 |