File Coverage

File:blib/lib/XML/Twig.pm
Coverage:90.4%

linestmtbrancondsubpodtimecode
1# $Id: /xmltwig/trunk/Twig_pm.slow 30 2007-11-13T18:10:03.393214Z mrodrigu $
2#
3# Copyright (c) 1999-2004 Michel Rodriguez
4# All rights reserved.
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the same terms as Perl itself.
8#
9
10# This is created in the caller's space
11BEGIN
12
92
6
2203
302
{ sub ::PCDATA { '#PCDATA' }
13
5
723
  sub ::CDATA { '#CDATA' }
14}
15
16
92
92
92
87842
1344
2919
use UNIVERSAL qw(isa);
17
18######################################################################
19package XML::Twig;
20######################################################################
21
22require 5.004;
23
92
92
92
4245
618
1876
use strict;
24
25
92
92
92
119524
739
2994
use utf8; # > perl 5.5
26
27
92
92
92
4598
1017
2436
use vars qw($VERSION @ISA %valid_option);
28
92
92
92
2417
759
3600
use Carp;
29
30
92
92
92
2943
716
3489
use File::Spec;
31
92
92
92
2245
1498
4067
use File::Basename;
32
33
92
92
92
2373
945
1812
use UNIVERSAL qw(isa);
34
35# constants: element types
36
92
92
92
2634
1087
3450
use constant (PCDATA => '#PCDATA');
37
92
92
92
3384
647
2168
use constant (CDATA => '#CDATA');
38
92
92
92
7315
640
1834
use constant (PI => '#PI');
39
92
92
92
2716
611
4368
use constant (COMMENT => '#COMMENT');
40
92
92
92
2879
623
2367
use constant (ENT => '#ENT');
41
42# element classes
43
92
92
92
2689
735
2196
use constant (ELT => '#ELT');
44
92
92
92
2792
1234
2113
use constant (TEXT => '#TEXT');
45
46# element properties
47
92
92
92
2751
643
1821
use constant (ASIS => '#ASIS');
48
92
92
92
2163
861
2120
use constant (EMPTY => '#EMPTY');
49
50# used in parseurl to set the buffer size to the same size as in XML::Parser::Expat
51
92
92
92
2550
1032
1736
use constant (BUFSIZE => 32768);
52
53
54# used to store the gi's
55my %gi2index; # gi => index
56my @index2gi; # list of gi's
57my $SPECIAL_GI; # first non-special gi;
58my %base_ent; # base entity character => replacement
59
60# flag, set to true if the weaken sub is available
61
92
92
92
3782
911
2190
use vars qw( $weakrefs);
62
63
64
65# xml name (leading # allowed)
66# first line is for perl 5.005, second line for modern perl, that accept character classes
67my $REG_NAME = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*)}; # does not work for leading non-ascii letters
68   $REG_NAME = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*)}; # > perl 5.5
69
70# name or wildcard (* or '') (leading # allowed)
71my $REG_NAME_W = q{(?:(?:[^\W\d]|[:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # does not work for leading non-ascii letters
72   $REG_NAME_W = q{(?:(?:[[:alpha:]:#])(?:[\w.-]*:)?[\w.-]*|\*)}; # > perl 5.5
73
74my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp
75my $REG_REGEXP_EXP = q{(?:(?:[^\\/]|\\.)*)}; # content of a regexp
76my $REG_REGEXP_MOD = q{(?:[eimso]*)}; # regexp modifiers
77my $REG_MATCH = q{[!=]~}; # match (or not)
78my $REG_STRING = q{(?:"(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*')}; # string (simple or double quoted)
79my $REG_NUMBER = q{(?:\d+(?:\.\d*)?|\.\d+)}; # number
80my $REG_VALUE = qq{(?:$REG_STRING|$REG_NUMBER)}; # value
81my $REG_OP = q{==|!=|>|<|>=|<=|eq|ne|lt|gt|le|ge|=}; # op
82my $REG_FUNCTION = q{(?:string|text)\(\s*\)};
83my $REG_STRING_ARG = qq{(?:string|text)\\(\\s*$REG_NAME_W\\s*\\)};
84my $REG_COMP = q{(?:>=|<=|!=|<|>|=)};
85
86
87# used in the handler trigger code
88my $REG_NAKED_PREDICATE= qq{((?:"[^"]*"|'[^']*'|$REG_STRING_ARG|$REG_FUNCTION|\@$REG_NAME_W|$REG_MATCH\\s*$REG_REGEXP|[\\s\\d><=!()+.-]|(?i:and)|(?i:or))*)};
89my $REG_PREDICATE= qq{\\[$REG_NAKED_PREDICATE\\]};
90
91# not all axis, only supported ones (in get_xpath)
92my @supported_axis= ( 'ancestor', 'ancestor-or-self', 'child', 'descendant', 'descendant-or-self',
93                      'following', 'following-sibling', 'parent', 'preceding', 'preceding-sibling', 'self'
94                    );
95my $REG_AXIS = "(?:" . join( '|', @supported_axis) .")";
96
97# only used in the "xpath"engine (for get_xpath/findnodes) for now
98my $REG_PREDICATE_ALT = qr{\[(?:(?:string\(\s*\)|\@$REG_NAME)\s*$REG_MATCH\s*$REG_REGEXP\s*|[^\]]*)\]};
99
100# used to convert XPath tests on strings to the perl equivalent
101my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le ');
102
103my $parser_version;
104my( $FB_HTMLCREF, $FB_XMLCREF);
105
106BEGIN
107{
108
92
1822
$VERSION = '3.32';
109
110
92
92
92
3489
775
4569
use XML::Parser;
111
92
1376
my $needVersion = '2.23';
112
92
1087
$parser_version= $XML::Parser::VERSION;
113
92
2710
croak "need at least XML::Parser version $needVersion" unless $parser_version >= $needVersion;
114
115
92
1961
if( $] >= 5.008)
116
92
92
92
92
918
4633
873
5178
  { eval "use Encode qw( :all)";
117
92
13506
    $FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF;
118
92
8773
    $FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF;
119  }
120
121# test whether we can use weak references
122# set local empty signal handler to trap error messages
123
92
92
829
2232
{ local $SIG{__DIE__};
124
92
12484
  if( eval( 'require Scalar::Util') && defined( \&Scalar::Util::weaken))
125
92
92
3634
3117
    { import Scalar::Util( 'weaken'); $weakrefs= 1; }
126  elsif( eval( 'require WeakRef'))
127
0
0
0
0
    { import WeakRef; $weakrefs= 1; }
128  else
129
0
0
    { $weakrefs= 0; }
130}
131
132
92
4762
import XML::Twig::Elt;
133
92
2701
import XML::Twig::Entity;
134
92
2783
import XML::Twig::Entity_list;
135
136# used to store the gi's
137# should be set for each twig really, at least when there are several
138# the init ensures that special gi's are always the same
139
140# gi => index
141# do NOT use => or the constants become quoted!
142
92
3768
%XML::Twig::gi2index=( PCDATA, 0, CDATA, 1, PI, 2, COMMENT, 3, ENT, 4);
143# list of gi's
144
92
2985
@XML::Twig::index2gi=( PCDATA, CDATA, PI, COMMENT, ENT);
145
146# gi's under this value are special
147
92
1549
$XML::Twig::SPECIAL_GI= @XML::Twig::index2gi;
148
149
92
4019
%XML::Twig::base_ent= ( '>' => '&gt;', '<' => '&lt;', '&' => '&amp;', "'" => '&apos;', '"' => '&quot;',);
150
151# now set some aliases
152
92
4073
*find_nodes = *get_xpath; # same as XML::XPath
153
92
1472
*findnodes = *get_xpath; # same as XML::LibXML
154
92
1065
*getElementsByTagName = *descendants;
155
92
1241
*descendants_or_self = *descendants; # valid in XML::Twig, not in XML::Twig::Elt
156
92
1111
*find_by_tag_name = *descendants;
157
92
1425
*getElementById = *elt_id;
158
92
5861
*getEltById = *elt_id;
159
92
1487
*toString = *sprint;
160}
161
162@ISA = qw(XML::Parser);
163
164# fake gi's used in twig_handlers and start_tag_handlers
165my $ALL = '_all_'; # the associated function is always called
166my $DEFAULT= '_default_'; # the function is called if no other handler has been
167
168# some defaults
169my $COMMENTS_DEFAULT= 'keep';
170my $PI_DEFAULT = 'keep';
171
172
173# handlers used in regular mode
174my %twig_handlers=( Start => \&_twig_start,
175                    End => \&_twig_end,
176                    Char => \&_twig_char,
177                    Entity => \&_twig_entity,
178                    XMLDecl => \&_twig_xmldecl,
179                    Doctype => \&_twig_doctype,
180                    Element => \&_twig_element,
181                    Attlist => \&_twig_attlist,
182                    CdataStart => \&_twig_cdatastart,
183                    CdataEnd => \&_twig_cdataend,
184                    Proc => \&_twig_pi,
185                    Comment => \&_twig_comment,
186                    Default => \&_twig_default,
187                    ExternEnt => \&_twig_extern_ent,
188      );
189
190# handlers used when twig_roots is used and we are outside of the roots
191my %twig_handlers_roots=
192  ( Start => \&_twig_start_check_roots,
193    End => \&_twig_end_check_roots,
194    Doctype => \&_twig_doctype,
195    Char => undef, Entity => undef, XMLDecl => \&_twig_xmldecl,
196    Element => undef, Attlist => undef, CdataStart => undef,
197    CdataEnd => undef, Proc => undef, Comment => undef,
198    Proc => \&_twig_pi_check_roots,
199    Default => sub {}, # hack needed for XML::Parser 2.27
200    ExternEnt => \&_twig_extern_ent,
201  );
202
203# handlers used when twig_roots and print_outside_roots are used and we are
204# outside of the roots
205my %twig_handlers_roots_print_2_30=
206  ( Start => \&_twig_start_check_roots,
207    End => \&_twig_end_check_roots,
208    Char => \&_twig_print,
209    Entity => \&_twig_print_entity,
210    ExternEnt => \&_twig_print_entity,
211    DoctypeFin => \&_twig_doctype_fin_print,
212    XMLDecl => \&_twig_print,
213    Doctype => \&_twig_print_doctype, # because recognized_string is broken here
214    # Element => \&_twig_print, Attlist => \&_twig_print,
215    CdataStart => \&_twig_print, CdataEnd => \&_twig_print,
216    Proc => \&_twig_pi_check_roots, Comment => \&_twig_print,
217    Default => \&_twig_print_check_doctype,
218    ExternEnt => \&_twig_extern_ent,
219  );
220
221# handlers used when twig_roots, print_outside_roots and keep_encoding are used
222# and we are outside of the roots
223my %twig_handlers_roots_print_original_2_30=
224  ( Start => \&_twig_start_check_roots,
225    End => \&_twig_end_check_roots,
226    Char => \&_twig_print_original,
227    # I have no idea why I should not be using this handler!
228    Entity => \&_twig_print_entity,
229    ExternEnt => \&_twig_print_entity,
230    DoctypeFin => \&_twig_doctype_fin_print,
231    XMLDecl => \&_twig_print_original,
232    Doctype => \&_twig_print_original_doctype, # because original_string is broken here
233    Element => \&_twig_print_original, Attlist => \&_twig_print_original,
234    CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original,
235    Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original,
236    Default => \&_twig_print_original_check_doctype,
237  );
238
239# handlers used when twig_roots and print_outside_roots are used and we are
240# outside of the roots
241my %twig_handlers_roots_print_2_27=
242  ( Start => \&_twig_start_check_roots,
243    End => \&_twig_end_check_roots,
244    Char => \&_twig_print,
245    # if the Entity handler is set then it prints the entity declaration
246    # before the entire internal subset (including the declaration!) is output
247    Entity => sub {},
248    XMLDecl => \&_twig_print, Doctype => \&_twig_print,
249    CdataStart => \&_twig_print, CdataEnd => \&_twig_print,
250    Proc => \&_twig_pi_check_roots, Comment => \&_twig_print,
251    Default => \&_twig_print,
252    ExternEnt => \&_twig_extern_ent,
253  );
254
255# handlers used when twig_roots, print_outside_roots and keep_encoding are used
256# and we are outside of the roots
257my %twig_handlers_roots_print_original_2_27=
258  ( Start => \&_twig_start_check_roots,
259    End => \&_twig_end_check_roots,
260    Char => \&_twig_print_original,
261    # for some reason original_string is wrong here
262    # this can be a problem if the doctype includes non ascii characters
263    XMLDecl => \&_twig_print, Doctype => \&_twig_print,
264    # if the Entity handler is set then it prints the entity declaration
265    # before the entire internal subset (including the declaration!) is output
266    Entity => sub {},
267    #Element => undef, Attlist => undef,
268    CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original,
269    Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original,
270    Default => \&_twig_print, # _twig_print_original does not work
271    ExternEnt => \&_twig_extern_ent,
272  );
273
274
275my %twig_handlers_roots_print= $parser_version > 2.27
276                               ? %twig_handlers_roots_print_2_30
277                               : %twig_handlers_roots_print_2_27;
278my %twig_handlers_roots_print_original= $parser_version > 2.27
279                               ? %twig_handlers_roots_print_original_2_30
280                               : %twig_handlers_roots_print_original_2_27;
281
282
283# handlers used when the finish_print method has been called
284my %twig_handlers_finish_print=
285  ( Start => \&_twig_print,
286    End => \&_twig_print, Char => \&_twig_print,
287    Entity => \&_twig_print, XMLDecl => \&_twig_print,
288    Doctype => \&_twig_print, Element => \&_twig_print,
289    Attlist => \&_twig_print, CdataStart => \&_twig_print,
290    CdataEnd => \&_twig_print, Proc => \&_twig_print,
291    Comment => \&_twig_print, Default => \&_twig_print,
292    ExternEnt => \&_twig_extern_ent,
293  );
294
295# handlers used when the finish_print method has been called and the keep_encoding
296# option is used
297my %twig_handlers_finish_print_original=
298  ( Start => \&_twig_print_original, End => \&_twig_print_end_original,
299    Char => \&_twig_print_original, Entity => \&_twig_print_original,
300    XMLDecl => \&_twig_print_original, Doctype => \&_twig_print_original,
301    Element => \&_twig_print_original, Attlist => \&_twig_print_original,
302    CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original,
303    Proc => \&_twig_print_original, Comment => \&_twig_print_original,
304    Default => \&_twig_print_original,
305  );
306
307# handlers used within ignored elements
308my %twig_handlers_ignore=
309  ( Start => \&_twig_ignore_start,
310    End => \&_twig_ignore_end,
311    Char => undef, Entity => undef, XMLDecl => undef,
312    Doctype => undef, Element => undef, Attlist => undef,
313    CdataStart => undef, CdataEnd => undef, Proc => undef,
314    Comment => undef, Default => undef,
315    ExternEnt => undef,
316  );
317
318
319# those handlers are only used if the entities are NOT to be expanded
320my %twig_noexpand_handlers= ( ExternEnt => undef, Default => \&_twig_default );
321
322my @saved_default_handler;
323
324my $ID= 'id'; # default value, set by the Id argument
325
326# all allowed options
327%valid_option=
328    ( # XML::Twig options
329      TwigHandlers => 1, Id => 1,
330      TwigRoots => 1, TwigPrintOutsideRoots => 1,
331      StartTagHandlers => 1, EndTagHandlers => 1,
332      ForceEndTagHandlersUsage => 1,
333      DoNotChainHandlers => 1,
334      IgnoreElts => 1,
335      Index => 1,
336      CharHandler => 1,
337      TopDownHandlers => 1,
338      KeepEncoding => 1, DoNotEscapeAmpInAtts => 1,
339      ParseStartTag => 1, KeepAttsOrder => 1,
340      LoadDTD => 1, DTDHandler => 1,
341      DoNotOutputDTD => 1, NoProlog => 1,
342      ExpandExternalEnts => 1,
343      DiscardSpaces => 1, KeepSpaces => 1,
344      DiscardSpacesIn => 1, KeepSpacesIn => 1,
345      PrettyPrint => 1, EmptyTags => 1,
346      Quote => 'double',
347      Comments => 1, Pi => 1,
348      OutputFilter => 1, InputFilter => 1,
349      OutputTextFilter => 1,
350      OutputEncoding => 1,
351      RemoveCdata => 1,
352      EltClass => 1,
353      MapXmlns => 1, KeepOriginalPrefix => 1,
354      SkipMissingEnts => 1,
355      # XML::Parser options
356      ErrorContext => 1, ProtocolEncoding => 1,
357      Namespaces => 1, NoExpand => 1,
358      Stream_Delimiter => 1, ParseParamEnt => 1,
359      NoLWP => 1, Non_Expat_Options => 1,
360      Xmlns => 1,
361    );
362
363# predefined input and output filters
364
92
92
92
2777
660
4189
use vars qw( %filter);
365%filter= ( html => \&html_encode,
366           safe => \&safe_encode,
367           safe_hex => \&safe_encode_hex,
368         );
369
370
371# trigger types (used to sort them)
372my ($XPATH_TRIGGER, $REGEXP_TRIGGER, $LEVEL_TRIGGER)=(1..3);
373
374sub new
375
2771
1831680
  { my ($class, %args) = @_;
376
2771
28164
    my $handlers;
377
378    # change all nice_perlish_names into nicePerlishNames
379
2771
117369
    %args= _normalize_args( %args);
380
381    # check options
382
2771
81065
    unless( $args{MoreOptions})
383
2770
73091
      { foreach my $arg (keys %args)
384
5054
138885
        { carp "invalid option $arg" unless $valid_option{$arg}; }
385      }
386
387    # a twig is really an XML::Parser
388    # my $self= XML::Parser->new(%args);
389
2771
37292
    my $self;
390
2771
98716
    $self= XML::Parser->new(%args);
391
392
2771
1877222
    bless $self, $class;
393
394
2771
93904
    $self->{_twig_context_stack}= [];
395
396
2771
65874
    if( exists $args{TwigHandlers})
397
162
4053
      { $handlers= $args{TwigHandlers};
398
162
3443
        $self->setTwigHandlers( $handlers);
399
156
3385
        delete $args{TwigHandlers};
400      }
401
402    # take care of twig-specific arguments
403
2765
59133
    if( exists $args{StartTagHandlers})
404
23
507
      { $self->setStartTagHandlers( $args{StartTagHandlers});
405
23
337
        delete $args{StartTagHandlers};
406      }
407
408
2765
119352
    if( exists $args{DoNotChainHandlers})
409
1
30
      { $self->{twig_do_not_chain_handlers}= $args{DoNotChainHandlers}; }
410
411
2765
55449
    if( exists $args{IgnoreElts})
412      { # change array to hash so you can write ignore_elts => [ qw(foo bar baz)]
413
4
0
0
0
162
0
0
0
        if( isa( $args{IgnoreElts}, 'ARRAY')) { $args{IgnoreElts}= { map { $_ => 1 } @{$args{IgnoreElts}} }; }
414
4
483
        $self->setIgnoreEltsHandlers( $args{IgnoreElts});
415
4
68
        delete $args{IgnoreElts};
416      }
417
418
2765
52293
    if( exists $args{Index})
419
2
27
      { my $index= $args{Index};
420        # we really want a hash name => path, we turn an array into a hash if necessary
421
2
38
        if( ref( $index) eq 'ARRAY')
422
1
2
15
38
          { my %index= map { $_ => $_ } @$index;
423
1
19
            $index= \%index;
424          }
425
2
52
        while( my( $name, $exp)= each %$index)
426
3
4
4
4
13057
26
112
562
          { $self->setTwigHandler( $exp, sub { push @{$_[0]->{_twig_index}->{$name}}, $_; 1; }); }
427      }
428
429
2765
157779
    $self->{twig_elt_class}= $args{EltClass} || 'XML::Twig::Elt';
430
2765
62
89949
944
    if( exists( $args{EltClass})) { delete $args{EltClass}; }
431
432
2765
49036
    if( exists( $args{MapXmlns}))
433
15
304
      { $self->{twig_map_xmlns}= $args{MapXmlns};
434
15
225
        $self->{Namespaces}=1;
435
15
201
        delete $args{MapXmlns};
436      }
437
438
2765
95348
    if( exists( $args{KeepOriginalPrefix}))
439
4
75
      { $self->{twig_keep_original_prefix}= $args{KeepOriginalPrefix};
440
4
50
        delete $args{KeepOriginalPrefix};
441      }
442
443
2765
63128
    $self->{twig_dtd_handler}= $args{DTDHandler};
444
2765
66458
    delete $args{DTDHandler};
445
446
2765
66838
    if( $args{ExpandExternalEnts})
447
4
69
      { $self->set_expand_external_entities( 1);
448
4
80
        $self->{twig_expand_external_ents}= $args{ExpandExternalEnts};
449
4
62
        $self->{twig_read_external_dtd}= 1; # implied by ExpandExternalEnts
450
4
74
        if( $args{ExpandExternalEnts} == -1)
451
1
16
          { $self->{twig_extern_ent_nofail}= 1;
452
1
67
            $self->setHandlers( ExternEnt => \&_twig_extern_ent_nofail);
453          }
454
4
257
        delete $args{LoadDTD};
455
4
52
        delete $args{ExpandExternalEnts};
456      }
457    else
458
2761
65108
      { $self->set_expand_external_entities( 0); }
459
460
2765
143360
    if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP'))
461
0
0
      { $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler }
462    else
463
2765
132369
      { $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler }
464
465
2765
58810
    if( $args{DoNotEscapeAmpInAtts})
466
1
58
      { $self->set_do_not_escape_amp_in_atts( 1);
467
1
20
        $self->{twig_do_not_escape_amp_in_atts}=1;
468      }
469    else
470
2764
56703
      { $self->set_do_not_escape_amp_in_atts( 0);
471
2764
57152
        $self->{twig_do_not_escape_amp_in_atts}=0;
472      }
473
474    # deal with TwigRoots argument, a hash of elements for which
475    # subtrees will be built (and associated handlers)
476
477
2765
50432
    if( $args{TwigRoots})
478
63
3059
      { $self->setTwigRoots( $args{TwigRoots});
479
61
914
        delete $args{TwigRoots};
480      }
481
482
2763
64480
    if( $args{EndTagHandlers})
483
11
1082
      { unless ($self->{twig_roots} || $args{ForceEndTagHandlersUsage})
484
1
27
          { croak "you should not use EndTagHandlers without TwigRoots\n",
485                  "if you want to use it anyway, normally because you have ",
486                  "a start_tag_handlers that calls 'ignore' and you want to ",
487                  "call an ent_tag_handlers at the end of the element, then ",
488                  "pass 'force_end_tag_handlers_usage => 1' as an argument ",
489                  "to new";
490          }
491
492
10
213
        $self->setEndTagHandlers( $args{EndTagHandlers});
493
10
149
        delete $args{EndTagHandlers};
494      }
495
496
2762
46250
    if( $args{TwigPrintOutsideRoots})
497
34
653
      { croak "cannot use TwigPrintOutsideRoots without TwigRoots"
498          unless( $self->{twig_roots});
499        # if the arg is a filehandle then store it
500
33
544
        if( _is_fh( $args{TwigPrintOutsideRoots}) )
501
31
659
          { $self->{twig_output_fh}= $args{TwigPrintOutsideRoots}; }
502
33
843
        $self->{twig_default_print}= $args{TwigPrintOutsideRoots};
503      }
504
505    # space policy
506
2761
64880
    if( $args{KeepSpaces})
507
18
313
      { croak "cannot use both keep_spaces and discard_spaces" if( $args{DiscardSpaces});
508
17
3555
        croak "cannot use both keep_spaces and keep_spaces_in" if( $args{KeepSpacesIn});
509
16
251
        $self->{twig_keep_spaces}=1;
510
16
222
        delete $args{KeepSpaces};
511      }
512
2759
54854
    if( $args{DiscardSpaces})
513
2
46
      { croak "cannot use both discard_spaces and keep_spaces_in" if( $args{KeepSpacesIn});
514
1
16
        $self->{twig_discard_spaces}=1;
515
1
14
        delete $args{DiscardSpaces};
516      }
517
2758
291632
    if( $args{KeepSpacesIn})
518
8
212
      { croak "cannot use both keep_spaces_in and discard_spaces_in" if( $args{DiscardSpacesIn});
519
7
617
        $self->{twig_discard_spaces}=1;
520
7
121
        $self->{twig_keep_spaces_in}={};
521
7
7
67
139
        my @tags= @{$args{KeepSpacesIn}};
522
7
9
100
225
        foreach my $tag (@tags) { $self->{twig_keep_spaces_in}->{$tag}=1; }
523
7
402
        delete $args{KeepSpacesIn};
524      }
525
2757
55677
    if( $args{DiscardSpacesIn})
526
4
60
      { $self->{twig_keep_spaces}=1;
527
4
62
        $self->{twig_discard_spaces_in}={};
528
4
4
28
369
        my @tags= @{$args{DiscardSpacesIn}};
529
4
6
61
145
        foreach my $tag (@tags) { $self->{twig_discard_spaces_in}->{$tag}=1; }
530
4
62
        delete $args{DiscardSpacesIn};
531      }
532    # discard spaces by default
533
2757
118901
    $self->{twig_discard_spaces}= 1 unless( $self->{twig_keep_spaces});
534
535
2757
68625
    $args{Comments}||= $COMMENTS_DEFAULT;
536
2757
3
544586
47
    if( $args{Comments} eq 'drop') { $self->{twig_keep_comments}= 0; }
537
1868
35879
    elsif( $args{Comments} eq 'keep') { $self->{twig_keep_comments}= 1; }
538
885
19399
    elsif( $args{Comments} eq 'process') { $self->{twig_process_comments}= 1; }
539
1
31
    else { croak "wrong value for comments argument: '$args{Comments}' (should be 'drop', 'keep' or 'process')"; }
540
2756
45504
    delete $args{Comments};
541
542
2756
56391
    $args{Pi}||= $PI_DEFAULT;
543
2756
2
107207
36
    if( $args{Pi} eq 'drop') { $self->{twig_keep_pi}= 0; }
544
1870
38629
    elsif( $args{Pi} eq 'keep') { $self->{twig_keep_pi}= 1; }
545
883
16090
    elsif( $args{Pi} eq 'process') { $self->{twig_process_pi}= 1; }
546
1
30
    else { croak "wrong value for pi argument: '$args{Pi}' (should be 'drop', 'keep' or 'process')"; }
547
2755
40461
    delete $args{Pi};
548
549
2755
51621
    if( $args{KeepEncoding})
550      {
551        # set it in XML::Twig::Elt so print functions know what to do
552
968
21324
        $self->set_keep_encoding( 1);
553
968
51991
        $self->{parse_start_tag}= $args{ParseStartTag} || \&_parse_start_tag;
554
968
36368
        delete $args{ParseStartTag} if defined( $args{ParseStartTag}) ;
555
968
14729
        delete $args{KeepEncoding};
556      }
557    else
558
1787
44944
      { $self->set_keep_encoding( 0);
559
1787
42583
        $self->{parse_start_tag}= $args{ParseStartTag} if( $args{ParseStartTag});
560      }
561
562
2755
47989
    if( $args{OutputFilter})
563
5
106
      { $self->set_output_filter( $args{OutputFilter});
564
5
72
        delete $args{OutputFilter};
565      }
566    else
567
2750
61271
      { $self->set_output_filter( 0); }
568
569
2755
55723
    if( $args{RemoveCdata})
570
1
20
      { $self->set_remove_cdata( $args{RemoveCdata});
571
1
11
        delete $args{RemoveCdata};
572      }
573    else
574
2754
49248
      { $self->set_remove_cdata( 0); }
575
576
2755
48891
    if( $args{OutputTextFilter})
577
5
92
      { $self->set_output_text_filter( $args{OutputTextFilter});
578
5
74
        delete $args{OutputTextFilter};
579      }
580    else
581
2750
52796
      { $self->set_output_text_filter( 0); }
582
583
584
2755
50439
    if( exists $args{KeepAttsOrder})
585
7
134
      { $self->{keep_atts_order}= $args{KeepAttsOrder};
586
7
547
        if( _use( 'Tie::IxHash'))
587
6
132
          { $self->set_keep_atts_order( $self->{keep_atts_order}); }
588        else
589
1
16
          { croak "Tie::IxHash not available, option keep_atts_order not allowed"; }
590      }
591    else
592
2748
57028
      { $self->set_keep_atts_order( 0); }
593
594
595
2754
42
58655
807
    if( $args{PrettyPrint}) { $self->set_pretty_print( $args{PrettyPrint}); }
596
2754
1
51785
24
    if( $args{Quote}) { $self->set_quote( $args{Quote}); }
597
2754
12
43740
212
    if( $args{EmptyTags}) { $self->set_empty_tag_style( $args{EmptyTags}) }
598
599
2754
1
1
50787
18
12
    if( exists $args{Id}) { $ID= $args{Id}; delete $args{ID}; }
600
2754
3
3
45545
43
41
    if( $args{NoProlog}) { $self->{no_prolog}= 1; delete $args{NoProlog}; }
601
2754
2
2
44351
40
27
    if( $args{DoNotOutputDTD}) { $self->{no_dtd_output}= 1; delete $args{DoNotOutputDTD}; }
602
2754
4
4
45066
73
52
    if( $args{LoadDTD}) { $self->{twig_read_external_dtd}= 1; delete $args{LoadDTD}; }
603
2754
1
1
50283
26
15
    if( $args{CharHandler}) { $self->setCharHandler( $args{CharHandler}); delete $args{CharHandler}; }
604
605
2754
3
1
53405
61
15
    if( $args{InputFilter}) { $self->set_input_filter( $args{InputFilter}); delete $args{InputFilter}; }
606
2752
1
1
46367
25
294
    if( $args{NoExpand}) { $self->setHandlers( %twig_noexpand_handlers); $self->{twig_no_expand}=1; }
607
2752
1
1
51333
17
15
    if( my $output_encoding= $args{OutputEncoding}) { $self->set_output_encoding( $output_encoding); delete $args{OutputFilter}; }
608
609
2752
4
4
57416
56
48
    if( my $tdh= $args{TopDownHandlers}) { $self->{twig_tdh}=1; delete $args{TopDownHandlers}; }
610
611    # set handlers
612
2752
57887
    if( $self->{twig_roots})
613
61
1368
      { if( $self->{twig_default_print})
614
33
566
          { if( $self->{twig_keep_encoding})
615
7
375
              { $self->setHandlers( %twig_handlers_roots_print_original); }
616            else
617
26
1215
              { $self->setHandlers( %twig_handlers_roots_print); }
618          }
619        else
620
28
1363
          { $self->setHandlers( %twig_handlers_roots); }
621      }
622    else
623
2691
172260
      { $self->setHandlers( %twig_handlers); }
624
625    # XML::Parser::Expat does not like these handler to be set. So in order to
626    # use the various sets of handlers on XML::Parser or XML::Parser::Expat
627    # objects when needed, these ones have to be set only once, here, at
628    # XML::Parser level
629
2752
6282070
    $self->setHandlers( Init => \&_twig_init, Final => \&_twig_final);
630
631
2752
1210120
    $self->{twig_entity_list}= XML::Twig::Entity_list->new;
632
633
2752
253756
    $self->{twig_id}= $ID;
634
2752
52168
    $self->{twig_stored_spaces}='';
635
636
2752
52618
    $self->{twig_autoflush}= 1; # auto flush by default
637
638
2752
49337
    $self->{twig}= $self;
639
2752
127792
    weaken( $self->{twig}) if( $weakrefs);
640
641
2752
83909
    return $self;
642  }
643
644sub parse
645  {
646
2810
105850
    my $t= shift;
647    # if called as a class method, calls nparse, which creates the twig then parses it
648
2810
35
220121
715
    if( !ref( $t) || !isa( $t, 'XML::Twig')) { return $t->nparse( @_); }
649
650    # requires 5.006 at least (or the ${^UNICODE} causes a problem) # > perl 5.5
651    # trap underlying bug in IO::Handle (see RT #17500) # > perl 5.5
652    # croak if perl 5.8+, -CD (or PERL_UNICODE set to D) and parsing a pipe # > perl 5.5
653
2775
224995
    if( $]>=5.008 && ${^UNICODE} && (${^UNICODE} & 24) && isa( $_[0], 'GLOB') && -p $_[0] ) # > perl 5.5
654
0
0
      { croak "cannot parse the output of a pipe when perl is set to use the UTF8 perlIO layer\n" # > perl 5.5
655              . "set the environment variable PERL_UNICODE or use the -C option (see perldoc perlrun)\n" # > perl 5.5
656              . "not to include 'D'"; # > perl 5.5
657      } # > perl 5.5
658
2775
2775
37292
155273
    $t= eval { $t->SUPER::parse( @_); };
659
2775
551470
    return _checked_parse_result( $t, $@);
660  }
661
662sub parsefile
663
27
341
  { my $t= shift;
664
27
27
222
1398
    $t= eval { $t->SUPER::parsefile( @_); };
665
27
1079
    return _checked_parse_result( $t, $@);
666  }
667
668sub _checked_parse_result
669
2802
54751
  { my( $t, $returned)= @_;
670
2802
47428
    if( !$t)
671
22
1268
      { if( isa( $returned, 'XML::Twig') && $returned->{twig_finish_now})
672
6
50
          { $t= $returned;
673
6
79
            delete $t->{twig_finish_now};
674
6
94
            return $t->_twig_final;
675          }
676        else
677
16
333
          { _croak( $returned, 0); }
678      }
679
2780
134834
    return $t;
680  }
681
682sub finish_now
683