#!/usr/bin/perl -w use strict; use XML::Simple; use FindBin qw($Bin); use lib $Bin; use wtr2_base; my $DEBUG=0; init_db(); # XML::Simple cannot output properly the document # the order of the elements will be lost my $CAN_OUTPUT= 0; my @files= @ARGV || (<$dir{invoices}/*.xml>); foreach my $file (@files) { my $xml= XMLin( $file, forcearray => [ qw(InvoiceRow)], forcecontent => 1); my $errors= check_invoice( $xml); if( !@$errors) { store_invoice( $xml); } else { print "ERROR in $file\n ", join( "\n ", @$errors), "\n"; if( $CAN_OUTPUT) { my $rejected_file= rejected( $file); print "adding errors in $rejected_file\n" if( $DEBUG); add_errors( $xml, $errors); output_doc_to_check( $rejected_file, $xml); } } } exit; sub check_invoice { my( $xml)= @_; my $errors=[]; # array ref, holds the error messages check_buyer( $xml->{BuyerPartyDetails}->{BuyerPartyIdentifier}->{content}, $xml->{BuyerPartyDetails}->{BuyerOrganisationName}->{content}, $errors ); check_po( $xml->{InvoiceDetails}->{OrderIdentifier}->{content}, $errors); my @rows= @{$xml->{InvoiceRow}}; reset_default_row_id(); foreach my $row( @rows) { # this does not cope well with broken row numbers my $row_id= $row->{RowIdentifier}->{content} || default_row_id(); print "checking row $row_id\n" if $DEBUG; check_qtty( $row_id, $row->{DeliveredQuantity}->{content}, $row->{DeliveredQuantity}->{QuantityUnitCode}, $row->{OrderedQuantity}->{content}, $row->{OrderedQuantity}->{QuantityUnitCode}, $errors ); } return $errors; } sub store_invoice { my( $xml)= @_; print "storing invoice $xml->{InvoiceDetails}->{InvoiceNumber}->{content}\n"; # build the various data structures my $data; my $invoice = $xml->{InvoiceDetails}; $data->{invoice} = { number => $invoice->{InvoiceNumber}->{content}, date => $invoice->{InvoiceDate}->{content}, po => $invoice->{OrderIdentifier}->{content}, amount_no_tax => $invoice->{InvoiceTotalVatExcludedAmount}->{content}, tax => $invoice->{InvoiceTotalVatAmount}->{content}, amount => $invoice->{InvoiceTotalVatIncludedAmount}->{content}, payment_status => $xml->{PaymentStatusDetails}->{PaymentStatusCode}->{content}, }; my $seller = $xml->{SellerPartyDetails}; $data->{seller} = { identifier => $seller->{SellerPartyIdentifier}->{content}, name => $seller->{SellerOrganisationName}->{content}, tax_code => $seller->{SellerOrganisationTaxCode}->{content}, }; my $address = $xml->{SellerPartyDetails}->{SellerPostalAddressDetails}; $data->{address} = { street => $address->{SellerStreetName}->{content}, town => $address->{SellerTownName}->{content}, zip => $address->{SellerPostCodeIdentifier}->{content}, country_code => $address->{CountryCode}->{content}, po_box => $address->{SellerPostOfficeBoxIdentifier}->{content}, }; $data->{contact} = { name => $xml->{SellerContactPersonName}->{content}, phone => $xml->{SellerCommunicationDetails}->{SellerPhoneNumberIdentifier}->{content}, email => $xml->{SellerCommunicationDetails}->{SellerEmailaddressIdentifier}->{content}, }; $data->{invoicerow} ||= []; reset_default_row_id(); foreach my $invoicerow (@{$xml->{InvoiceRow}}) { push @{$data->{invoicerow}}, { row_id => $invoicerow->{RowIdentifier}->{content} || default_row_id(), sku => $invoicerow->{ArticleIdentifier}->{content}, name => $invoicerow->{ArticleName}->{content}, qty => $invoicerow->{DeliveredQuantity}->{content}, qty_unit => $invoicerow->{DeliveredQuantity}->{QuantityUnitCode}, unit_price => $invoicerow->{UnitPriceAmount}->{content}, amount_no_tax => $invoicerow->{RowVatExcludedAmount}->{content}, tax => $invoicerow->{RowVatAmount}->{content}, amount => $invoicerow->{RowAmount}->{content}, } } store_all( $data); } __END__ =head1 NAME wtr2_simple =head1 SYNOPSYS perl wtr2_simple =head1 DESCRIPTION L is extremely convenient for loading XML data into a Perl data structure, using C, and then forgetting that it was ever in XML. The code was very easy to write. I used YAML during the debugging phase to dump the content of the C<$xml> structure, so it was really easy to see where was the data I needed. Note the 2 options for C: =over 4 =item C<< forcearray => [ qw(InvoiceRow)] >> this will cause C to load the InvoiceRow's in an array, even if there is only one in the document (without that option a single InvoiceRow would be turned into a hash value instead of an array, which would have forced me to test it). =item C<< forcecontent => 1 >> despite the documentation for XML::Simple stating that this option is rarely used I found it made it easier and safer to write the code: all text content is stored in the C field of a hash. This makes accessing the values a little uglier (you have to add an extra C<< ->{content} >> in the expression), but in fact it saved me treating differently elements that had attributes and elements which didn't, plus it's an extra security if new attributes are added to some elements of the DTD, or for optional attributes, that would cause the generated data structure to be different depending on them being present or not (for example in C the C attribute is optional). If you use XML::Simple for data where the DTD is simpler and where attributes are always set then you might want not to use this option. In this case I found it convenient. =back Finally note that XML::Simple cannot output a modified document, as it looses the order of elements in the document (once elements are assigne to a hash their order is lost), so $CAN_OUTPUT is set to 0 and the error messages are output to the console, rather than creating a new document. This might or might not be a problem, depending on your specific case. =head1 AUTHOR Michel Rodriguez =head1 LICENSE This code is Copyright (c) 2003 Michel Rodriguez. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Comments can be sent to mirod@xmltwig.com =head1 SEE ALSO XML::Simple Ways to Rome 2 - Kourallinen Dollareita: http://www.xmltwig.com/article/ways_to_rome_2/