XMLUtil.pm
package Pitonyak::XMLUtil;
#************************************************************
=head1 NAME
Pitonyak::XMLUtil - Convert Objects to and from XML.
=head1 DESCRIPTION
A few simple XML utilities that will convert arbitrary objects to XML and back again.
These routines have not been extensively tested.
=cut
require Exporter;
$VERSION = '1.00';
@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(
convert_entity_references_to_characters
convert_xml_characters_to_entity_references
object_to_xml
xml_to_object
);
use Carp;
use IO::File;
use File::Basename;
use strict;
use XML::Parser;
#************************************************************
=pod
=head2 convert_entity_references_to_characters
=over 4
=item convert_entity_references_to_characters(@strings_with_entity_refs)
=back
Change '&' to '&', '<' to '<', '>' to '>', '"' to '"', and ''' to "'".
The calling parameters are modfied
=cut
#************************************************************
my %s2x = (
'&' => '&',
'<' => '<',
'>' => '>',
'"' => '"',
''' => "'",
);
sub convert_entity_references_to_characters {
return undef if $#_ < 0;
for (@_) {
s/('|"|>|<|&)/"$s2x{$1}"/geos;
}
return wantarray ? @_ : $_[0];
}
#************************************************************
=pod
=head2 convert_xml_characters_to_entity_references
=over 4
=item convert_xml_characters_to_entity_references(@strings_needing_entity_refs)
=back
Change '&' to '&', '<' to '<', '>' to '>', '"' to '"', and "'" to ''',
The calling parameters are modfied.
This is used to render a string safe to send as XML.
Existing entity referenes will have their leading ampersand transformed.
=cut
#************************************************************
my %x2s = (
'&' => '&',
'<' => '<',
'>' => '>',
'"' => '"',
"'" => ''',
);
sub convert_xml_characters_to_entity_references {
# If called from a logger object, then simply discard
if ( $#_ >= 0 && UNIVERSAL::isa( $_[0], 'XMLUtil' ) ) {
shift;
}
return undef if $#_ < 0;
for (@_) {
s/([&<>"'])/$x2s{$1}/geo;
}
return wantarray ? @_ : $_[0];
}
#*********************************************************************
#** **
#** Input : left indentation **
#** One object to convert **
#** **
#** Output: Objects converted to XML **
#** **
#** Notes : Each parameter is a single XML string **
#** The element name identifies the type **
#** Objects are usually converted to a HASH **
#** **
#*********************************************************************
sub internal_object_to_xml {
return undef if $#_ < 0;
my $left = shift;
my $thing_to_print = shift;
my $txt;
if ( !defined($thing_to_print) ) {
$txt = '<NULL/>';
}
else {
my $ref_type = ref $thing_to_print;
if ( !$ref_type ) {
$txt = convert_xml_characters_to_entity_references($thing_to_print);
}
elsif ( $ref_type eq 'SCALAR' ) {
# If the referenced item can not be converted
# then it will not be
my $internal =
internal_object_to_xml( "$left ", $$thing_to_print );
if ( defined($internal) && length($internal) > 0 ) {
$txt = "<REF>\n$internal\n$left</REF>";
}
else {
$txt = "<REF/>";
}
}
elsif ( $ref_type eq 'ARRAY' ) {
my $internal_txt = '';
foreach my $temp_thing (@$thing_to_print) {
my $internal = internal_object_to_xml( "$left ", $temp_thing );
if ( length($internal) == 0 ) {
$internal_txt .= "$left <Value/>\n";
}
elsif ( index( $internal, '<' ) >= $[ ) {
$internal_txt .=
"$left <Value>\n$left $internal\n$left </Value>\n";
}
else {
$internal_txt .= "$left <Value>$internal</Value>\n";
}
}
if ( length($internal_txt) > 0 ) {
$txt = "<ARRAY>\n$internal_txt$left</ARRAY>";
}
else {
$txt = '<ARRAY/>';
}
}
elsif ( UNIVERSAL::isa( $thing_to_print, 'HASH' ) ) {
#
# Remember that each hash has one universal iterator
# recursive nesting will therefore cause stranger
# results than a simple infinite loop.
#
my $hash_txt = "\n";
my ( $key, $value );
while ( ( $key, $value ) = each %$thing_to_print ) {
my $value_xml;
my $key_xml;
if ( defined($value) ) {
$value_xml =
internal_object_to_xml( "$left ", $value );
}
if ( defined($key) ) {
$key_xml = internal_object_to_xml( "$left ", $key );
}
if ( defined($key) ) {
if (
index( $key_xml, '<' ) >= $[
|| ( defined($value_xml)
&& index( $value_xml, '<' ) >= $[ )
)
{
$hash_txt .= "$left <Pair>\n$left ";
if ( index( $key_xml, '<' ) >= $[ ) {
$hash_txt .=
"<Key>\n$left $key_xml\n$left </Key>\n";
}
elsif ( length($key_xml) > 0 ) {
$hash_txt .= "<Key>$key_xml</Key>\n";
}
else {
$hash_txt .= "<Key/>\n";
}
if ( defined($value_xml) ) {
if ( index( $value_xml, '<' ) >= $[ ) {
$hash_txt .=
"$left <Value>\n$left $value_xml\n$left </Value>\n";
}
elsif ( length($value_xml) > 0 ) {
$hash_txt .=
"$left <Value>$value_xml</Value>\n";
}
else {
$hash_txt .= "$left <Value/>\n";
}
}
$hash_txt .= "$left </Pair>\n";
}
elsif ( defined($key_xml) ) {
$hash_txt .= "$left <Pair>";
if ( length($key_xml) > 0 ) {
$hash_txt .= "<Key>$key_xml</Key>";
}
else {
$hash_txt .= "<Key/>";
}
if ( defined($value_xml) ) {
if ( length($value_xml) > 0 ) {
$hash_txt .= "<Value>$value_xml</Value>";
}
else {
$hash_txt .= "<Value/>";
}
}
$hash_txt .= "</Pair>\n";
}
}
}
if ( defined($hash_txt) && index( $hash_txt, '<' ) >= $[ ) {
$txt = "<HASH>$hash_txt$left</HASH>";
}
else {
$txt = '<HASH/>';
}
}
}
return $txt;
}
#************************************************************
=pod
=head2 object_to_xml
=over 4
=item object_to_xml(@objects_to_transform)
=back
Transform an object into XML.
An attempt is made to make this object human readable.
Note that if the object is a package object that is referenced as a HASH
it is still embedded as a HASH.
Each object in the array is returned as a separate XML string.
An object that is not defined is returned as C<E<lt>NULLE<sol>E<gt>>>
A SCALAR is rendered XML safe by converting special characters to entity references.
It is otherwise left unchanged.
A Reference to a SCALAR is encoded as C<E<lt>REFE<sol>E<gt>> for a zero length SCALAR and as
C<E<lt>REFE<gt>valueE<lt>E<sol>REFE<gt>>
An ARRAY reference is encoded as either C<E<lt>ARRAYE<sol>E<gt>> or something similar to
C<E<lt>ARRAYE<gt>E<lt>VALUEE<gt>valueE<lt>E<sol>VALUEE<gt>E<lt>E<sol>ARRAYE<gt>>.
A HASH reference is encoded as
C<E<lt>HASHE<gt>E<lt>PAIRE<gt>E<lt>KEYE<gt>valueE<lt>E<sol>KEYE<gt>E<lt>VALUEE<gt>valueE<lt>VALUEE<gt>E<lt>E<sol>PAIRE<gt>E<lt>E<sol>HASHE<gt>>
A PAIR may be missing a C<VALUE> which means that it is undefined.
If a value is really the intended value, then it is rendered XML safe by using entity references
and no extra space is used. If the value is a reference to something else then the object is converted
to XML using extra white space and indentation for easier reading.
=cut
#************************************************************
sub object_to_xml {
return undef if $#_ < 0;
my @object_xmls = ();
foreach my $thing_to_print (@_) {
my $txt = internal_object_to_xml( '', $thing_to_print );
push @object_xmls, $txt if defined($txt) && length($txt) > 0;
}
return wantarray ? @object_xmls : $object_xmls[0];
}
#*********************************************************************
#** **
#** Input : Array reference of an XML object **
#** Index into the array from which to start **
#** **
#** Output: XML converted back to objects **
#** **
#** Notes : This takes the string from object_to_xml **
#** **
#*********************************************************************
sub internal_xml_to_object {
return undef if $#_ < 0;
if ( ref( $_[0] ) ne 'ARRAY' ) {
carp( "Array reference expected as the first parameter, not"
. ref( $_[0] ) );
return undef;
}
my $array_ref = shift;
my $tag_start = 0;
$tag_start = shift unless $#_ < 0;
if ( $tag_start > $#$array_ref ) {
confess(
"Requested an index of $tag_start when the array is not large enough"
);
return undef;
}
my $obj;
my $element_name = $array_ref->[$tag_start];
my $element;
$element = $array_ref->[ $tag_start + 1 ] if $tag_start < $#$array_ref;
if ( $element_name eq '0' ) {
if ( defined($element) ) {
$obj = convert_entity_references_to_characters($element);
}
else {
$obj = '';
}
}
elsif ( $element_name eq 'NULL' ) {
# $obj is already undefined
}
elsif ( defined($element) && ref($element) ne 'ARRAY' ) {
carp( "The element $element_name is not followed by an array, it is an "
. ref($element) );
}
elsif ( $element_name eq 'REF' ) {
my $temp = internal_xml_to_object( $element, 1 );
$obj = \$temp;
}
elsif ( $element_name eq 'ARRAY' ) {
#?? I think that this is wrong!
my @my_array = ();
my $array_len = 0;
if ( defined($element) ) {
$array_len = $#$element;
}
for ( my $i = 1 ; $i < $array_len ; $i += 2 ) {
# skip white space
if ( $element->[$i] ne '0' ) {
push @my_array, internal_xml_to_object( $element, $i );
}
}
$obj = \@my_array;
}
elsif ( $element_name eq 'HASH' ) {
my %my_hash = ();
my $array_len = 0;
if ( defined($element) ) {
$array_len = $#$element;
}
for ( my $i = 1 ; $i < $array_len ; $i += 2 ) {
# skip white space
if ( $element->[$i] eq 'Pair' ) {
my ( $key, $val );
my $internal_array = $element->[ $i + 1 ];
my $internal_array_len = $#$internal_array;
for ( my $j = 1 ; $j < $internal_array_len ; $j += 2 ) {
if ( $internal_array->[$j] eq 'Key' ) {
my $key_array = $internal_array->[ $j + 1 ];
my $length_of_key_array = $#$key_array;
if ( $length_of_key_array == 0 ) {
$key = '';
}
else {
my $idx_to_use = 1;
for (
my $k = 1 ;
$k < $length_of_key_array ;
$k += 2
)
{
$idx_to_use = $k if $key_array->[$k] ne '0';
}
$key =
internal_xml_to_object( $key_array, $idx_to_use );
}
}
elsif ( $internal_array->[$j] eq 'Value' ) {
my $val_array = $internal_array->[ $j + 1 ];
my $length_of_val_array = $#$val_array;
if ( $length_of_val_array == 0 ) {
$val = '';
}
else {
my $idx_to_use = 1;
for (
my $k = 1 ;
$k < $length_of_val_array ;
$k += 2
)
{
$idx_to_use = $k if $val_array->[$k] ne '0';
}
$val =
internal_xml_to_object( $val_array, $idx_to_use );
}
}
}
$my_hash{$key} = $val;
}
}
$obj = \%my_hash;
}
return $obj;
}
#************************************************************
=pod
=head2 xml_to_object
=over 4
=item xml_to_object(@xml_strings_to_convert_to_objects)
=back
Convert XML strings back into objects.
=cut
#************************************************************
sub xml_to_object {
# If called from a logger object, then simply discard
if ( $#_ >= 0 && UNIVERSAL::isa( $_[0], 'XMLUtil' ) ) {
shift;
}
return undef if $#_ < 0;
my @objects = ();
my $xml_parser = new XML::Parser( Style => 'Tree' );
NEXT_XML_STRING: foreach my $xml_string (@_) {
my $obj;
my $tree = $xml_parser->parsestring($xml_string);
$obj = internal_xml_to_object( $tree, 0 );
push @objects, $obj;
}
return wantarray ? @objects : $objects[0];
}
#************************************************************
=pod
=head1 COPYRIGHT
Copyright 2002, Andrew Pitonyak (perlboy@pitonyak.org)
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=head1 Modification History
=head2 September 10, 2002
Version 1.00 Initial release
=cut
#************************************************************
1;