############################################################################# ## Name: Tree.pm ## Purpose: XML::Smart::Tree ## Author: Graciliano M. P. ## Modified by: Harish Madabushi ## Created: 10/05/2003 ## RCS-ID: ## Copyright: (c) 2003 Graciliano M. P. ## Licence: This program is free software; you can redistribute it and/or ## modify it under the same terms as Perl itself ############################################################################# package XML::Smart::Tree ; use strict ; use warnings ; use Carp ; use XML::Smart::Entity qw(_parse_basic_entity) ; use XML::Smart::Shared qw( _unset_sig_warn _reset_sig_warn ) ; our ($VERSION) ; $VERSION = '1.34' ; my %PARSERS = ( XML_Parser => 0 , XML_Smart_Parser => 0 , XML_Smart_HTMLParser => 0 , ) ; ## BUG - By making DEFAULT_LOADED a global variable it is working across objects! ( Watch for possible usage elsewhere ) # my $DEFAULT_LOADED ; use vars qw($NO_XML_PARSER); ################### # LOAD_XML_PARSER # ################### sub load_XML_Parser { return if $NO_XML_PARSER ; _unset_sig_warn() ; eval('use XML::Parser ;') ; _reset_sig_warn() ; if ($@) { $@ = undef ; return( undef ) ;} my ($xml , $tree) ; _unset_sig_warn() ; eval { no strict ; my $data = '' ; $xml = XML::Parser->new(Style => 'Tree') ; $tree = $xml->parse($data) ; } ; _reset_sig_warn() ; if (!$tree || ref($tree) ne 'ARRAY') { return( undef ) ;} if ($tree->[1][2][0]{arg1} eq 't1') { return( 1 ) ;} return( undef ) ; } ######################### # LOAD_XML_SMART_PARSER # ######################### sub load_XML_Smart_Parser { _unset_sig_warn() ; eval('use XML::Smart::Parser ;') ; _reset_sig_warn() ; if ($@) { $@ = undef ; return( undef ) ;} return(1) ; } ############################# # LOAD_XML_SMART_HTMLPARSER # ############################# sub load_XML_Smart_HTMLParser { _unset_sig_warn() ; eval('use XML::Smart::HTMLParser ;') ; _reset_sig_warn() ; if ($@) { $@ = undef ; return( undef ) ;} return(1) ; } ######## # LOAD # ######## sub load { my ( $parser ) = @_ ; my $module ; my $DEFAULT_LOADED ; if ($parser) { $parser =~ s/:+/_/gs ; $parser =~ s/\W//g ; if ($parser =~ /^(?:html?|wild)$/i) { $parser = 'XML_Smart_HTMLParser' ;} elsif ($parser =~ /^(?:re|smart)/i) { $parser = 'XML_Smart_Parser' ;} foreach my $Key ( keys %PARSERS ) { if ($Key =~ /^$parser$/i) { $module = $Key ; last ;} } } my $ok ; if( $module && ( $module eq 'XML_Parser' ) ) { $PARSERS{XML_Parser} = 1 if &load_XML_Parser() ; $ok = $PARSERS{XML_Parser} ; } elsif ( $module && ( $module eq 'XML_Smart_Parser' ) ) { $PARSERS{XML_Smart_Parser} = 1 if !$PARSERS{XML_Smart_Parser} && &load_XML_Smart_Parser() ; $ok = $PARSERS{XML_Smart_Parser} ; } elsif( $module and ( $module eq 'XML_Smart_HTMLParser' ) ) { $PARSERS{XML_Smart_HTMLParser} = 1 if !$PARSERS{XML_Smart_HTMLParser} && &load_XML_Smart_HTMLParser() ; $ok = $PARSERS{XML_Smart_HTMLParser} ; } if (!$ok && !$DEFAULT_LOADED) { $PARSERS{XML_Parser} = 1 if &load_XML_Parser() ; $module = 'XML_Parser' ; if ( !$PARSERS{XML_Parser} ) { $PARSERS{XML_Smart_Parser} = 1 if &load_XML_Smart_Parser() ; $module = 'XML_Smart_Parser' ; } $DEFAULT_LOADED = 1 ; } return($module) ; } ######### # PARSE # ######### sub parse { my $module = $_[1] ; my $data ; { my ($fh,$open) ; if (ref($_[0]) eq 'GLOB') { $fh = $_[0] ;} elsif ($_[0] =~ /^http:\/\/\w+[^\r\n]+$/s) { $data = &get_url($_[0]) ;} elsif ($_[0] =~ /<.*?>/s) { $data = $_[0] ;} else { open ($fh,$_[0]) or croak( $! ); binmode($fh) ; $open = 1 ; } if ($fh) { no warnings ; 1 while( read($fh, $data , 1024*8 , length($data) ) ) ; close($fh) if $open ; } } if ($data !~ /<.*?>/s) { return( {} ) ;} if (!$module || !$PARSERS{$module}) { if ( !$NO_XML_PARSER && $INC{'XML/Parser.pm'} && $PARSERS{XML_Parser}) { $module = 'XML_Parser' ;} elsif ($PARSERS{XML_Smart_Parser}) { $module = 'XML_Smart_Parser' ;} } my $xml ; if ($module eq 'XML_Parser') { $xml = XML::Parser->new() ;} elsif ($module eq 'XML_Smart_Parser') { $xml = XML::Smart::Parser->new() ;} elsif ($module eq 'XML_Smart_HTMLParser') { $xml = XML::Smart::HTMLParser->new() ;} else { croak("Can't find a parser for XML!") ;} shift(@_) ; if ( $_[0] && ( $_[0] =~ /^\s*(?:XML_\w+|html?|re\w+|smart)\s*$/i ) ) { shift(@_) ;} _unset_sig_warn() ; my ( %args ) = @_ ; _reset_sig_warn() ; if ( $args{lowtag} ) { $xml->{SMART}{tag} = 1 ;} if ( $args{upertag} ) { $xml->{SMART}{tag} = 2 ;} if ( $args{lowarg} ) { $xml->{SMART}{arg} = 1 ;} if ( $args{uperarg} ) { $xml->{SMART}{arg} = 2 ;} if ( $args{arg_single} ) { $xml->{SMART}{arg_single} = 1 ;} if ( $args{no_order} ) { $xml->{SMART}{no_order} = 1 ;} if ( $args{no_nodes} ) { $xml->{SMART}{no_nodes} = 1 ;} if ( $args{use_spaces} ) { $xml->{SMART}{use_spaces} = 1 ;} $xml->{SMART}{on_start} = $args{on_start} if ref($args{on_start}) eq 'CODE' ; $xml->{SMART}{on_char} = $args{on_char} if ref($args{on_char}) eq 'CODE' ; $xml->{SMART}{on_end} = $args{on_end} if ref($args{on_end}) eq 'CODE' ; $xml->setHandlers( Init => \&_Init , Start => \&_Start , Char => \&_Char , End => \&_End , Final => \&_Final , ) ; my $tree ; eval { $tree = $xml->parse($data); }; croak( $@ ) if( $@ ); return( $tree ) ; } ################################################## ## UNUSED - DEPRECATED. ## ################################################## sub _clean_data_with_lt { my $data = shift ; my @data = split( //, $data ) ; my $data_len = @data ; # State Machine Definition: my %state_machine = ( 'in_cdata_block' => 0 , 'seen_some_tag' => 0 , 'need_to_cdata_this' => 0 , 'prev_lt' => -1 , 'last_tag_start' => -1 , 'last_tag_close' => -1 , 'tag_balance' => 0 , ); CHAR: for( my $index = 0; $index < $data_len; $index++ ) { { no warnings ; next CHAR unless( $data[ $index ] eq '<' or $data[ $index ] eq '>' ) ; } if( $data[ $index ] eq '<' ) { next CHAR if( $state_machine{ 'in_cdata_block' } ) ; { # Check for possibility of this being a cdata block my $possible_cdata_block = join( '', @data[ $index .. ( $index + 8 ) ] ) ; if( $possible_cdata_block eq '' ) { if( $state_machine{ 'in_cdata_block' } ) { my $possible_cdata_close = join( '', @data[ ( $index - 2 ) .. $index ] ) ; if( $possible_cdata_close eq ']]>' ) { $state_machine{ 'in_cdata_block' } = 0 ; $state_machine{ 'tag_balance' } = 0 ; next CHAR ; } next CHAR ; } unless( $state_machine{ 'seen_some_tag' } ) { croak " > found before < - Input XML seems to have errors!\n"; } $state_machine{ 'tag_balance' }-- ; unless( $state_machine{ 'tag_balance' } ) { $state_machine{ 'last_tag_close' } = $index ; next CHAR ; } ## Need to add CDATA now. my $last_tag_close = $state_machine{ 'last_tag_close' } ; my $prev_lt = $state_machine{ 'prev_lt' } ; $data[ $last_tag_close ] = '><' ; $state_machine{ 'last_tag_close' } = $index ; $state_machine{ 'need_to_cdata_this' } = 0 ; $state_machine{ 'tag_balance' } = 0 ; } } $data = join( '', @data ) ; return $data; } ########### # GET_URL # ########### sub get_url { my ( $url ) = @_ ; my $data ; require LWP ; require LWP::UserAgent ; my $ua = LWP::UserAgent->new(); my $agent = $ua->agent() ; $agent = "XML::Smart/$XML::Smart::VERSION $agent" ; $ua->agent($agent) ; my $req = HTTP::Request->new(GET => $url) ; my $res = $ua->request($req) ; if ($res->is_success) { return $res->content ;} else { return undef ;} } ########## # MODULE # ########## sub module { foreach my $Key ( keys %PARSERS ) { if ($PARSERS{$Key}) { my $module = $Key ; $module =~ s/_/::/g ; return( $module ) ; } } return('') ; } ######### # _INIT # ######### sub _Init { my $this = shift ; $this->{PARSING}{tree} = {} ; $this->{PARSING}{p} = $this->{PARSING}{tree} ; return ; } ########## # _START # ########## sub _Start { my $this = shift ; if ( $this->{LAST_CALL} && ( $this->{LAST_CALL} eq 'char' ) ) { _Char_process( $this , delete $this->{CONTENT_BUFFER} ) ; } ##print "START>> @_\n" ; $this->{LAST_CALL} = 'start' ; _unset_sig_warn(); my ( $tag , %args ) = @_ ; _reset_sig_warn(); if ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 1 ) ) { $tag = lc($tag) ;} elsif ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 2 ) ) { $tag = uc($tag) ;} $this->{PARSING}{p}{'/nodes'}{$tag} = 1 if !$this->{SMART}{no_nodes} ; push( @{$this->{PARSING}{p}{'/order'}} , $tag) if !$this->{SMART}{no_order} ; if ( $this->{SMART}{arg} ) { my $type = $this->{SMART}{arg} ; my %argsok ; foreach my $Key ( keys %args ) { my $k ; if ($type == 1) { $k = lc($Key) ;} elsif ($type == 2) { $k = uc($Key) ;} if (exists $argsok{$k}) { if ( ref $argsok{$k} ne 'ARRAY' ) { my $key = $argsok{$k} ; $argsok{$k} = [$key] ; } push(@{$argsok{$k}} , $args{$Key}) ; } else { $argsok{$k} = $args{$Key} ;} } %args = %argsok ; } if ( $this->{SMART}{arg_single} ) { foreach my $Key ( keys %args ) { $args{$Key} = 1 if !defined $args{$Key} ; } } ## Args order: if ( !$this->{SMART}{no_order} ) { my @order ; for(my $i = 1 ; $i < $#_ ; $i+=2) { push( @order , $_[$i] ) ;} if ( $this->{SMART}{arg} ) { my $type = $this->{SMART}{arg} ; foreach my $order_i ( @order ) { if ($type == 1) { $order_i = lc($order_i) ;} elsif ($type == 2) { $order_i = uc($order_i) ;} } } $args{'/order'} = \@order if @order ; } $args{'/tag'} = $tag ; $args{'/back'} = $this->{PARSING}{p} ; if ($this->{NOENTITY}) { foreach my $Key ( keys %args ) { &_parse_basic_entity( $args{$Key} ) ;} } if ( defined $this->{PARSING}{p}{$tag} ) { if ( ref($this->{PARSING}{p}{$tag}) ne 'ARRAY' ) { my $prev = $this->{PARSING}{p}{$tag} ; $this->{PARSING}{p}{$tag} = [$prev] ; } push(@{$this->{PARSING}{p}{$tag}} , \%args) ; my $i = @{$this->{PARSING}{p}{$tag}} ; $i-- ; $args{'/i'} = $i ; $this->{PARSING}{p} = \%args ; } else { $this->{PARSING}{p}{$tag} = \%args ; ## Change the pointer: $this->{PARSING}{p} = \%args ; } if ( $this->{SMART}{on_start} ) { my $sub = $this->{SMART}{on_start} ; &$sub($tag , $this->{PARSING}{p} , $this->{PARSING}{p}{'/back'} , undef , $this ) ; } return ; } ######### # _CHAR # ######### # # XML::Parser parse each line as a different call to _Char(). # For XML::Smart multiple calls to _Char() occurs only when the content # have other nodes inside. # sub _Char { ##print "CHAR>>\n" ; my $this = shift ; $this->{CONTENT_BUFFER} .= $_[0] ; $this->{LAST_CALL} = 'char' ; return ; } sub _Char_process { my $this = shift ; ##print "CONT>> ##@_##\n" ; my $content = $_[0] ; if ( !$this->{SMART}{use_spaces} && $content !~ /\S+/s ) { return ;} ###### if (! defined $this->{PARSING}{p}{'dt:dt'} && defined $this->{PARSING}{p}{'DT:DT'}) { $this->{PARSING}{p}{'dt:dt'} = delete $this->{PARSING}{p}{'DT:DT'} ; } if ( $this->{PARSING}{p}{'dt:dt'} && ( $this->{PARSING}{p}{'dt:dt'} =~ /binary\.base64/si ) ) { require XML::Smart::Base64 ; $content = &XML::Smart::Base64::decode_base64($content) ; delete $this->{PARSING}{p}{'dt:dt'} ; if ( $this->{PARSING}{p}{'/nodes'} ) { delete $this->{PARSING}{p}{'/nodes'}{'dt:dt'} ; my $nkeys = keys %{$this->{PARSING}{p}{'/nodes'}} ; if ($nkeys < 1) { delete $this->{PARSING}{p}{'/nodes'} ;} } if ( $this->{PARSING}{p}{'/order'} ) { my @order = @{$this->{PARSING}{p}{'/order'}} ; my @order_ok ; foreach my $order_i ( @order ) { push(@order_ok , $order_i) if $order_i ne 'dt:dt' ;} if (@order_ok) { $this->{PARSING}{p}{'/order'} = \@order_ok ;} else { delete $this->{PARSING}{p}{'/order'} ;} } } elsif ($this->{NOENTITY}) { &_parse_basic_entity($content) ;} ###### if ( !exists $this->{PARSING}{p}{CONTENT} ) { $this->{PARSING}{p}{CONTENT} = $content ; push(@{$this->{PARSING}{p}{'/order'}} , 'CONTENT') if !$this->{SMART}{no_order} ; } else { if ( !tied $this->{PARSING}{p}{CONTENT} ) { my $cont = $this->{PARSING}{p}{CONTENT} ; $this->{PARSING}{p}{CONTENT} = '' ; my $tied = tie( $this->{PARSING}{p}{CONTENT} => 'XML::Smart::TieScalar' , $this->{PARSING}{p}) ; push(@{$this->{TIED_CONTENTS}} , $tied) ; $this->{PARSING}{p}{'/.CONTENT/x'} = 0 ; $this->{PARSING}{p}{"/.CONTENT/0"} = $cont ; my $cont_pos = 0 ; for my $key ( @{$this->{PARSING}{p}{'/order'}} ) { last if ($key eq 'CONTENT') ; ++$cont_pos ; } splice( @{$this->{PARSING}{p}{'/order'}} , $cont_pos,0, "/.CONTENT/0") if !$this->{SMART}{no_order} ; } my $x = ++$this->{PARSING}{p}{'/.CONTENT/x'} ; $this->{PARSING}{p}{"/.CONTENT/$x"} = $content ; push( @{$this->{PARSING}{p}{'/order'}} , "/.CONTENT/$x") if !$this->{SMART}{no_order} ; } if ( $this->{SMART}{on_char} ) { my $sub = $this->{SMART}{on_char} ; &$sub($this->{PARSING}{p}{'/tag'} , $this->{PARSING}{p} , $this->{PARSING}{p}{'/back'} , \$this->{PARSING}{p}{CONTENT} , $this ) ; } return ; } ######## # _END # ######## sub _End { ##print "END>> @_[1] >> $_[0]->{PARSING}{p}{'/tag'}\n" ; my $this = shift ; if ( $this->{LAST_CALL} eq 'char' ) { _Char_process( $this , delete $this->{CONTENT_BUFFER} ) ;} $this->{LAST_CALL} = 'end' ; my $tag = shift ; if ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 1 ) ) { $tag = lc($tag) ;} elsif ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 2 ) ) { $tag = uc($tag) ;} if ( $this->{PARSING}{p}{'/tag'} ne $tag ) { return ;} delete $this->{PARSING}{p}{'/tag'} ; my $back = delete $this->{PARSING}{p}{'/back'} ; my $i = delete $this->{PARSING}{p}{'/i'} || 0 ; my $nkeys = keys %{$this->{PARSING}{p}} ; if ( $nkeys == 1 && exists $this->{PARSING}{p}{CONTENT} ) { if (ref($back->{$tag}) eq 'ARRAY') { $back->{$tag}[$i] = $this->{PARSING}{p}{CONTENT} ;} else { $back->{$tag} = $this->{PARSING}{p}{CONTENT} ;} } if ( $this->{PARSING}{p}{'/nodes'} && !%{$this->{PARSING}{p}{'/nodes'}} ) { delete $this->{PARSING}{p}{'/nodes'} ;} if ( $this->{PARSING}{p}{'/order'} && $#{$this->{PARSING}{p}{'/order'}} <= 0 ) { delete $this->{PARSING}{p}{'/order'} ;} delete $this->{PARSING}{p}{'/.CONTENT/x'} ; if ( $this->{SMART}{on_end} ) { my $sub = $this->{SMART}{on_end} ; &$sub($tag , $this->{PARSING}{p} , $back , undef , $this) ; } $this->{PARSING}{p} = $back ; return ; } ########## # _FINAL # ########## sub _Final { my $this = shift ; my $tree = $this->{PARSING}{tree} ; foreach my $tied_cont ( @{$this->{TIED_CONTENTS}} ) { $tied_cont->_cache_keys ; } delete $this->{TIED_CONTENTS} ; delete $this->{LAST_CALL} ; delete($this->{PARSING}) ; return($tree) ; } ####### # END # ####### 1; __END__