package DataFlow::Node::HTMLFilter;

# The version is assigned inside BEGIN so that it is already set while the
# remainder of this file is still being compiled.
BEGIN {
    our $VERSION = '0.91.02';
}
use Moose;
extends 'DataFlow::Node';
use Moose::Util::TypeConstraints;
use HTML::TreeBuilder::XPath;
# search_xpath: the XPath expression that process_item hands to
# findnodes()/findvalues() for every item. Read-only string, and it must be
# supplied to the constructor.
has 'search_xpath' => (
    required => 1,
    isa      => 'Str',
    is       => 'ro',
);
# Type constraint listing the three result shapes process_item can produce.
enum '_result_type', [ 'NODE', 'HTML', 'VALUE' ];

# result_type: which of the shapes above to return for each match.
#   NODE  - the matching element objects
#   HTML  - each match serialised through as_HTML() (this is the default)
#   VALUE - whatever findvalues() reports for the expression
has 'result_type' => (
    default => 'HTML',
    isa     => '_result_type',
    is      => 'ro',
);
# ref_result: when true, process_item returns all matches for an item wrapped
# in one array reference rather than as a flat list. Off by default.
has 'ref_result' => (
    default => 0,
    isa     => 'Bool',
    is      => 'ro',
);
# Override the inherited process_item attribute with a lazily built closure
# that parses one HTML document and extracts whatever search_xpath matches.
#
# The closure is invoked as $code->( $node, $html_text ) and yields:
#   result_type 'VALUE' - the values reported by findvalues()
#   result_type 'NODE'  - the matching element objects themselves
#   result_type 'HTML'  - each matching element serialised with as_HTML()
# Nothing is returned when findnodes() matches no element. When ref_result is
# true, the same results come back wrapped in a single array reference
# instead of a flat list.
has '+process_item' => (
    lazy    => 1,
    default => sub {
        my $self = shift;

        my $proc = sub {
            my ( $self, $item ) = @_;

            my $xpath = $self->search_xpath;
            my $type  = $self->result_type;
            my $html  = HTML::TreeBuilder::XPath->new_from_content($item);

            if ( $type eq 'VALUE' ) {

                # Collect the values first, then release the tree: parsed
                # trees are not reliably reclaimed unless delete() is called
                # on them, so one tree per item would otherwise pile up.
                my @values = $html->findvalues($xpath);
                $html->delete;
                return @values;
            }

            my @result = $html->findnodes($xpath);
            if ( !@result ) {

                # No element escapes to the caller, so the tree can go.
                $html->delete;
                return ();
            }

            # NODE results are elements that belong to the parsed tree, so
            # the tree must stay alive for as long as the caller uses them;
            # it is deliberately not deleted on this path.
            return @result if $type eq 'NODE';

            # Serialise before deleting; the strings do not depend on the
            # tree afterwards.
            my @markup = map { $_->as_HTML } @result;
            $html->delete;
            return @markup;
        };

        # Optionally wrap the list returned by $proc in an array reference.
        return $self->ref_result ? sub { return [ $proc->(@_) ] } : $proc;
    },
);
# Remove the declarative keywords imported by Moose and
# Moose::Util::TypeConstraints (has, extends, enum, ...) so they cannot be
# called as methods on instances of this class.
no Moose::Util::TypeConstraints;
no Moose;

# All attributes are declared by now; finalise the class definition.
__PACKAGE__->meta->make_immutable;

# A module file must end with a true value.
1;
__END__
=pod
=head1 NAME
DataFlow::Node::HTMLFilter - A filter node for HTML content.
=head1 VERSION
version 0.91.02
=head1 SYNOPSIS
use DataFlow::Node::HTMLFilter;
my $filter_html = DataFlow::Node::HTMLFilter->new(
search_xpath => '//td',
result_type => 'HTML',
);
my $filter_value = DataFlow::Node::HTMLFilter->new(
search_xpath => '//td',
result_type => 'VALUE',
);
my $input = <
| Line 1 | L1, Column 2 |
| Line 2 | L2, Column 2 |