package HTML::Feature::Engine::LDRFullFeed; use strict; use warnings; use HTML::TreeBuilder::XPath; use LWP::Simple; use Storable qw(retrieve nstore); use JSON; use Encode; use Carp; use base qw(HTML::Feature::Base); __PACKAGE__->mk_accessors($_) for qw(_LDRFullFeed); sub run { my $self = shift; my $html_ref = shift; my $url = shift; my $result = shift; my $tree = HTML::TreeBuilder::XPath->new; $tree->no_space_compacting(1); $tree->ignore_ignorable_whitespace(0); $tree->parse($$html_ref); $tree->eof; my $site_info = $self->_detect_siteinfo($url); if ($site_info) { my $xpath = $site_info->{data}->{xpath}; my $text; for my $node ( $tree->findnodes($xpath) ) { $text .= $node->as_text; } $result->text($text); if ( !$result->title ) { if ( my $title = $tree->look_down( _tag => "title" ) ) { $result->title( $title->as_text ); } } if ( !$result->desc ) { if ( my $desc = $tree->look_down( _tag => 'meta', name => 'description' ) ) { $result->desc( $desc->attr("content") ); } } } if ( $result->text ) { $result->{matched_engine} = 'LDRFullFeed'; } $tree->delete; return $result; } sub LDRFullFeed { my $self = shift; my $c = $self->context; $self->_LDRFullFeed || sub { my $data; my $path = $INC{'HTML/Feature/Engine/LDRFullFeed.pm'}; $path =~ s/.pm//; $path .= '/item.st'; if ( $c->config->{LDRFullFeed}->{data_file_path} ) { my $path = $c->config->{LDRFullFeed}->{data_file_path}; if ( -e $path ) { $data = retrieve($path); } else { my $json = get('http://wedata.net/databases/LDRFullFeed/items.json'); my $data = from_json($json); nstore( $data, $path ); } } else { $data = retrieve($path); } my %priority = ( SBM => 1000, INDIVIDUAL => 100, IND => 100, SUBGENERAL => 10, SUB => 10, GENERAL => 1, GEN => 1 ); my @sorted = sort { $a->{data}->{priority} <=> $b->{data}->{priority} } map { $_->{data}->{priority} ||= sub { my $type = $_->{data}->{type}; if ( $priority{$type} ) { $_->{data}->{type} = $priority{$type}; } else { $_->{data}->{type} = 0; } return $_; } ->(); } @$data; $self->_LDRFullFeed( \@sorted ); } ->(); } sub _detect_siteinfo { my $self = shift; my $url = shift; unless($url){ carp("WARNING: if you use 'HTML::Feature::Engine::LDRFullFeed', URL will be necessary (as second arguments)"); return; } my $data = $self->LDRFullFeed; for my $item (@$data) { if ( ( $item->{data}->{url} ) && ( $url =~ /$item->{data}->{url}/ ) ) { return $item; } } return; } 1; __END__ =head1 NAME HTML::Feature::Engine::LDRFullFeed - An engine module that uses wedata's database (LDRFullFeed) =head1 SYNOPSIS =head1 DESCRIPTION =head1 METHODS =head2 run =head2 LDRFullFeed =head1 AUTHOR Takeshi Miki Emiki@cpan.orgE =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO =cut