package WWW::Tabela::Fipe; use Moo; with 'HTML::Robot::Scrapper::Reader'; use Data::Printer; use utf8; use HTML::Entities; use HTTP::Request::Common qw(POST); our $VERSION = 0.002; has [ qw/marcas viewstate eventvalidation/ ] => ( is => 'rw' ); has veiculos => ( is => 'rw' , default => sub { return []; }); has referer => ( is => 'rw' ); sub start { my ( $self ) = @_; } has startpage => ( is => 'rw', default => sub { return [ { tipo => 'moto', url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?azxp=1&v=m&p=52' }, { tipo => 'carro', url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?p=51' }, { tipo => 'caminhao', url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?v=c&p=53' }, ] }, ); sub on_start { my ( $self ) = @_; foreach my $item ( @{ $self->startpage } ) { $self->append( search => $item->{ url }, { passed_key_values => { tipo => $item->{ tipo }, referer => $item->{ url }, } } ); } } sub _headers { my ( $self , $url, $form ) = @_; return { 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding' => 'gzip, deflate', 'Accept-Language' => 'en-US,en;q=0.5', 'Cache-Control' => 'no-cache', 'Connection' => 'keep-alive', 'Content-Length' => length( POST('url...', [], Content => $form)->content ), 'Content-Type' => 'application/x-www-form-urlencoded; charset=utf-8', 'DNT' => '1', 'Host' => 'www.fipe.org.br', 'Pragma' => 'no-cache', 'Referer' => $url, 'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:20.0) Gecko/20100101 Firefox/20.0', 'X-MicrosoftAjax' => 'Delta=true', }; } sub _form { my ( $self, $args ) = @_; return [ ScriptManager1 => $args->{ script_manager }, __ASYNCPOST => 'true', __EVENTARGUMENT => '', __EVENTTARGET => $args->{ event_target }, __EVENTVALIDATION => $args->{ event_validation }, __LASTFOCUS => '', __VIEWSTATE => $args->{ viewstate }, ddlAnoValor => ( !exists $args->{ano} ) ? 0 : $args->{ ano }, ddlMarca => ( !exists $args->{marca} ) ? 0 : $args->{ marca }, ddlModelo => ( !exists $args->{modelo} ) ? 0 : $args->{ modelo }, ddlTabelaReferencia => $args->{ tb_referencia }, txtCodFipe => '', ]; } sub search { my ( $self ) = @_; my $marcas = $self->tree->findnodes( '//select[@name="ddlMarca"]/option' ); my $viewstate = $self->tree->findnodes( '//form[@id="form1"]//input[@id="__VIEWSTATE"]' )->get_node->attr('value'); my $event_validation = $self->tree->findnodes( '//form[@id="form1"]//input[@id="__EVENTVALIDATION"]' )->get_node->attr('value'); my $tabela_referencia = $self->tree->findnodes( '//select[@name="ddlTabelaReferencia"]/option[@selected="selected"]' )->get_node->attr( 'value' ); foreach my $marca ( $marcas->get_nodelist ) { my $form = $self->_form( { script_manager => 'UdtMarca|ddlMarca', event_target => 'ddlMarca', event_validation=> $event_validation, viewstate => $viewstate, marca => $marca->attr( 'value' ), tb_referencia => $tabela_referencia, } ); $self->prepend( busca_marca => 'url' , { passed_key_values => { marca => $marca->as_text, marca_id => $marca->attr( 'value' ), tipo => $self->robot->reader->passed_key_values->{ tipo }, referer => $self->robot->reader->passed_key_values->{ referer }, tb_referencia => $tabela_referencia, }, request => [ 'POST', $self->robot->reader->passed_key_values->{ referer }, { headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ), content => POST('url...', [], Content => $form)->content, } ] } ); } } sub busca_marca { my ( $self ) = @_; my ( $captura1, $viewstate ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.+)__VIEWSTATE\|([^\|]+)\|/g; my ( $captura_1, $event_validation ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.+)__EVENTVALIDATION\|([^\|]+)\|/g; my $modelos = $self->tree->findnodes( '//select[@name="ddlModelo"]/option' ); foreach my $modelo ( $modelos->get_nodelist ) { next unless $modelo->as_text !~ m/selecione/ig; my $kv={}; $kv->{ modelo_id } = $modelo->attr( 'value' ); $kv->{ modelo } = $modelo->as_text; $kv->{ marca_id } = $self->robot->reader->passed_key_values->{ marca_id }; $kv->{ marca } = $self->robot->reader->passed_key_values->{ marca }; $kv->{ tipo } = $self->robot->reader->passed_key_values->{ tipo }; $kv->{ referer } = $self->robot->reader->passed_key_values->{ referer }; $kv->{ tb_referencia } = $self->robot->reader->passed_key_values->{ tb_referencia }; my $form = $self->_form( { script_manager => 'updModelo|ddlModelo', event_target => 'ddlModelo', event_validation=> $event_validation, viewstate => $viewstate, marca => $kv->{ marca_id }, modelo => $kv->{ modelo_id }, tb_referencia => $self->robot->reader->passed_key_values->{ tb_referencia }, } ); $self->prepend( busca_modelo => '', { passed_key_values => $kv, request => [ 'POST', $self->robot->reader->passed_key_values->{ referer }, { headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ), content => POST( 'url...', [], Content => $form )->content, } ] } ); } } sub busca_modelo { my ( $self ) = @_; my $anos = $self->tree->findnodes( '//select[@id="ddlAnoValor"]//option' ); foreach my $ano ( $anos->get_nodelist ) { my $kv = {}; $kv->{ ano_id } = $ano->attr( 'value' ); $kv->{ ano } = $ano->as_text; $kv->{ modelo_id } = $self->robot->reader->passed_key_values->{ modelo_id }; $kv->{ modelo } = $self->robot->reader->passed_key_values->{ modelo }; $kv->{ marca_id } = $self->robot->reader->passed_key_values->{ marca_id }; $kv->{ marca } = $self->robot->reader->passed_key_values->{ marca }; $kv->{ tipo } = $self->robot->reader->passed_key_values->{ tipo }; $kv->{ referer } = $self->robot->reader->passed_key_values->{ referer }; $kv->{ tb_referencia } = $self->robot->reader->passed_key_values->{ tb_referencia }; next unless $ano->as_text !~ m/selecione/ig; my ( $captura1, $viewstate ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.*)__VIEWSTATE\|([^\|]+)\|/g; my ( $captura_1, $event_validation ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.*)__EVENTVALIDATION\|([^\|]+)\|/g; my $form = $self->_form( { script_manager => 'updAnoValor|ddlAnoValor', event_target => 'ddlAnoValor', event_validation=> $event_validation, viewstate => $viewstate, marca => $kv->{ marca_id }, modelo => $kv->{ modelo_id }, ano => $kv->{ ano_id }, tb_referencia => $self->robot->reader->passed_key_values->{ tb_referencia }, } ); $self->prepend( busca_ano => '', { passed_key_values => $kv, request => [ 'POST', $self->robot->reader->passed_key_values->{ referer }, { headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ), content => POST( 'url...', [], Content => $form )->content, } ] } ); } } sub busca_ano { my ( $self ) = @_; my $item = {}; $item->{ mes_referencia } = $self->tree->findvalue('//span[@id="lblReferencia"]') ; $item->{ cod_fipe } = $self->tree->findvalue('//span[@id="lblCodFipe"]'); $item->{ marca } = $self->tree->findvalue('//span[@id="lblMarca"]'); $item->{ modelo } = $self->tree->findvalue('//span[@id="lblModelo"]'); $item->{ ano } = $self->tree->findvalue('//span[@id="lblAnoModelo"]'); $item->{ preco } = $self->tree->findvalue('//span[@id="lblValor"]'); $item->{ data } = $self->tree->findvalue('//span[@id="lblData"]'); $item->{ tipo } = $self->robot->reader->passed_key_values->{ tipo } ; $item->{ tb_referencia } = $self->robot->reader->passed_key_values->{ tb_referencia }; push( @{$self->veiculos}, $item ); } sub on_link { my ( $self, $url ) = @_; } sub on_finish { my ( $self ) = @_; $self->robot->writer->write( $self->veiculos ); } =head1 NAME WWW::Tabela::Fipe - Baixe a tabela fipe completa mantenha-se atualizado =head1 SYNOPSIS salve o trecho abaixo em um arquivo ex: vim fipe.pl e coloque esse conteudo: package WWW::Tabela::Fipe::Parser; use Moo; with('HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath'); with('HTML::Robot::Scrapper::Parser::XML::XPath'); sub content_types { my ( $self ) = @_; return { 'text/html' => [ { parse_method => 'parse_xpath', description => q{ The method above 'parse_xpath' is inside class: HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath }, } ], 'text/plain' => [ { parse_method => 'parse_xpath', description => q{ esse site da fipe responde em text/plain e eu preciso parsear esse content type. por isso criei esta classe e passei ela como parametro, sobreescrevendo a classe HTML::Robot::Scrapper::Parser::Default }, } ], 'text/xml' => [ { parse_method => 'parse_xml' }, ], }; } 1; package FIPE; use HTML::Robot::Scrapper; use CHI; use HTTP::Tiny; use HTTP::CookieJar; my $robot = HTML::Robot::Scrapper->new ( reader => { # REQ class => 'WWW::Tabela::Fipe', }, writer => {class => 'WWW::Tabela::FipeWrite',}, #REQ benchmark => {class => 'Default'}, # cache => { # class => 'Default', # args => { # is_active => 0, # engine => CHI->new( # driver => 'BerkeleyDB', # root_dir => "/home/catalyst/WWW-Tabela-Fipe/cache/", # ), # }, # }, log => {class => 'Default'}, parser => {class => 'WWW::Tabela::Fipe::Parser'}, #custom para tb fipe. pois eles respondem com Content type text/plain queue => {class => 'Default'}, useragent => { class => 'Default', args => { ua => HTTP::Tiny->new( cookie_jar => HTTP::CookieJar->new), } }, encoding => {class => 'Default'}, instance => {class => 'Default'}, ); $robot->start(); depois, é só executar, ex: perl -I/home/catalyst/HTML-Robot-Scraper/lib/ -I./lib/ fipe.pl o comando acima vai usar uma versao local do HTML-Robot-Scrapper.... se vc tiver instalado vc pode executar assim: perl fipe.pl espero que gostem =head1 DESCRIPTION Este módulo baixa a tabela FIPE atualizada para motos caminhoes e carros. Direto do site da FIPE. Fonte: fipe.org.br Downloads the FIPE table updated directly from fipe source. DataSource: fipe.org.br =head1 AUTHOR HERNAN CPAN ID: HERNAN perldelux hernan@cpan.org http://github.com/hernan604 =head1 COPYRIGHT This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. =head1 SEE ALSO perl(1). =cut #################### main pod documentation end ################### 1; # The preceding line will help the module return a true value