#!/usr/bin/env perl use strictures 1; use utf8; use 5.018; =head1 NAME Bio::WebService::LANL::SequenceLocator::Server - A JSON web API for LANL's HIV sequence locator =head1 SYNOPSIS After installation: plackup `perldoc -l Bio::WebService::LANL::SequenceLocator::Server` Or from a git checkout or tarball: plackup # uses app.psgi Or as a L managed service: examples/service start =head1 DESCRIPTION This server powers L for the L using L. =head1 ENVIRONMENT =head2 SERVER_ADMIN Set the SERVER_ADMIN environment variable before starting the server to provide a contact address in requests to LANL and server error messages in API responses. =head1 INSTALLATION The prerequisites for this server are optional and are probably not installed by default on your computer when you install this distribution. From a git checkout or tarball, you can install the necessary modules with L: cpanm --with-all-features --installdeps . =cut package Bio::WebService::LANL::SequenceLocator::Server; use Web::Simple; use Bio::WebService::LANL::SequenceLocator; use File::Share qw< dist_file >; use JSON qw< encode_json >; use Text::CSV; use Plack::App::File; use Path::Tiny; use IO::String; has contact => ( is => 'ro', default => sub { $ENV{SERVER_ADMIN} || '[no address provided]' }, ); has locator => ( is => 'ro', isa => sub { die "Attribute 'locator' is not a Bio::WebService::LANL::SequenceLocator" unless $_[0]->isa("Bio::WebService::LANL::SequenceLocator"); }, lazy => 1, builder => sub { Bio::WebService::LANL::SequenceLocator->new( agent_string => join " ", "via", __PACKAGE__, $_[0]->contact ) }, ); has about_page => ( is => 'ro', lazy => 1, builder => sub { dist_file('Bio-WebService-LANL-SequenceLocator', 'about.html') }, ); has formats => ( is => 'ro', default => sub { [qw( json csv )] }, ); sub dispatch_request { sub (POST + /within/hiv) { sub (%base~&format~) { my ($self, $base, $format) = @_; $format ||= 'json'; $format = lc $format; return error(406 => "format '$format' is not supported; try one of " . join(", ", @{$self->formats})) unless grep { $format eq $_ } @{$self->formats}; sub (%fasta=) { my ($self, $fasta) = @_; return $self->locate_sequences_from_fasta($fasta, $base, $format); }, sub (*fasta=) { my ($self, $fasta) = @_; return error(422 => $fasta->reason) unless $fasta->is_upload; return $self->locate_sequences_from_fasta(path($fasta->path)->slurp, $base, $format); }, sub (%@sequence~) { my ($self, $sequences) = @_; return $self->locate_sequences($sequences, $base, $format); }, }, }, sub (GET + /within/hiv) { error( 405 => "You must make location requests using POST." ) }, sub (GET + /) { state $about = Plack::App::File->new(file => $_[0]->about_page); $about; }, } sub locate_sequences_from_fasta { my $self = shift; my $fasta = shift; my $sequences = $self->read_fasta(\$fasta) or return error( 415 => "Couldn't parse FASTA; invalid formating?" ); return $self->locate_sequences($sequences, @_); } sub locate_sequences { my ($self, $sequences, $base, $format) = @_; return error(422 => 'At least one value for "sequence" is needed.') unless $sequences and @$sequences; my $results = $self->locator->find($sequences, base => $base) or return error(503 => "Backend request to LANL failed, sorry! Contact @{[ $self->contact ]} if the problem persists."); return $self->format_results($results, $format); } sub format_results { my ($self, $results, $format) = @_; my $formatter = $self->can("as_$format") or return error(500 => "Unknown format '$format'"); return $formatter->($self, $results); } sub as_json { my ($self, $results) = @_; my $json = eval { encode_json($results) }; if ($@ or not $json) { warn $@ ? "Error encoding JSON response: $@\n" : "Failed to encode JSON response, but no error?!\n"; return error(500 => "Error encoding results to JSON. Contact @{[ $self->contact ]}"); } return [ 200, [ 'Content-type' => 'application/json' ], [ $json, "\n" ], ]; } sub as_csv { my ($self, $results) = @_; my $csv = IO::String->new; my $write = sub { state $csv_writer = Text::CSV->new({ binary => 1 }); $csv_writer->print($csv, @_); $csv->print("\n"); }; my @fields = qw( query_sequence base_type reverse_complement genome_start genome_end polyprotein start end region_names similarity_to_hxb2 alignment hxb2_sequence ); $write->(\@fields); for my $query (@$results) { # Trim leading/trailing whitespace $query->{alignment} =~ s/^\n//gm; $query->{alignment} =~ s/^\s*$//gm; chomp $query->{alignment}; $query->{region_names} = join " ", @{$query->{region_names}}; $write->([ @$query{@fields} ]); } $csv->seek(0, 0); return [ 200, [ 'Content-type' => 'text/csv', 'Content-disposition' => 'inline; filename="located.csv"' ], $csv, ]; } sub read_fasta { my ($self, $fasta) = @_; # XXX TODO: preserve sequence names and use them in output? my (@sequences) = map { chomp; $_ } split /^>.*\R/m, $$fasta; # Remove any leading garbage before the first description line (usually # just the empty string) shift @sequences; return \@sequences; } sub error { return [ shift, [ 'Content-type' => 'text/plain' ], [ join(" ", @_), "\n" ] ]; } __PACKAGE__->run_if_script; =head1 AUTHOR Thomas Sibley Etrsibley@uw.eduE =head1 COPYRIGHT Copyright 2014 by the Mullins Lab, Department of Microbiology, University of Washington. =head1 LICENSE Licensed under the same terms as Perl 5 itself. =cut