package Elastic::Model::View;
$Elastic::Model::View::VERSION = '0.52';

use Moose;

use Carp;
use Elastic::Model::Types qw(
    IndexNames ArrayRefOfStr SortArgs HighlightArgs
    Consistency Replication);
use MooseX::Types::Moose qw(Str Int HashRef ArrayRef Bool Num Object);
use Elastic::Model::SearchBuilder();
use namespace::autoclean;

# Domains (indices or aliases) to query.  Built on first read by
# _build_domains when no value has been supplied.
#===================================
has 'domain' => (
#===================================
    is      => 'rw',
    isa     => IndexNames,
    coerce  => 1,
    lazy    => 1,
    builder => '_build_domains',
);

# Types to query; starts out as an empty list.
#===================================
has 'type' => (
#===================================
    is      => 'rw',
    isa     => ArrayRefOfStr,
    coerce  => 1,
    default => sub { [] },
);

# Query in the native query DSL (see queryb for the SearchBuilder form).
#===================================
has 'query' => (
#===================================
    is  => 'rw',
    isa => HashRef,
);

# Filter in the native query DSL (see filterb for the SearchBuilder form).
#===================================
has 'filter' => (
#===================================
    is  => 'rw',
    isa => HashRef,
);

# Post-filter in the native query DSL (see post_filterb).
#===================================
has 'post_filter' => (
#===================================
    is  => 'rw',
    isa => HashRef,
);

# Named aggregations, each a hashref; add_agg/remove_agg/get_agg delegate
# to the native Hash trait.
#===================================
has 'aggs' => (
#===================================
    is      => 'rw',
    isa     => HashRef [HashRef],
    traits  => ['Hash'],
    handles => {
        add_agg    => 'set',
        remove_agg => 'delete',
        get_agg    => 'get',
    },
);

# Named facets, each a hashref; add_facet/remove_facet/get_facet delegate
# to the native Hash trait.
#===================================
has 'facets' => (
#===================================
    is      => 'rw',
    isa     => HashRef [HashRef],
    traits  => ['Hash'],
    handles => {
        add_facet    => 'set',
        remove_facet => 'delete',
        get_facet    => 'get',
    },
);

# Fields to request; when left empty (and no include/exclude paths are
# set) _build_search asks for '_source'.
#===================================
has 'fields' => (
#===================================
    is      => 'rw',
    isa     => ArrayRefOfStr,
    coerce  => 1,
    default => sub { [] },
);

# Number of results to skip.
#===================================
has 'from' => (
#===================================
    is      => 'rw',
    isa     => Int,
    default => 0,
);

# Number of results per request.  Lazy, so _has_size reports whether a
# size has been set or read yet.
#===================================
has 'size' => (
#===================================
    is        => 'rw',
    isa       => Int,
    lazy      => 1,
    default   => 10,
    predicate => '_has_size',
);
# Sort specification, coerced to SortArgs.
#===================================
has 'sort' => (
#===================================
    isa    => SortArgs,
    is     => 'rw',
    coerce => 1,
);

# Highlighting parameters shared by all highlighted fields.  The trigger
# rejects a 'fields' key: fields belong in the 'highlight' attribute.
#===================================
has 'highlighting' => (
#===================================
    isa     => HashRef,
    is      => 'rw',
    trigger => \&_check_no_fields,
);

# Fields to highlight, coerced to HighlightArgs.
#===================================
has 'highlight' => (
#===================================
    is     => 'rw',
    isa    => HighlightArgs,
    coerce => 1,
);

# Per-index boosts, sent as 'indices_boost' by _build_search.
#===================================
has 'index_boosts' => (
#===================================
    isa     => HashRef [Num],
    is      => 'rw',
    traits  => ['Hash'],
    handles => {
        add_index_boost    => 'set',
        remove_index_boost => 'delete',
        get_index_boost    => 'get'
    }
);

#===================================
has 'min_score' => (
#===================================
    isa => Num,
    is  => 'rw',
);

#===================================
has 'preference' => (
#===================================
    isa => Str,
    is  => 'rw',
);

#===================================
has 'routing' => (
#===================================
    isa    => ArrayRefOfStr,
    coerce => 1,
    is     => 'rw',
);

# Paths sent as _source.include by _build_search when set.
#===================================
has 'include_paths' => (
#===================================
    is        => 'rw',
    isa       => ArrayRef [Str],
    predicate => '_has_include_paths'
);

# Paths sent as _source.exclude by _build_search when set.
#===================================
has 'exclude_paths' => (
#===================================
    is        => 'rw',
    isa       => ArrayRef [Str],
    predicate => '_has_exclude_paths'
);

#===================================
has 'script_fields' => (
#===================================
    isa     => HashRef,
    is      => 'rw',
    traits  => ['Hash'],
    handles => {
        add_script_field    => 'set',
        remove_script_field => 'delete',
        get_script_field    => 'get'
    }
);

#===================================
has 'timeout' => (
#===================================
    isa => Str,
    is  => 'rw',
);

#===================================
has 'explain' => (
#===================================
    is  => 'rw',
    isa => Bool,
);

#===================================
has 'stats' => (
#===================================
    is     => 'rw',
    isa    => ArrayRefOfStr,
    coerce => 1,
);

#===================================
has 'track_scores' => (
#===================================
    isa => Bool,
    is  => 'rw',
);

# Only used by delete() (see _build_delete).
#===================================
has 'consistency' => (
#===================================
    is  => 'rw',
    isa => Consistency,
);

# Only used by delete() (see _build_delete).
#===================================
has 'replication' => (
#===================================
    is  => 'rw',
    isa => Replication
);

# Object whose query()/filter() methods translate SearchBuilder syntax;
# defaults to a new Elastic::Model::SearchBuilder.
#===================================
has 'search_builder' => (
#===================================
    isa     => Object,
    is      => 'rw',
    lazy    => 1,
    builder => '_build_search_builder',
);

# Cache object used by cached_search().
#===================================
has 'cache' => (
#===================================
    is  => 'rw',
    isa => Object,
);

# Default options merged into every cached_search() call.
#===================================
has 'cache_opts' => (
#===================================
    is  => 'rw',
    isa => HashRef,
);

#===================================
sub _build_search_builder { Elastic::Model::SearchBuilder->new }
#===================================

# Accepts either a single argument or a list (which is wrapped in a
# hashref), runs it through search_builder->query and returns a cloned
# view with 'query' set.  If the builder returns nothing, an unmodified
# clone is returned instead.
#===================================
sub queryb {
#===================================
    my $self  = shift;
    my @args  = @_ > 1 ? {@_} : shift();
    my $query = $self->search_builder->query(@args)
        or return $self->_clone_self;
    $self->query( $query->{query} );
}

# As queryb, but uses search_builder->filter and sets 'filter'.
#===================================
sub filterb {
#===================================
    my $self   = shift;
    my @args   = @_ > 1 ? {@_} : shift();
    my $filter = $self->search_builder->filter(@args)
        or return $self->_clone_self;
    $self->filter( $filter->{filter} );
}

# As filterb, but sets 'post_filter'.
#===================================
sub post_filterb {
#===================================
    my $self   = shift;
    my @args   = @_ > 1 ? {@_} : shift();
    my $filter = $self->search_builder->filter(@args)
        or return $self->_clone_self;
    $self->post_filter( $filter->{filter} );
}

#===================================
# clone views when setting attributes
#===================================

# Each accessor below is wrapped so that calling it with arguments sets
# the value on a shallow clone and returns the clone (see _clone_args).
# The three groups differ only in how the argument list is normalised.
around [
#===================================
    'from',           'size',       'timeout',
    'track_scores',   'search_builder',
    'preference',     'min_score',  'explain',
    'consistency',    'replication', 'cache'
#===================================
] => sub { _clone_args( \&_scalar_args, @_ ) };

around [
#===================================
    'domain',        'type',   'fields', 'sort',
    'routing',       'stats',  'include_paths',
    'exclude_paths'
#===================================
] => sub { _clone_args( \&_array_args, @_ ) };

around [
#===================================
    'aggs',          'facets',       'index_boosts',
    'script_fields', 'highlighting', 'query',
    'filter',        'post_filter',  'cache_opts'
#===================================
] => sub { _clone_args( \&_hash_args, @_ ) };

#===================================
around 'highlight'
#===================================
    => sub { _clone_args( \&_highlight_args, @_ ) };

# add_*/remove_* delegations mutate the hash in place, so first install a
# copy of the hash on a cloned view and apply the change to that clone.
for my $name ( 'agg', 'facet', 'index_boost', 'script_field' ) {
    my $attr = $name . 's';
    for my $method ( "add_$name", "remove_$name" ) {
        around $method => sub {
            my $orig = shift;
            my $self = shift;
            my %hash = %{ $self->$attr || {} };

            # the wrapped setter returns the cloned view
            $self = $self->$attr( \%hash );
            $self->$orig(@_);
            return $self;
        };
    }
}

# Argument normalisers used by the accessor wrappers above.
#===================================
sub _scalar_args    {@_}
sub _hash_args      { @_ > 1 ? {@_} : @_ }
sub _highlight_args { ref $_[0] ? shift : \@_ }
sub _array_args     { ref $_[0] eq 'ARRAY' ? shift() : \@_ }
#===================================

# Shared body of the accessor wrappers.
#   $args - code ref that normalises the caller's argument list
#   $orig - the original accessor
#   $self - the view
# With arguments: sets the value on a clone and returns the clone.
# Without arguments: plain read through the original accessor.
#===================================
sub _clone_args {
#===================================
    my $args = shift;
    my $orig = shift;
    my $self = shift;
    if (@_) {
        $self = $self->_clone_self;
        $self->$orig( $args->(@_) );
        return $self;
    }
    $self->$orig();
}

# Shallow copy: a new blessed hash sharing the same attribute values.
#===================================
sub _clone_self {
#===================================
    my $self = shift;
    return bless {%$self}, ref $self;
}

# Trigger for 'highlighting'.
#===================================
sub _check_no_fields {
#===================================
    my ( $self, $val ) = @_;
    croak "Use the (highlight) attribute to set the fields to highlight"
        if $val->{fields};
}

no Moose;

# Applies the queryb/filterb/post_filterb constructor arguments.  Those
# methods return clones, so the final clone's contents are copied back
# into the object under construction.
#===================================
sub BUILD {
#===================================
    my ( $orig_self, $args ) = @_;
    my $self = $orig_self;
    for (qw(queryb filterb post_filterb)) {
        $self = $self->$_( $args->{$_} )
            if defined $args->{$_};
    }
    %{$orig_self} = %{$self};
}

# Default for 'domain': each namespace key followed by that namespace's
# fixed_domains, with namespaces visited in sorted key order.
#===================================
sub _build_domains {
#===================================
    my $self       = shift;
    my $namespaces = $self->model->namespaces;
    [   map { $_, @{ $namespaces->{$_}->fixed_domains } }
        sort keys %$namespaces
    ];
}

# Runs the search through the model's results_class.
#===================================
sub search {
#===================================
    my $self = shift;
    $self->model->results_class->new( search => $self->_build_search )
        ->as_results;
}

# Like search(), but through the model's cached_results_class.  Falls back
# to a plain search() when no cache is set.  Options may be passed as a
# single hashref or as a list; they override the view's cache_opts.
#===================================
sub cached_search {
#===================================
    my $self  = shift;
    my $cache = $self->cache
        or return $self->search;

    my %cache_opts = (
        %{ $self->cache_opts || {} },
        @_ == 1 ? %{ $_[0] } : @_
    );

    $self->model->cached_results_class->new(
        search     => $self->_build_search,
        cache      => $cache,
        cache_opts => \%cache_opts
    )->as_results;
}

#===================================
sub scroll { shift->_scroll(@_)->as_results }
#===================================

# Scrolled search with search_type 'scan'; refuses to run on a view that
# has a sort set.
#===================================
sub scan {
#===================================
    my $self = shift;
    croak "A scan cannot be combined with sorting"
        if @{ $self->sort || [] };
    return $self->_scroll( shift, search_type => 'scan', @_ )->as_objects;
}

# First argument is the scroll timeout (default '1m'); remaining arguments
# are passed through to _build_search.
#===================================
sub _scroll {
#===================================
    my $self   = shift;
    my $search = $self->_build_search( scroll => shift() || '1m', @_ );
    return $self->model->scrolled_results_class->new( search => $search );
}

#===================================
sub delete {
#===================================
    my $self = shift;
    $self->model->store->delete_by_query( $self->_build_delete(@_) );
}

#===================================
sub first { shift->size(1)->search(@_)->first }
sub total { shift->size(0)->search(@_)->total }
#===================================

# Assembles the search request as a hashref.  Undefined values are
# dropped.  Caller-supplied pairs in @_ override the view's settings, but
# 'version' and 'fields' are always set here.
#===================================
sub _build_search {
#===================================
    my $self = shift;

    # only send 'highlight' when at least one field is requested
    my ( $highlight, $hfields );
    if ( $hfields = $self->highlight and keys %$hfields ) {
        $highlight = {
            %{ $self->highlighting || {} },
            fields => $hfields
        };
    }

    my $fields = $self->fields;

    my $source;
    $source->{include} = $self->include_paths if $self->_has_include_paths;
    $source->{exclude} = $self->exclude_paths if $self->_has_exclude_paths;

    # nothing requested explicitly: ask for the whole _source
    $fields = ['_source'] unless $source || @$fields;

    my %args = _strip_undef(
        (   map { $_ => $self->$_ }
                qw(
                type sort from size aggs min_score
                post_filter preference routing stats
                script_fields timeout track_scores explain
                )
        ),
        facets        => $self->_build_facets,
        index         => $self->domain,
        query         => $self->_build_query,
        highlight     => $highlight,
        indices_boost => $self->index_boosts,
        @_,
        version => 1,
        fields  => [ '_parent', '_routing', @$fields ]
    );
    $args{_source} = $source if defined $source;
    return \%args;
}

# Returns a copy of the facets with any queryb/filterb/facet_filterb
# entries translated to native DSL, or undef when no facets are set.
#===================================
sub _build_facets {
#===================================
    my $self = shift;

    # Must be an explicit undef, not a bare return: this is called inside
    # a key/value list in _build_search and has to yield exactly one value.
    return undef unless $self->facets;

    my $facets = { %{ $self->facets } };

    for ( values %$facets ) {
        croak "All (facets) must be HASH refs" unless ref $_ eq 'HASH';

        # replace the entry with a copy so the stored facet is not modified
        $_ = my $facet = {%$_};
        $self->_to_dsl(
            {   queryb        => 'query',
                filterb       => 'filter',
                facet_filterb => 'facet_filter'
            },
            $facet
        );
    }
    $facets;
}

# For each clause in @_, moves every SearchBuilder-style key named in
# %$ops (old name => native name) through the search builder and stores
# the result under the native name.  Dies if both forms are present.
# Keys are walked in sorted order rather than with each(), so a die
# part-way through cannot leave a hash iterator half-consumed and the
# reported conflict is deterministic.
#===================================
sub _to_dsl {
#===================================
    my ( $self, $ops ) = ( shift, shift );
    my $builder;
    for my $clause (@_) {
        for my $old ( sort keys %$ops ) {
            my $new = $ops->{$old};
            my $src = delete $clause->{$old} or next;
            die "Cannot specify $old and $new parameters.\n"
                if $clause->{$new};
            $builder ||= $self->search_builder;
            my $method = $new eq 'query' ? 'query' : 'filter';
            my $sub_clause = $builder->$method($src) or next;
            $clause->{$new} = $sub_clause->{$method};
        }
    }
    return;
}

# Combines 'query' and 'filter':
#   neither      -> match_all
#   filter only  -> constant_score
#   both         -> filtered
#   query only   -> the query itself
#===================================
sub _build_query {
#===================================
    my $self = shift;
    my $q    = $self->query;
    my $f    = $self->filter;
    return { match_all => {} } unless $q || $f;

    return
         !$q ? { constant_score => { filter => $f } }
        : $f ? { filtered => { query => $q, filter => $f } }
        :      $q;
}

# Arguments for delete_by_query.  Caller-supplied pairs may override
# everything except 'query'.
#===================================
sub _build_delete {
#===================================
    my $self = shift;
    my %args = _strip_undef(
        index => $self->domain,
        ( map { $_ => $self->$_ } qw(type routing consistency replication) ),
        @_,
        query => $self->_build_query,
    );
    return \%args;
}

# Takes key/value pairs and returns only those whose value is defined.
#===================================
sub _strip_undef {
#===================================
    my %args = @_;
    return map { $_ => $args{$_} } grep { defined $args{$_} } keys %args;
}

1;

=pod

=encoding UTF-8

=head1 NAME

Elastic::Model::View - Views to query your docs in Elasticsearch

=head1 VERSION

version 0.52

=head1 SYNOPSIS

    $view    = $model->view();         # all domains and types known to the model
    $view    = $domain->view();        # just $domain->name, and its types
    $posts   = $view->type( 'post' );  # just type post

10 most relevant posts containing C<'perl'> or C<'moose'>

    $results = $posts->queryb( content => 'perl moose' )->search;

10 most relevant posts containing C<'perl'> or C<'moose'> published since
1 Jan 2012, sorted by C<timestamp>, with highlighted snippets from the
C<content> field:

    $results = $posts
                ->queryb    ( 'content' => 'perl moose'            )
                ->filterb   ( 'created' => { gte => '2012-01-01' } )
                ->sort      ( 'timestamp'                          )
                ->highlight ( 'content'                            )
                ->search;

The same as the above, but in one step:

    $results = $domain->view(
        type      => 'post',
        sort      => 'timestamp',
        queryb    => { content => 'perl moose' },
        filterb   => { created => { gte => '2012-01-01' } },
        highlight => 'content',
    )->search;

Efficiently retrieve all posts, unsorted:

    $results = $posts->size(100)->scan;

    while (my $result = $results->shift_result) {
        do_something_with($result);
    }

Cached results:

    $cache   = CHI->new(....);
    $view    = $view->cache( $cache )->cache_opts( expires_in => '2 min');

    $results = $view->queryb( 'perl' )->cached_search();
    $results = $view->queryb( 'perl' )->cached_search( expires_in => '30 sec');

=head1 DESCRIPTION

L<Elastic::Model::View> is used to query your docs in Elasticsearch.

Views are "chainable".
In other words, you get a clone of the current view every time you set an
attribute. For instance, you could do:

    $all_types    = $domain->view;
    $users        = $all_types->type('user');
    $posts        = $all_types->type('post');

    $recent_posts = $posts->filterb({ published => { gt => '2012-05-01' }});

Alternatively, you can set all or some of the attributes when you create a
view:

    $recent_posts = $domain->view(
        type    => 'post',
        filterb => { published => { gt => '2012-05-01' }}
    );

Views are also reusable. They only hit the database when you call one of the
L<methods|/METHODS>, eg:

    $results = $recent_posts->search;    # retrieve $size results
    $scroll  = $recent_posts->scroll;    # keep pulling results

=head1 METHODS

Calling one of the methods listed below executes your query and returns the
results. Your C<view> is unchanged and can be reused later.

See L<Elastic::Manual::Searching> for a discussion about when and how to use
L</search()>, L</scroll()> or L</scan()>.

=head2 search()

    $results = $view->search();

Executes a search and returns an L<Elastic::Model::Results> object with at
most L</size> results.

This is useful for returning finite results, ie where you know how many
results you want. For instance: I<"give me the 10 best results">.

=head2 cached_search()

    $results = $view->cache( $cache )->cached_search( %opts );

If a L</cache> attribute has been specified for the current view, then
L</cached_search()> tries to retrieve the search results from the L</cache>.
If it fails, then a L</search()> is executed, and the results are stored in
the L</cache>. An L<Elastic::Model::Results::Cached> object is returned.

Any C<%opts> that are passed in override any default L</cache_opts>, and are
passed to L<CHI>'s C<get()> or C<set()> methods.

    $view    = $view->cache_opts( expires_in => '30 sec' );

    $results = $view->cached_search;                            # 30 seconds
    $results = $view->cached_search( expires_in => '2 min' );   # 2 minutes

Given the near-real-time nature of Elasticsearch, you sometimes want to
invalidate a cached result in the near future. For instance, if you have
cached a list of comments on a blog post, but then you add a new comment, you
want to invalidate the cached comments list.
However, the new comment will only become visible to search sometime within
the next second, so invalidating the cache immediately may or may not be
useful.

Use the special argument C<force_set> to bypass the cache C<get> and to force
the cached version to be updated, along with a new expiry time:

    $results = $view->cached_search( force_set => 1, expires_in => '2 sec');

=head2 scroll()

    $scroll_timeout   = '1m';
    $scrolled_results = $view->scroll( $scroll_timeout );

Executes a search and returns an L<Elastic::Model::Results::Scrolled> object
which will pull L</size> results from Elasticsearch as required until either
(1) no more results are available or (2) more than C<$scroll_timeout>
(default 1 minute) elapses between requests to Elasticsearch.

Scrolling allows you to return an unbounded result set. Useful if you're not
sure whether to expect 2 results or 2000.

=head2 scan()

    $timeout          = '1m';
    $scrolled_results = $view->scan($timeout);

L</scan()> is a special type of L</scroll()> request, intended for efficient
handling of large numbers of unsorted docs (eg when you want to reindex all
of your data).

=head2 first()

    $result = $view->first();
    $object = $view->first->object;

Executes the search and returns just the first result. All other metadata is
thrown away.

=head2 total()

    $total = $view->total();

Executes the search and returns the total number of matching docs. All other
metadata is thrown away.

=head2 delete()

    $results = $view->delete();

Deletes all docs matching the query and returns a hashref indicating success.
Any docs that are stored in a live L<scope|Elastic::Model::Scope> or are
cached somewhere are not removed. Any
L<unique keys|Elastic::Manual::Attributes::Unique> are not removed.

This should really only be used once you are sure that the matching docs are
out of circulation. Also, it is more efficient to just delete a whole index
(if possible), rather than deleting large numbers of docs.

B<Note:> The only attributes relevant to L</delete()> are L</domain>,
L</type>, L</query>, L</routing>, L</consistency> and L</replication>.

=head1 CORE ATTRIBUTES =head2 domain $new_view = $view->domain('my_index'); $new_view = $view->domain('index_one','alias_two'); \@domains = $view->domain; Specify one or more domains (indices or aliases) to query. By default, a C created from a L will query just that domain's L. A C created from the L will query all the main domains (ie the L) and L known to the model. =head2 type $new_view = $view->type('user'); $new_view = $view->type('user','post'); \@types = $view->type; By default, a C will query all L known to all the L specified in the view. You can specify one or more types. =head2 query =head2 queryb # native query DSL $new_view = $view->query( text => { title => 'interesting words' } ); # SearchBuilder DSL $new_view = $view->queryb( title => 'interesting words' ); \%query = $view->query Specify the query to run in the native L or use C to specify your query with the more Perlish L query syntax. By default, the query will L. =head2 filter =head2 filterb # native query DSL $new_view = $view->filter( term => { tag => 'perl' } ); # SearchBuilder DSL $new_view = $view->filterb( tag => 'perl' ); \%filter = $view->filter; You can specify a filter to apply to the query results using either the native Elasticsearch query DSL or, use C to specify your filter with the more Perlish L DSL. If a filter is specified, it will be combined with the L as a L, or (if no query is specified) as a L query. =head2 post_filter =head2 post_filterb # native query DSL $new_view = $view->post_filter( term => { tag => 'perl' } ); # SearchBuilder DSL $new_view = $view->post_filterb( tag => 'perl' ); \%filter = $view->post_filter; L filter the results AFTER any L have been calculated. In the above example, the aggregations would be calculated on all values of C, but the results would then be limited to just those docs where C. You can specify a post_filter using either the native Elasticsearch query DSL or, use C to specify it with the more Perlish L DSL. 
=head2 sort $new_view = $view->sort( '_score' ); # _score desc $new_view = $view->sort( 'timestamp' ); # timestamp asc $new_view = $view->sort( { timestamp => 'asc' } ); # timestamp asc $new_view = $view->sort( { timestamp => 'desc' } ); # timestamp desc $new_view = $view->sort( '_score', # _score desc { timestamp => 'desc' } # then timestamp desc ); \@sort = $view->sort By default, results are sorted by "relevance" (C<< _score => 'desc' >>). You can specify multiple sort arguments, which are applied in order, and can include scripts or geo-distance. See L for more information. B Sorting cannot be combined with L. =head2 from $new_view = $view->from( 10 ); $from = $view->from; By default, results are returned from the first result. Think of it as I<"the number of docs to skip">, so setting C to C<0> would start from the first result. Setting C to C<10> would skip the first 10 results and return docs from result number 11 onwards. =head2 size $new_view = $view->size( 100 ); $size = $view->size; The number of results returned in a single L, which defaults to 10. B See L for a slightly different application of the L value. =head2 aggs $new_view = $view->aggs( active_docs => { filter => { term => { status => 'active' } }, aggs => { popular_tags => { terms => { field => 'path.to.tags', size => 10 } } } }, agg_two => {....} ); $new_view = $view->add_agg( agg_three => {...} ) $new_view = $view->remove_agg('agg_three'); \%aggs = $view->aggs; \%agg = $view->get_agg('active_docs'); Aggregations allow you to aggregate data from a query, for instance: most popular terms, number of blog posts per day, average price etc. Aggs are calculated from the query generated from L and L. If you want to filter your query results down further after calculating your aggs, you can use L. B There is no support in aggs for L. See L for an explanation of what aggregations are available. =head2 facets B Facets are deprecated in favour of L. 
They will be removed in a future version of Elasticsearch. $new_view = $view->facets( facet_one => { terms => { field => 'field.to.facet', size => 10 }, facet_filterb => { status => 'active' }, }, facet_two => {....} ); $new_view = $view->add_facet( facet_three => {...} ) $new_view = $view->remove_facet('facet_three'); \%facets = $view->facets; \%facet = $view->get_facet('facet_one'); Facets allow you to aggregate data from a query, for instance: most popular terms, number of blog posts per day, average price etc. Facets are calculated from the query generated from L and L. If you want to filter your query results down further after calculating your facets, you can use L. See L for an explanation of what facets are available. =head2 highlight $new_view = $view->highlight( 'field_1', 'field_2' => \%field_2_settings, 'field_3' ); Specify which fields should be used for L. to your search results. You can pass just a list of fields, or fields with their field-specific settings. These values are used to set the C parameter in L. =head2 highlighting $new_view = $view->highlighting( pre_tags => [ '', '' ], post_tags => [ '', '' ], encoder => 'html' ... ); The L attribute is used to pass any highlighting parameters which should be applied to all of the fields set in L (although you can override these settings for individual fields by passing field settings to L). See L. for more about how highlighting works, and L for how to retrieve the highlighted snippets. =head1 OTHER ATTRIBUTES =head2 fields $new_view = $view->fields('title','content'); By default, searches will return the L<_source|http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-source-field.html> field which contains the whole document, allowing Elastic::Model to inflate the original object without having to retrieve the document separately. If you would like to just retrieve a subset of fields, you can specify them in L. See L. 
B If you do specify any fields, and you DON'T include C<'_source'> then the C<_source> field won't be returned, and you won't be able to retrieve the original object without requesting it from Elasticsearch in a separate (but automatic) step. =head2 script_fields $new_view = $view->script_fields( distance => { script => q{doc['location'].distance(lat,lon)}, params => { lat => $lat, lon => $lon } }, $name => \%defn, ... ); $new_view = $view->add_script_field( $name => \%defn ); $new_view = $view->remove_script_field($name); \%fields = $view->script_fields; \%defn = $view->get_script_field($name); L