Search in sources :

Example 1 with MinimumScoreCollector

use of org.elasticsearch.common.lucene.MinimumScoreCollector in project crate by crate.

the class LuceneOrderedDocCollector method initialSearch.

/**
 * Runs the first search for this collector.
 * <p>
 * Every collector expression is first told that collection starts and is handed the
 * scorer. The query is then executed into a {@link TopFieldCollector} limited to
 * {@code batchSize} hits, optionally guarded by a {@link MinimumScoreCollector} when a
 * minimum score is configured.
 *
 * @return the collected score docs, converted via {@code scoreDocToIterable}
 * @throws IOException if the underlying search fails
 */
private KeyIterable<ShardId, Row> initialSearch() throws IOException {
    for (LuceneCollectorExpression<?> collectorExpression : expressions) {
        collectorExpression.startCollect(collectorContext);
        collectorExpression.setScorer(scorer);
    }

    final TopFieldCollector topDocsCollector =
        TopFieldCollector.create(sort, batchSize, true, doDocsScores, doDocsScores);

    // Only pay for score filtering when a minimum score was actually requested.
    final Collector searchCollector = (minScore == null)
        ? topDocsCollector
        : new MinimumScoreCollector(topDocsCollector, minScore);

    searcher.search(query, searchCollector);

    // Results are always read from the top-docs collector, not from the optional wrapper.
    return scoreDocToIterable(topDocsCollector.topDocs().scoreDocs);
}
Also used : MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector)

Example 2 with MinimumScoreCollector

use of org.elasticsearch.common.lucene.MinimumScoreCollector in project elasticsearch by elastic.

the class QueryPhase method execute.

/**
 * Runs the main query of the query phase against {@code searcher} and stores the
 * resulting top docs (plus profile results, when profiling is enabled) in
 * {@code searchContext.queryResult()}.
 * <p>
 * The collector that is finally handed to the searcher is built up in layers, in this
 * order (innermost first): top-docs / hit-count collector, terminate-after, post filter,
 * multi collector (aggregations etc.), minimum score, timeout, cancellation. When
 * profiling is enabled each layer is additionally wrapped in an
 * {@code InternalProfileCollector}.
 * <p>
 * In a package-private method so that it can be tested without having to
 * wire everything (mapperService, etc.)
 *
 * @param searchContext the context supplying query, paging, sort, scroll, collapse,
 *                      rescore, post filter, minimum score and timeout settings
 * @param searcher      the searcher the query is executed against
 * @return whether the rescoring phase should be executed
 * @throws QueryPhaseExecutionException wrapping any exception raised while building the
 *                                      collectors or executing the search
 */
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
    QuerySearchResult queryResult = searchContext.queryResult();
    // start out as "not timed out"; flipped to true below if the time limit is exceeded
    queryResult.searchTimedOut(false);
    final boolean doProfile = searchContext.getProfilers() != null;
    final SearchType searchType = searchContext.searchType();
    // only becomes true on the non-collapsed, unsorted path when rescorers are configured
    boolean rescore = false;
    try {
        queryResult.from(searchContext.from());
        queryResult.size(searchContext.size());
        Query query = searchContext.query();
        final int totalNumDocs = searcher.getIndexReader().numDocs();
        // never request more hits than there are documents in the index
        int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
        Collector collector;
        // produces the final TopDocs; it is only invoked after the search has run
        Callable<TopDocs> topDocsCallable;
        DocValueFormat[] sortValueFormats = new DocValueFormat[0];
        // already rewritten
        assert query == searcher.rewrite(query);
        if (searchContext.size() == 0) {
            // only the total hit count is needed, no matter what the value of from is
            final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            collector = totalHitCountCollector;
            if (searchContext.getProfilers() != null) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {

                @Override
                public TopDocs call() throws Exception {
                    // no hits are returned, only the count
                    return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
                }
            };
        } else {
            // Perhaps have a dedicated scroll phase?
            final ScrollContext scrollContext = searchContext.scrollContext();
            // a scroll context exists if and only if the request is a scroll request
            assert (scrollContext != null) == (searchContext.request().scroll() != null);
            final Collector topDocsCollector;
            // document after which hits are collected (scroll position or search_after), may stay null
            ScoreDoc after = null;
            if (searchContext.request().scroll() != null) {
                // scrolling ignores "from": each round returns at most "size" hits
                numDocs = Math.min(searchContext.size(), totalNumDocs);
                after = scrollContext.lastEmittedDoc;
                if (returnsDocsInOrder(query, searchContext.sort())) {
                    if (scrollContext.totalHits == -1) {
                        // first round
                        assert scrollContext.lastEmittedDoc == null;
                        // there is not much that we can optimize here since we want to collect all
                        // documents in order to get the total number of hits
                    } else {
                        // skip to the desired doc and stop collecting after ${size} matches
                        if (scrollContext.lastEmittedDoc != null) {
                            // the original query still has to match (MUST); the MinDocQuery is added as a
                            // non-scoring FILTER starting right after the last emitted doc
                            BooleanQuery bq = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST).add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER).build();
                            query = bq;
                        }
                        // picked up further down as terminateAfterSet
                        searchContext.terminateAfter(numDocs);
                    }
                }
            } else {
                after = searchContext.searchAfter();
            }
            if (totalNumDocs == 0) {
                // top collectors don't like a size of 0
                numDocs = 1;
            }
            assert numDocs > 0;
            if (searchContext.collapse() == null) {
                if (searchContext.sort() != null) {
                    // explicit sort: collect by field values
                    SortAndFormats sf = searchContext.sort();
                    topDocsCollector = TopFieldCollector.create(sf.sort, numDocs, (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
                    sortValueFormats = sf.formats;
                } else {
                    // no sort: collect by score; this is the only path on which rescoring can be requested
                    rescore = !searchContext.rescore().isEmpty();
                    for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
                        // collect enough hits to cover the largest rescore window
                        numDocs = Math.max(rescoreContext.window(), numDocs);
                    }
                    topDocsCollector = TopScoreDocCollector.create(numDocs, after);
                }
            } else {
                // field collapsing: the collapse context creates the collector; default to relevance order
                Sort sort = Sort.RELEVANCE;
                if (searchContext.sort() != null) {
                    sort = searchContext.sort().sort;
                }
                CollapseContext collapse = searchContext.collapse();
                topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
                if (searchContext.sort() == null) {
                    sortValueFormats = new DocValueFormat[] { DocValueFormat.RAW };
                } else {
                    sortValueFormats = searchContext.sort().formats;
                }
            }
            collector = topDocsCollector;
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {

                @Override
                public TopDocs call() throws Exception {
                    final TopDocs topDocs;
                    // read the result from the unwrapped top-docs collector, whichever kind was created above
                    if (topDocsCollector instanceof TopDocsCollector) {
                        topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
                    } else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
                        topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
                    } else {
                        throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
                    }
                    if (scrollContext != null) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            scrollContext.totalHits = topDocs.totalHits;
                            scrollContext.maxScore = topDocs.getMaxScore();
                        } else {
                            // subsequent round: the total number of hits and
                            // the maximum score were computed on the first round
                            topDocs.totalHits = scrollContext.totalHits;
                            topDocs.setMaxScore(scrollContext.maxScore);
                        }
                        if (searchContext.request().numberOfShards() == 1) {
                            // if we fetch the document in the same roundtrip, we already know the last emitted doc
                            if (topDocs.scoreDocs.length > 0) {
                                // set the last emitted doc
                                scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
                            }
                        }
                    }
                    return topDocs;
                }
            };
        }
        // evaluated after the scroll branch above, which may itself have set terminateAfter
        final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
        if (terminateAfterSet) {
            final Collector child = collector;
            // throws Lucene.EarlyTerminationException when given count is reached
            collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
            if (doProfile) {
                // with profiling on, the collector built so far is always an InternalProfileCollector, hence the cast
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (searchContext.parsedPostFilter() != null) {
            final Collector child = collector;
            // this will only get applied to the actual search collector and not
            // to any scoped collectors, also, it will only be applied to the main collector
            // since that is where the filter should only work
            final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
            collector = new FilteredCollector(collector, filterWeight);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        // plug in additional collectors, like aggregations
        final List<Collector> subCollectors = new ArrayList<>();
        subCollectors.add(collector);
        subCollectors.addAll(searchContext.queryCollectors().values());
        collector = MultiCollector.wrap(subCollectors);
        if (doProfile && collector instanceof InternalProfileCollector == false) {
            // When there is a single collector to wrap, MultiCollector returns it
            // directly, so only wrap in the case that there are several sub collectors
            // Read-only view over subCollectors that casts on access.
            // NOTE(review): assumes every query collector is an InternalProfileCollector when profiling - confirm
            final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {

                @Override
                public InternalProfileCollector get(int index) {
                    return (InternalProfileCollector) subCollectors.get(index);
                }

                @Override
                public int size() {
                    return subCollectors.size();
                }
            };
            collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
        }
        // apply the minimum score after multi collector so we filter aggs as well
        if (searchContext.minimumScore() != null) {
            final Collector child = collector;
            collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        // Exact class check: this only holds when the size == 0 branch was taken and none of the
        // wrapping steps above replaced the collector.
        if (collector.getClass() == TotalHitCountCollector.class) {
            // Try to compute the count directly from the index reader
            // instead of using a collector
            while (true) {
                // strip wrappers that do not change which documents match, so that the
                // checks below also apply to a query nested in
                // a constant_score query
                if (query instanceof ConstantScoreQuery) {
                    query = ((ConstantScoreQuery) query).getQuery();
                } else {
                    break;
                }
            }
            if (query.getClass() == MatchAllDocsQuery.class) {
                // match_all: the count is the number of documents in the reader; no search needed
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {

                    @Override
                    public TopDocs call() throws Exception {
                        int count = searcher.getIndexReader().numDocs();
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
                // single term and no deletions: sum the term's document frequency over all segments
                final Term term = ((TermQuery) query).getTerm();
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {

                    @Override
                    public TopDocs call() throws Exception {
                        int count = 0;
                        for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                            count += context.reader().docFreq(term);
                        }
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            }
        }
        final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
        if (timeoutSet && collector != null) {
            // collector might be null if no collection is actually needed
            final Collector child = collector;
            // TODO: change to use our own counter that uses the scheduler in ThreadPool
            // throws TimeLimitingCollector.TimeExceededException when timeout has reached
            collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector != null) {
            // outermost layer: lets the search be aborted when the owning task is cancelled
            final Collector child = collector;
            collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        try {
            // a null collector means the count shortcut above applies and no search has to run
            if (collector != null) {
                if (doProfile) {
                    searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
                }
                searcher.search(query, collector);
            }
        } catch (TimeLimitingCollector.TimeExceededException e) {
            // not an error: record the timeout and continue with whatever was collected so far
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            queryResult.searchTimedOut(true);
        } catch (Lucene.EarlyTerminationException e) {
            // not an error: the terminate-after count was reached
            assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
            queryResult.terminatedEarly(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        // terminateAfter was requested but never triggered: report an explicit "false"
        if (terminateAfterSet && queryResult.terminatedEarly() == null) {
            queryResult.terminatedEarly(false);
        }
        queryResult.topDocs(topDocsCallable.call(), sortValueFormats);
        if (searchContext.getProfilers() != null) {
            ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(searchContext.getProfilers());
            searchContext.queryResult().profileResults(shardResults);
        }
        return rescore;
    } catch (Exception e) {
        // single failure boundary: everything is reported as a query phase failure with the cause attached
        throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) MinDocQuery(org.apache.lucene.queries.MinDocQuery) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) RescoreSearchContext(org.elasticsearch.search.rescore.RescoreSearchContext) ArrayList(java.util.ArrayList) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) Lucene(org.elasticsearch.common.lucene.Lucene) Callable(java.util.concurrent.Callable) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) FilteredCollector(org.elasticsearch.common.lucene.search.FilteredCollector) MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector) MultiCollector(org.apache.lucene.search.MultiCollector) InternalProfileCollector(org.elasticsearch.search.profile.query.InternalProfileCollector) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SearchType(org.elasticsearch.action.search.SearchType) ProfileShardResult(org.elasticsearch.search.profile.ProfileShardResult) AbstractList(java.util.AbstractList) DocValueFormat(org.elasticsearch.search.DocValueFormat) ScrollContext(org.elasticsearch.search.internal.ScrollContext) Term(org.apache.lucene.index.Term) SortAndFormats(org.elasticsearch.search.sort.SortAndFormats) Weight(org.apache.lucene.search.Weight) CollapseContext(org.elasticsearch.search.collapse.CollapseContext)

Example 3 with MinimumScoreCollector

use of org.elasticsearch.common.lucene.MinimumScoreCollector in project crate by crate.

the class LuceneOrderedDocCollector method doSearch.

/**
 * Executes {@code query} and collects the hits into {@code topFieldCollector}.
 * <p>
 * The collector chain is, from the outside in: a {@code KillableCollector} that calls
 * {@code raiseIfKilled}, an optional {@link MinimumScoreCollector} (only when
 * {@code minScore} is non-null), and finally the supplied top-field collector.
 *
 * @param topFieldCollector collector that receives the hits and from which the result is read
 * @param minScore          minimum score a document must reach, or {@code null} for no limit
 * @param query             the query to execute
 * @return the collected score docs, converted via {@code scoreDocToIterable}
 * @throws IOException if the underlying search fails
 */
private KeyIterable<ShardId, Row> doSearch(TopFieldCollector topFieldCollector, Float minScore, Query query) throws IOException {
    final Collector scoreFiltered = (minScore == null)
        ? topFieldCollector
        : new MinimumScoreCollector(topFieldCollector, minScore);
    final Collector killable = new KillableCollector(scoreFiltered, this::raiseIfKilled);

    searcher.search(query, killable);

    final ScoreDoc[] hits = topFieldCollector.topDocs().scoreDocs;
    if (doDocsScores) {
        // scores are filled in after the search, and only when they were requested
        TopFieldCollector.populateScores(hits, searcher, query);
    }
    return scoreDocToIterable(hits);
}
Also used : MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector) LeafCollector(org.apache.lucene.search.LeafCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Aggregations

MinimumScoreCollector (org.elasticsearch.common.lucene.MinimumScoreCollector): 3, Collector (org.apache.lucene.search.Collector): 2, ScoreDoc (org.apache.lucene.search.ScoreDoc): 2, TopFieldCollector (org.apache.lucene.search.TopFieldCollector): 2, AbstractList (java.util.AbstractList): 1, ArrayList (java.util.ArrayList): 1, Callable (java.util.concurrent.Callable): 1, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 1, Term (org.apache.lucene.index.Term): 1, MinDocQuery (org.apache.lucene.queries.MinDocQuery): 1, BooleanQuery (org.apache.lucene.search.BooleanQuery): 1, ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 1, FieldDoc (org.apache.lucene.search.FieldDoc): 1, LeafCollector (org.apache.lucene.search.LeafCollector): 1, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 1, MultiCollector (org.apache.lucene.search.MultiCollector): 1, Query (org.apache.lucene.search.Query): 1, Sort (org.apache.lucene.search.Sort): 1, TermQuery (org.apache.lucene.search.TermQuery): 1, TimeLimitingCollector (org.apache.lucene.search.TimeLimitingCollector): 1