Example 11 with TotalHitCountCollector

Use of org.apache.lucene.search.TotalHitCountCollector in project Anserini by castorini.

From the class IndexReaderUtils, the method getTermCountsWithAnalyzer:

/**
 * Returns count information on a term or a phrase.
 *
 * @param reader index reader
 * @param termStr term
 * @param analyzer analyzer to use
 * @return document frequency of the phrase (plus collection frequency if the input analyzes to a single term)
 * @throws IOException if error encountered during access to index
 */
public static Map<String, Long> getTermCountsWithAnalyzer(IndexReader reader, String termStr, Analyzer analyzer) throws IOException {
    if (AnalyzerUtils.analyze(analyzer, termStr).size() > 1) {
        // Multi-token input: run a phrase query and count matching documents.
        Query query = new PhraseQueryGenerator().buildQuery(IndexArgs.CONTENTS, analyzer, termStr);
        IndexSearcher searcher = new IndexSearcher(reader);
        TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
        searcher.search(query, totalHitCountCollector);
        return Map.ofEntries(Map.entry("docFreq", (long) totalHitCountCollector.getTotalHits()));
    }
    // Single-token input: read both frequencies directly from the index statistics.
    Term t = new Term(IndexArgs.CONTENTS, AnalyzerUtils.analyze(analyzer, termStr).get(0));
    return Map.ofEntries(Map.entry("collectionFreq", reader.totalTermFreq(t)), Map.entry("docFreq", (long) reader.docFreq(t)));
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) PhraseQueryGenerator(io.anserini.search.query.PhraseQueryGenerator) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Term(org.apache.lucene.index.Term)
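
For comparison, the same counting pattern can be written against plain Lucene without the Anserini helpers. The sketch below is only an illustration, not Anserini code: the class name, the field name "contents", and the assumption that the tokens are already analyzed are all made up for the example.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TotalHitCountCollector;

public class PhraseCountSketch {

    /** Counts documents that contain the given tokens as an exact phrase in the "contents" field. */
    public static int countPhrase(IndexReader reader, String... tokens) throws IOException {
        // Build an exact phrase query over the already-analyzed tokens.
        PhraseQuery.Builder builder = new PhraseQuery.Builder();
        for (String token : tokens) {
            builder.add(new Term("contents", token));
        }
        IndexSearcher searcher = new IndexSearcher(reader);
        // TotalHitCountCollector only counts matches; it never scores or materializes top hits.
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(builder.build(), collector);
        return collector.getTotalHits();
    }
}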

Example 12 with TotalHitCountCollector

Use of org.apache.lucene.search.TotalHitCountCollector in project elasticsearch by elastic.

From the class QueryPhase, the method execute:

/**
 * In a package-private method so that it can be tested without having to
 * wire everything (mapperService, etc.)
 * @return whether the rescoring phase should be executed
 */
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
    QuerySearchResult queryResult = searchContext.queryResult();
    queryResult.searchTimedOut(false);
    final boolean doProfile = searchContext.getProfilers() != null;
    final SearchType searchType = searchContext.searchType();
    boolean rescore = false;
    try {
        queryResult.from(searchContext.from());
        queryResult.size(searchContext.size());
        Query query = searchContext.query();
        final int totalNumDocs = searcher.getIndexReader().numDocs();
        int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
        Collector collector;
        Callable<TopDocs> topDocsCallable;
        DocValueFormat[] sortValueFormats = new DocValueFormat[0];
        // already rewritten
        assert query == searcher.rewrite(query);
        if (searchContext.size() == 0) {
            // no matter what the value of from is
            final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            collector = totalHitCountCollector;
            if (searchContext.getProfilers() != null) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {

                @Override
                public TopDocs call() throws Exception {
                    return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
                }
            };
        } else {
            // Perhaps have a dedicated scroll phase?
            final ScrollContext scrollContext = searchContext.scrollContext();
            assert (scrollContext != null) == (searchContext.request().scroll() != null);
            final Collector topDocsCollector;
            ScoreDoc after = null;
            if (searchContext.request().scroll() != null) {
                numDocs = Math.min(searchContext.size(), totalNumDocs);
                after = scrollContext.lastEmittedDoc;
                if (returnsDocsInOrder(query, searchContext.sort())) {
                    if (scrollContext.totalHits == -1) {
                        // first round
                        assert scrollContext.lastEmittedDoc == null;
                    // there is not much that we can optimize here since we want to collect all
                    // documents in order to get the total number of hits
                    } else {
                        // skip to the desired doc and stop collecting after ${size} matches
                        if (scrollContext.lastEmittedDoc != null) {
                            BooleanQuery bq = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST).add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER).build();
                            query = bq;
                        }
                        searchContext.terminateAfter(numDocs);
                    }
                }
            } else {
                after = searchContext.searchAfter();
            }
            if (totalNumDocs == 0) {
                // top collectors don't like a size of 0
                numDocs = 1;
            }
            assert numDocs > 0;
            if (searchContext.collapse() == null) {
                if (searchContext.sort() != null) {
                    SortAndFormats sf = searchContext.sort();
                    topDocsCollector = TopFieldCollector.create(sf.sort, numDocs, (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
                    sortValueFormats = sf.formats;
                } else {
                    rescore = !searchContext.rescore().isEmpty();
                    for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
                        numDocs = Math.max(rescoreContext.window(), numDocs);
                    }
                    topDocsCollector = TopScoreDocCollector.create(numDocs, after);
                }
            } else {
                Sort sort = Sort.RELEVANCE;
                if (searchContext.sort() != null) {
                    sort = searchContext.sort().sort;
                }
                CollapseContext collapse = searchContext.collapse();
                topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
                if (searchContext.sort() == null) {
                    sortValueFormats = new DocValueFormat[] { DocValueFormat.RAW };
                } else {
                    sortValueFormats = searchContext.sort().formats;
                }
            }
            collector = topDocsCollector;
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {

                @Override
                public TopDocs call() throws Exception {
                    final TopDocs topDocs;
                    if (topDocsCollector instanceof TopDocsCollector) {
                        topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
                    } else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
                        topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
                    } else {
                        throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
                    }
                    if (scrollContext != null) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            scrollContext.totalHits = topDocs.totalHits;
                            scrollContext.maxScore = topDocs.getMaxScore();
                        } else {
                            // subsequent round: the total number of hits and
                            // the maximum score were computed on the first round
                            topDocs.totalHits = scrollContext.totalHits;
                            topDocs.setMaxScore(scrollContext.maxScore);
                        }
                        if (searchContext.request().numberOfShards() == 1) {
                            // if we fetch the document in the same roundtrip, we already know the last emitted doc
                            if (topDocs.scoreDocs.length > 0) {
                                // set the last emitted doc
                                scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
                            }
                        }
                    }
                    return topDocs;
                }
            };
        }
        final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
        if (terminateAfterSet) {
            final Collector child = collector;
            // throws Lucene.EarlyTerminationException when given count is reached
            collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (searchContext.parsedPostFilter() != null) {
            final Collector child = collector;
            // the post filter is applied only to the main search collector, not to any
            // scoped collectors (e.g. aggregations), since it should only restrict the
            // returned hits
            final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
            collector = new FilteredCollector(collector, filterWeight);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        // plug in additional collectors, like aggregations
        final List<Collector> subCollectors = new ArrayList<>();
        subCollectors.add(collector);
        subCollectors.addAll(searchContext.queryCollectors().values());
        collector = MultiCollector.wrap(subCollectors);
        if (doProfile && collector instanceof InternalProfileCollector == false) {
            // When there is a single collector to wrap, MultiCollector returns it
            // directly, so only wrap in the case that there are several sub collectors
            final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {

                @Override
                public InternalProfileCollector get(int index) {
                    return (InternalProfileCollector) subCollectors.get(index);
                }

                @Override
                public int size() {
                    return subCollectors.size();
                }
            };
            collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
        }
        // apply the minimum score after multi collector so we filter aggs as well
        if (searchContext.minimumScore() != null) {
            final Collector child = collector;
            collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector.getClass() == TotalHitCountCollector.class) {
            // shortcut: for simple queries, read the hit count from index statistics
            // instead of using a collector
            while (true) {
                // unwrap any constant_score wrappers around the query
                if (query instanceof ConstantScoreQuery) {
                    query = ((ConstantScoreQuery) query).getQuery();
                } else {
                    break;
                }
            }
            if (query.getClass() == MatchAllDocsQuery.class) {
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {

                    @Override
                    public TopDocs call() throws Exception {
                        int count = searcher.getIndexReader().numDocs();
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
                final Term term = ((TermQuery) query).getTerm();
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {

                    @Override
                    public TopDocs call() throws Exception {
                        int count = 0;
                        for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                            count += context.reader().docFreq(term);
                        }
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            }
        }
        final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
        if (timeoutSet && collector != null) {
            // collector might be null if no collection is actually needed
            final Collector child = collector;
            // TODO: change to use our own counter that uses the scheduler in ThreadPool
            // throws TimeLimitingCollector.TimeExceededException when timeout has reached
            collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector != null) {
            final Collector child = collector;
            collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        try {
            if (collector != null) {
                if (doProfile) {
                    searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
                }
                searcher.search(query, collector);
            }
        } catch (TimeLimitingCollector.TimeExceededException e) {
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            queryResult.searchTimedOut(true);
        } catch (Lucene.EarlyTerminationException e) {
            assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
            queryResult.terminatedEarly(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        if (terminateAfterSet && queryResult.terminatedEarly() == null) {
            queryResult.terminatedEarly(false);
        }
        queryResult.topDocs(topDocsCallable.call(), sortValueFormats);
        if (searchContext.getProfilers() != null) {
            ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(searchContext.getProfilers());
            searchContext.queryResult().profileResults(shardResults);
        }
        return rescore;
    } catch (Exception e) {
        throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) MinDocQuery(org.apache.lucene.queries.MinDocQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) FieldDoc(org.apache.lucene.search.FieldDoc) RescoreSearchContext(org.elasticsearch.search.rescore.RescoreSearchContext) ArrayList(java.util.ArrayList) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) Lucene(org.elasticsearch.common.lucene.Lucene) Callable(java.util.concurrent.Callable) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MinDocQuery(org.apache.lucene.queries.MinDocQuery) FilteredCollector(org.elasticsearch.common.lucene.search.FilteredCollector) MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) FilteredCollector(org.elasticsearch.common.lucene.search.FilteredCollector) MultiCollector(org.apache.lucene.search.MultiCollector) InternalProfileCollector(org.elasticsearch.search.profile.query.InternalProfileCollector) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) Sort(org.apache.lucene.search.Sort) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) SearchType(org.elasticsearch.action.search.SearchType) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) ProfileShardResult(org.elasticsearch.search.profile.ProfileShardResult) AbstractList(java.util.AbstractList) DocValueFormat(org.elasticsearch.search.DocValueFormat) ScrollContext(org.elasticsearch.search.internal.ScrollContext) Term(org.apache.lucene.index.Term) SortAndFormats(org.elasticsearch.search.sort.SortAndFormats) Weight(org.apache.lucene.search.Weight) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) InternalProfileCollector(org.elasticsearch.search.profile.query.InternalProfileCollector) CollapseContext(org.elasticsearch.search.collapse.CollapseContext)
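
The heart of this method, as far as TotalHitCountCollector is concerned, is the size == 0 branch plus the later shortcut that drops the collector entirely when the count can be read from index statistics. A minimal sketch of that shortcut in plain Lucene follows; the class and method names are made up for illustration, and it omits the profiling, timeout, and cancellation wrapping shown above.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;

public class CountOnlySketch {

    public static int countHits(IndexSearcher searcher, Query query) throws IOException {
        // Strip constant_score wrappers; they do not change which documents match.
        while (query instanceof ConstantScoreQuery) {
            query = ((ConstantScoreQuery) query).getQuery();
        }
        IndexReader reader = searcher.getIndexReader();
        if (query instanceof MatchAllDocsQuery) {
            // Every live document matches, so the reader already knows the answer.
            return reader.numDocs();
        }
        if (query instanceof TermQuery && reader.hasDeletions() == false) {
            // With no deletions, summing per-segment document frequencies gives an exact count.
            Term term = ((TermQuery) query).getTerm();
            int count = 0;
            for (LeafReaderContext leaf : reader.leaves()) {
                count += leaf.reader().docFreq(term);
            }
            return count;
        }
        // General case: run the query with a counting collector and no top-docs collection.
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        return collector.getTotalHits();
    }
}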

Example 13 with TotalHitCountCollector

Use of org.apache.lucene.search.TotalHitCountCollector in project elasticsearch by elastic.

From the class NestedChildrenFilterTests, the method testNestedChildrenFilter:

public void testNestedChildrenFilter() throws Exception {
    int numParentDocs = scaledRandomIntBetween(0, 32);
    int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numParentDocs; i++) {
        int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
        List<Document> docs = new ArrayList<>(numChildDocs + 1);
        for (int j = 0; j < numChildDocs; j++) {
            Document childDoc = new Document();
            childDoc.add(new StringField("type", "child", Field.Store.NO));
            docs.add(childDoc);
        }
        Document parentDoc = new Document();
        parentDoc.add(new StringField("type", "parent", Field.Store.NO));
        parentDoc.add(new LegacyIntField("num_child_docs", numChildDocs, Field.Store.YES));
        docs.add(parentDoc);
        writer.addDocuments(docs);
    }
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
    BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
    Query childFilter = new TermQuery(new Term("type", "child"));
    int checkedParents = 0;
    final Weight parentsWeight = searcher.createNormalizedWeight(new TermQuery(new Term("type", "parent")), false);
    for (LeafReaderContext leaf : reader.leaves()) {
        DocIdSetIterator parents = parentsWeight.scorer(leaf).iterator();
        for (int parentDoc = parents.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = parents.nextDoc()) {
            int expectedChildDocs = leaf.reader().document(parentDoc).getField("num_child_docs").numericValue().intValue();
            hitContext.reset(null, leaf, parentDoc, searcher);
            NestedChildrenQuery nestedChildrenFilter = new NestedChildrenQuery(parentFilter, childFilter, hitContext);
            TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            searcher.search(new ConstantScoreQuery(nestedChildrenFilter), totalHitCountCollector);
            assertThat(totalHitCountCollector.getTotalHits(), equalTo(expectedChildDocs));
            checkedParents++;
        }
    }
    assertThat(checkedParents, equalTo(numParentDocs));
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) NestedChildrenQuery(org.elasticsearch.search.fetch.subphase.InnerHitsContext.NestedInnerHits.NestedChildrenQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Weight(org.apache.lucene.search.Weight) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) BitSetProducer(org.apache.lucene.search.join.BitSetProducer) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) FetchSubPhase(org.elasticsearch.search.fetch.FetchSubPhase) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) NestedChildrenQuery(org.elasticsearch.search.fetch.subphase.InnerHitsContext.NestedInnerHits.NestedChildrenQuery) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) LegacyIntField(org.apache.lucene.document.LegacyIntField)
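
Since the index in this test is block-structured (each parent document is added immediately after its children), the per-parent child count can also be derived from the parent bit set without running a query per parent. The sketch below is an assumption-laden illustration, not part of the test: it counts every document in the parent's block, which matches this test only because every non-parent document in a block is a child.

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.BitSet;

public class ChildCountSketch {

    /** Assumes parentDoc is a set bit in the parent filter for this segment. */
    public static int countChildren(BitSetProducer parentFilter, LeafReaderContext leaf, int parentDoc)
            throws IOException {
        BitSet parents = parentFilter.getBitSet(leaf);
        // Children occupy the doc IDs between the previous parent (exclusive) and parentDoc (exclusive).
        int prevParent = parentDoc == 0 ? -1 : parents.prevSetBit(parentDoc - 1);
        return parentDoc - prevParent - 1;
    }
}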

Example 14 with TotalHitCountCollector

Use of org.apache.lucene.search.TotalHitCountCollector in project elasticsearch by elastic.

From the class SearchCancellationTests, the method testLowLevelCancellableCollector:

public void testLowLevelCancellableCollector() throws IOException {
    TotalHitCountCollector collector = new TotalHitCountCollector();
    AtomicBoolean cancelled = new AtomicBoolean();
    CancellableCollector cancellableCollector = new CancellableCollector(cancelled::get, true, collector);
    final LeafCollector leafCollector = cancellableCollector.getLeafCollector(reader.leaves().get(0));
    leafCollector.collect(0);
    cancelled.set(true);
    expectThrows(TaskCancelledException.class, () -> leafCollector.collect(1));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LeafCollector(org.apache.lucene.search.LeafCollector) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) CancellableCollector(org.elasticsearch.search.query.CancellableCollector)
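
CancellableCollector is Elasticsearch-internal, but the low-level variant exercised here (a per-document cancellation check) can be sketched with plain Lucene's FilterCollector and FilterLeafCollector. The class below is a hedged illustration, not the Elasticsearch implementation; the RuntimeException stands in for TaskCancelledException.

import java.io.IOException;
import java.util.function.BooleanSupplier;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.LeafCollector;

public class CancellableCollectorSketch extends FilterCollector {

    private final BooleanSupplier cancelled;

    public CancellableCollectorSketch(BooleanSupplier cancelled, Collector in) {
        super(in);
        this.cancelled = cancelled;
    }

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
        return new FilterLeafCollector(super.getLeafCollector(context)) {
            @Override
            public void collect(int doc) throws IOException {
                // Low-level cancellation: check the flag on every collected document.
                if (cancelled.getAsBoolean()) {
                    throw new RuntimeException("search was cancelled");
                }
                super.collect(doc);
            }
        };
    }
}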

Example 15 with TotalHitCountCollector

Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.

From the class HeatmapFacetCounterTest, the method countMatchingDocsAtLevel:

private int countMatchingDocsAtLevel(Point pt, int facetLevel) throws IOException {
    // we use IntersectsPrefixTreeQuery directly so that we can specify the facet level to go to exactly.
    RecursivePrefixTreeStrategy strategy = (RecursivePrefixTreeStrategy) this.strategy;
    Query filter = new IntersectsPrefixTreeQuery(pt, strategy.getFieldName(), grid, facetLevel, grid.getMaxLevels());
    final TotalHitCountCollector collector = new TotalHitCountCollector();
    indexSearcher.search(filter, collector);
    cellsValidated++;
    if (collector.getTotalHits() > 0) {
        cellValidatedNonZero++;
    }
    return collector.getTotalHits();
}
Also used : Query(org.apache.lucene.search.Query) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector)
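
As a closing note, plain Lucene also exposes IndexSearcher.count(Query), which wraps the same idea as the examples above (and, in recent versions, applies shortcuts similar to the QueryPhase optimization), so a count-only search can often be reduced to a one-liner. A minimal sketch, with a made-up helper name:

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

public class SearcherCountSketch {

    public static int countTerm(IndexSearcher searcher, String field, String text) throws IOException {
        // Equivalent to running the query with a TotalHitCountCollector and reading getTotalHits().
        return searcher.count(new TermQuery(new Term(field, text)));
    }
}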

Aggregations (classes used together with TotalHitCountCollector across the indexed examples, with usage counts):

TotalHitCountCollector (org.apache.lucene.search.TotalHitCountCollector): 32
TermQuery (org.apache.lucene.search.TermQuery): 17
Term (org.apache.lucene.index.Term): 13
BooleanQuery (org.apache.lucene.search.BooleanQuery): 12
IOException (java.io.IOException): 10
IndexSearcher (org.apache.lucene.search.IndexSearcher): 10
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 10
Query (org.apache.lucene.search.Query): 10
ArrayList (java.util.ArrayList): 7
Searcher (org.elasticsearch.index.engine.Engine.Searcher): 7
LongPoint (org.apache.lucene.document.LongPoint): 6
Test (org.junit.Test): 6
TopDocs (org.apache.lucene.search.TopDocs): 5
HashSet (java.util.HashSet): 4
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 4
Document (org.apache.lucene.document.Document): 4
IndexReader (org.apache.lucene.index.IndexReader): 4
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 4
BooleanClause (org.apache.lucene.search.BooleanClause): 4
ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 4