use of org.apache.lucene.search.TotalHitCountCollector in project Anserini by castorini.
the class IndexReaderUtils method getTermCountsWithAnalyzer.
/**
* Returns count information on a term or a phrase.
*
* @param reader index reader
* @param termStr term
* @param analyzer analyzer to use
* @return df (+cf if only one term) of the phrase
* @throws IOException if error encountered during access to index
*/
public static Map<String, Long> getTermCountsWithAnalyzer(IndexReader reader, String termStr, Analyzer analyzer) throws IOException {
if (AnalyzerUtils.analyze(analyzer, termStr).size() > 1) {
Query query = new PhraseQueryGenerator().buildQuery(IndexArgs.CONTENTS, analyzer, termStr);
IndexSearcher searcher = new IndexSearcher(reader);
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
searcher.search(query, totalHitCountCollector);
return Map.ofEntries(Map.entry("docFreq", (long) totalHitCountCollector.getTotalHits()));
}
Term t = new Term(IndexArgs.CONTENTS, AnalyzerUtils.analyze(analyzer, termStr).get(0));
Map<String, Long> termInfo = Map.ofEntries(Map.entry("collectionFreq", reader.totalTermFreq(t)), Map.entry("docFreq", (long) reader.docFreq(t)));
return termInfo;
}
use of org.apache.lucene.search.TotalHitCountCollector in project elasticsearch by elastic.
the class QueryPhase method execute.
/**
* In a package-private method so that it can be tested without having to
* wire everything (mapperService, etc.)
* @return whether the rescoring phase should be executed
*/
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
QuerySearchResult queryResult = searchContext.queryResult();
queryResult.searchTimedOut(false);
final boolean doProfile = searchContext.getProfilers() != null;
final SearchType searchType = searchContext.searchType();
boolean rescore = false;
try {
queryResult.from(searchContext.from());
queryResult.size(searchContext.size());
Query query = searchContext.query();
final int totalNumDocs = searcher.getIndexReader().numDocs();
int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
Collector collector;
Callable<TopDocs> topDocsCallable;
DocValueFormat[] sortValueFormats = new DocValueFormat[0];
// already rewritten
assert query == searcher.rewrite(query);
if (searchContext.size() == 0) {
// no matter what the value of from is
final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
collector = totalHitCountCollector;
if (searchContext.getProfilers() != null) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
}
topDocsCallable = new Callable<TopDocs>() {
@Override
public TopDocs call() throws Exception {
return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
}
};
} else {
// Perhaps have a dedicated scroll phase?
final ScrollContext scrollContext = searchContext.scrollContext();
assert (scrollContext != null) == (searchContext.request().scroll() != null);
final Collector topDocsCollector;
ScoreDoc after = null;
if (searchContext.request().scroll() != null) {
numDocs = Math.min(searchContext.size(), totalNumDocs);
after = scrollContext.lastEmittedDoc;
if (returnsDocsInOrder(query, searchContext.sort())) {
if (scrollContext.totalHits == -1) {
// first round
assert scrollContext.lastEmittedDoc == null;
// there is not much that we can optimize here since we want to collect all
// documents in order to get the total number of hits
} else {
// skip to the desired doc and stop collecting after ${size} matches
if (scrollContext.lastEmittedDoc != null) {
BooleanQuery bq = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST).add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER).build();
query = bq;
}
searchContext.terminateAfter(numDocs);
}
}
} else {
after = searchContext.searchAfter();
}
if (totalNumDocs == 0) {
// top collectors don't like a size of 0
numDocs = 1;
}
assert numDocs > 0;
if (searchContext.collapse() == null) {
if (searchContext.sort() != null) {
SortAndFormats sf = searchContext.sort();
topDocsCollector = TopFieldCollector.create(sf.sort, numDocs, (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
sortValueFormats = sf.formats;
} else {
rescore = !searchContext.rescore().isEmpty();
for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
numDocs = Math.max(rescoreContext.window(), numDocs);
}
topDocsCollector = TopScoreDocCollector.create(numDocs, after);
}
} else {
Sort sort = Sort.RELEVANCE;
if (searchContext.sort() != null) {
sort = searchContext.sort().sort;
}
CollapseContext collapse = searchContext.collapse();
topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
if (searchContext.sort() == null) {
sortValueFormats = new DocValueFormat[] { DocValueFormat.RAW };
} else {
sortValueFormats = searchContext.sort().formats;
}
}
collector = topDocsCollector;
if (doProfile) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
}
topDocsCallable = new Callable<TopDocs>() {
@Override
public TopDocs call() throws Exception {
final TopDocs topDocs;
if (topDocsCollector instanceof TopDocsCollector) {
topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
} else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
} else {
throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
}
if (scrollContext != null) {
if (scrollContext.totalHits == -1) {
// first round
scrollContext.totalHits = topDocs.totalHits;
scrollContext.maxScore = topDocs.getMaxScore();
} else {
// subsequent round: the total number of hits and
// the maximum score were computed on the first round
topDocs.totalHits = scrollContext.totalHits;
topDocs.setMaxScore(scrollContext.maxScore);
}
if (searchContext.request().numberOfShards() == 1) {
// if we fetch the document in the same roundtrip, we already know the last emitted doc
if (topDocs.scoreDocs.length > 0) {
// set the last emitted doc
scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
}
}
}
return topDocs;
}
};
}
final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
if (terminateAfterSet) {
final Collector child = collector;
// throws Lucene.EarlyTerminationException when given count is reached
collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
if (doProfile) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child));
}
}
if (searchContext.parsedPostFilter() != null) {
final Collector child = collector;
// this will only get applied to the actual search collector and not
// to any scoped collectors, also, it will only be applied to the main collector
// since that is where the filter should only work
final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
collector = new FilteredCollector(collector, filterWeight);
if (doProfile) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER, Collections.singletonList((InternalProfileCollector) child));
}
}
// plug in additional collectors, like aggregations
final List<Collector> subCollectors = new ArrayList<>();
subCollectors.add(collector);
subCollectors.addAll(searchContext.queryCollectors().values());
collector = MultiCollector.wrap(subCollectors);
if (doProfile && collector instanceof InternalProfileCollector == false) {
// When there is a single collector to wrap, MultiCollector returns it
// directly, so only wrap in the case that there are several sub collectors
final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {
@Override
public InternalProfileCollector get(int index) {
return (InternalProfileCollector) subCollectors.get(index);
}
@Override
public int size() {
return subCollectors.size();
}
};
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
}
// apply the minimum score after multi collector so we filter aggs as well
if (searchContext.minimumScore() != null) {
final Collector child = collector;
collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
if (doProfile) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE, Collections.singletonList((InternalProfileCollector) child));
}
}
if (collector.getClass() == TotalHitCountCollector.class) {
// instead of using a collector
while (true) {
// a constant_score query
if (query instanceof ConstantScoreQuery) {
query = ((ConstantScoreQuery) query).getQuery();
} else {
break;
}
}
if (query.getClass() == MatchAllDocsQuery.class) {
collector = null;
topDocsCallable = new Callable<TopDocs>() {
@Override
public TopDocs call() throws Exception {
int count = searcher.getIndexReader().numDocs();
return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
}
};
} else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
final Term term = ((TermQuery) query).getTerm();
collector = null;
topDocsCallable = new Callable<TopDocs>() {
@Override
public TopDocs call() throws Exception {
int count = 0;
for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
count += context.reader().docFreq(term);
}
return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
}
};
}
}
final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
if (timeoutSet && collector != null) {
// collector might be null if no collection is actually needed
final Collector child = collector;
// TODO: change to use our own counter that uses the scheduler in ThreadPool
// throws TimeLimitingCollector.TimeExceededException when timeout has reached
collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
if (doProfile) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT, Collections.singletonList((InternalProfileCollector) child));
}
}
if (collector != null) {
final Collector child = collector;
collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
if (doProfile) {
collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED, Collections.singletonList((InternalProfileCollector) child));
}
}
try {
if (collector != null) {
if (doProfile) {
searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
}
searcher.search(query, collector);
}
} catch (TimeLimitingCollector.TimeExceededException e) {
assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
queryResult.searchTimedOut(true);
} catch (Lucene.EarlyTerminationException e) {
assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
queryResult.terminatedEarly(true);
} finally {
searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
}
if (terminateAfterSet && queryResult.terminatedEarly() == null) {
queryResult.terminatedEarly(false);
}
queryResult.topDocs(topDocsCallable.call(), sortValueFormats);
if (searchContext.getProfilers() != null) {
ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(searchContext.getProfilers());
searchContext.queryResult().profileResults(shardResults);
}
return rescore;
} catch (Exception e) {
throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
}
}
use of org.apache.lucene.search.TotalHitCountCollector in project elasticsearch by elastic.
the class NestedChildrenFilterTests method testNestedChildrenFilter.
public void testNestedChildrenFilter() throws Exception {
int numParentDocs = scaledRandomIntBetween(0, 32);
int maxChildDocsPerParent = scaledRandomIntBetween(8, 16);
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
for (int i = 0; i < numParentDocs; i++) {
int numChildDocs = scaledRandomIntBetween(0, maxChildDocsPerParent);
List<Document> docs = new ArrayList<>(numChildDocs + 1);
for (int j = 0; j < numChildDocs; j++) {
Document childDoc = new Document();
childDoc.add(new StringField("type", "child", Field.Store.NO));
docs.add(childDoc);
}
Document parenDoc = new Document();
parenDoc.add(new StringField("type", "parent", Field.Store.NO));
parenDoc.add(new LegacyIntField("num_child_docs", numChildDocs, Field.Store.YES));
docs.add(parenDoc);
writer.addDocuments(docs);
}
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);
FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type", "parent")));
Query childFilter = new TermQuery(new Term("type", "child"));
int checkedParents = 0;
final Weight parentsWeight = searcher.createNormalizedWeight(new TermQuery(new Term("type", "parent")), false);
for (LeafReaderContext leaf : reader.leaves()) {
DocIdSetIterator parents = parentsWeight.scorer(leaf).iterator();
for (int parentDoc = parents.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = parents.nextDoc()) {
int expectedChildDocs = leaf.reader().document(parentDoc).getField("num_child_docs").numericValue().intValue();
hitContext.reset(null, leaf, parentDoc, searcher);
NestedChildrenQuery nestedChildrenFilter = new NestedChildrenQuery(parentFilter, childFilter, hitContext);
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
searcher.search(new ConstantScoreQuery(nestedChildrenFilter), totalHitCountCollector);
assertThat(totalHitCountCollector.getTotalHits(), equalTo(expectedChildDocs));
checkedParents++;
}
}
assertThat(checkedParents, equalTo(numParentDocs));
reader.close();
dir.close();
}
use of org.apache.lucene.search.TotalHitCountCollector in project elasticsearch by elastic.
the class SearchCancellationTests method testLowLevelCancellableCollector.
public void testLowLevelCancellableCollector() throws IOException {
TotalHitCountCollector collector = new TotalHitCountCollector();
AtomicBoolean cancelled = new AtomicBoolean();
CancellableCollector cancellableCollector = new CancellableCollector(cancelled::get, true, collector);
final LeafCollector leafCollector = cancellableCollector.getLeafCollector(reader.leaves().get(0));
leafCollector.collect(0);
cancelled.set(true);
expectThrows(TaskCancelledException.class, () -> leafCollector.collect(1));
}
use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.
the class HeatmapFacetCounterTest method countMatchingDocsAtLevel.
private int countMatchingDocsAtLevel(Point pt, int facetLevel) throws IOException {
// we use IntersectsPrefixTreeFilter directly so that we can specify the level to go to exactly.
RecursivePrefixTreeStrategy strategy = (RecursivePrefixTreeStrategy) this.strategy;
Query filter = new IntersectsPrefixTreeQuery(pt, strategy.getFieldName(), grid, facetLevel, grid.getMaxLevels());
final TotalHitCountCollector collector = new TotalHitCountCollector();
indexSearcher.search(filter, collector);
cellsValidated++;
if (collector.getTotalHits() > 0) {
cellValidatedNonZero++;
}
return collector.getTotalHits();
}
Aggregations