Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by Apache.
The class SimpleNaiveBayesClassifier, method getWordFreqForClass.
/**
 * Returns the number of documents of the input class (from the whole index or from a subset)
 * that contain the word (in a specific field, or in all the text fields if none is selected).
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class
 * @throws IOException if a low-level I/O problem occurs
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
  BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
  BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
  for (String textFieldName : textFieldNames) {
    subQuery.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
  }
  booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
  booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
  if (query != null) {
    booleanQuery.add(query, BooleanClause.Occur.MUST);
  }
  TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
  indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
  return totalHitCountCollector.getTotalHits();
}
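The method above reduces to a reusable pattern: AND a word query with the class term and collect only the hit count. Below is a minimal standalone sketch of that pattern, assuming an already-open IndexSearcher; the helper name countWordInClass and its parameters are illustrative, not part of SimpleNaiveBayesClassifier.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;

// Count documents that contain the word AND carry the class term,
// without materializing any hits or scores.
static int countWordInClass(IndexSearcher searcher, String textField, String word, Term classTerm) throws IOException {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(new TermQuery(new Term(textField, word)), BooleanClause.Occur.MUST);
  builder.add(new TermQuery(classTerm), BooleanClause.Occur.MUST);
  TotalHitCountCollector counter = new TotalHitCountCollector();
  searcher.search(builder.build(), counter); // the collector only increments a counter per hit
  return counter.getTotalHits();
}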
Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by Apache.
The class SimpleNaiveBayesClassifier, method countDocsWithClass.
/**
 * Counts the number of documents in the index having at least one value for the 'class' field.
 *
 * @return the number of documents having a value for the 'class' field
 * @throws IOException if accessing term vectors or searching fails
 */
protected int countDocsWithClass() throws IOException {
  Terms terms = MultiFields.getTerms(this.indexReader, this.classFieldName);
  int docCount;
  if (terms == null || terms.getDocCount() == -1) {
    // in case the codec doesn't support getDocCount
    TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
    BooleanQuery.Builder q = new BooleanQuery.Builder();
    q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
    if (query != null) {
      q.add(query, BooleanClause.Occur.MUST);
    }
    indexSearcher.search(q.build(), classQueryCountCollector);
    docCount = classQueryCountCollector.getTotalHits();
  } else {
    docCount = terms.getDocCount();
  }
  return docCount;
}
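The fallback branch, counting documents that have at least one value for a field when the codec does not report getDocCount, can be isolated the same way. A hedged sketch, assuming an existing IndexSearcher; countDocsWithField is an illustrative helper name, not Lucene API.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;

// Count documents with at least one value in fieldName by matching the
// match-anything wildcard pattern against that field.
static int countDocsWithField(IndexSearcher searcher, String fieldName) throws IOException {
  WildcardQuery anyValue = new WildcardQuery(new Term(fieldName, String.valueOf(WildcardQuery.WILDCARD_STRING)));
  TotalHitCountCollector counter = new TotalHitCountCollector();
  searcher.search(anyValue, counter);
  return counter.getTotalHits();
}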
Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by Apache.
The class FacetsCollector, method doSearch.
private static TopDocs doSearch(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort,
                                boolean doDocScores, boolean doMaxScore, Collector fc) throws IOException {
  int limit = searcher.getIndexReader().maxDoc();
  if (limit == 0) {
    limit = 1;
  }
  n = Math.min(n, limit);
  if (after != null && after.doc >= limit) {
    throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + after.doc + " limit=" + limit);
  }
  TopDocs topDocs = null;
  if (n == 0) {
    // no hits requested: only count them, while still running the facets collector
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    searcher.search(q, MultiCollector.wrap(totalHitCountCollector, fc));
    topDocs = new TopDocs(totalHitCountCollector.getTotalHits(), new ScoreDoc[0], Float.NaN);
  } else {
    TopDocsCollector<?> hitsCollector;
    if (sort != null) {
      if (after != null && !(after instanceof FieldDoc)) {
        // TODO: if we fix type safety of TopFieldDocs we can remove this
        throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
      }
      boolean fillFields = true;
      hitsCollector = TopFieldCollector.create(sort, n, (FieldDoc) after, fillFields, doDocScores, doMaxScore);
    } else {
      hitsCollector = TopScoreDocCollector.create(n, after);
    }
    searcher.search(q, MultiCollector.wrap(hitsCollector, fc));
    topDocs = hitsCollector.topDocs();
  }
  return topDocs;
}
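The n == 0 branch demonstrates a useful idiom: run a count-only search while still driving a second collector through MultiCollector.wrap. A minimal sketch of just that idiom, assuming an open IndexSearcher; the helper name countWhileFaceting is illustrative.

import java.io.IOException;

import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TotalHitCountCollector;

// Count-only search that still feeds a facets collector, as in the n == 0 branch.
static int countWhileFaceting(IndexSearcher searcher, Query q, FacetsCollector fc) throws IOException {
  TotalHitCountCollector counter = new TotalHitCountCollector();
  searcher.search(q, MultiCollector.wrap(counter, fc)); // both collectors see every hit
  return counter.getTotalHits();
}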
Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by Apache.
The class TestJoinUtil, method testMinMaxDocs.
public void testMinMaxDocs() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));
  int minChildDocsPerParent = 2;
  int maxChildDocsPerParent = 16;
  int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
  int[] childDocsPerParent = new int[numParents];
  for (int p = 0; p < numParents; p++) {
    String parentId = Integer.toString(p);
    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    iw.addDocument(parentDoc);
    int numChildren = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
    childDocsPerParent[p] = numChildren;
    for (int c = 0; c < numChildren; c++) {
      String childId = Integer.toString(p + c);
      Document childDoc = new Document();
      childDoc.add(new StringField("id", childId, Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      iw.addDocument(childDoc);
    }
  }
  iw.close();
  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
  SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
  for (LeafReaderContext leafContext : searcher.getIndexReader().leaves()) {
    values[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
  }
  MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
  Query fromQuery = new TermQuery(new Term("type", "from"));
  Query toQuery = new TermQuery(new Term("type", "to"));
  int iters = RandomNumbers.randomIntBetween(random(), 3, 9);
  for (int i = 1; i <= iters; i++) {
    final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
    int min = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
    int max = RandomNumbers.randomIntBetween(random(), min, maxChildDocsPerParent);
    if (VERBOSE) {
      System.out.println("iter=" + i);
      System.out.println("scoreMode=" + scoreMode);
      System.out.println("min=" + min);
      System.out.println("max=" + max);
    }
    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max);
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(joinQuery, collector);
    // only parents whose child count falls inside [min, max] should match
    int expectedCount = 0;
    for (int numChildDocs : childDocsPerParent) {
      if (numChildDocs >= min && numChildDocs <= max) {
        expectedCount++;
      }
    }
    assertEquals(expectedCount, collector.getTotalHits());
  }
  searcher.getIndexReader().close();
  dir.close();
}
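The pattern the test exercises can be written down in isolation: build a join query with explicit min/max bounds on the number of matching "from" documents per "to" document, then count its hits. A hedged sketch assuming the same index layout ("type" and "join_field" fields) and an OrdinalMap built as in the test; countJoinedParents is an illustrative name.

import java.io.IOException;

import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;

// Count "to" docs reached from "from" docs, keeping only parents whose
// number of matching children lies in [min, max].
static int countJoinedParents(IndexSearcher searcher, MultiDocValues.OrdinalMap ordinalMap,
                              int min, int max) throws IOException {
  Query joinQuery = JoinUtil.createJoinQuery("join_field",
      new TermQuery(new Term("type", "from")),  // fromQuery
      new TermQuery(new Term("type", "to")),    // toQuery
      searcher, ScoreMode.None, ordinalMap, min, max);
  TotalHitCountCollector counter = new TotalHitCountCollector();
  searcher.search(joinQuery, counter);
  return counter.getTotalHits();
}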
Use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by Apache.
The class CommandHandler, method searchWithTimeLimiter.
/**
 * Invokes search with the specified filter and collector.
 * If a time limit has been specified, the collector is wrapped in a TimeLimitingCollector.
 */
private void searchWithTimeLimiter(Query query, ProcessedFilter filter, Collector collector) throws IOException {
  if (queryCommand.getTimeAllowed() > 0) {
    collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), queryCommand.getTimeAllowed());
  }
  TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
  if (includeHitCount) {
    collector = MultiCollector.wrap(collector, hitCountCollector);
  }
  if (filter.filter != null) {
    query = new BooleanQuery.Builder().add(query, Occur.MUST).add(filter.filter, Occur.FILTER).build();
  }
  if (filter.postFilter != null) {
    filter.postFilter.setLastDelegate(collector);
    collector = filter.postFilter;
  }
  try {
    searcher.search(query, collector);
  } catch (TimeLimitingCollector.TimeExceededException | ExitableDirectoryReader.ExitingReaderException x) {
    partialResults = true;
    logger.warn("Query: " + query + "; " + x.getMessage());
  }
  if (includeHitCount) {
    totalHitCount = hitCountCollector.getTotalHits();
  }
}
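Stripped of the Solr-specific filter handling, the wrapping order in the method above can be sketched as a standalone helper: the time limiter wraps the delegate first, and the hit counter is attached alongside it via MultiCollector, so a timeout still leaves a usable partial count. Assumes an existing IndexSearcher; countWithTimeout and timeAllowedMs are illustrative.

import java.io.IOException;

import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TotalHitCountCollector;

// Run a search under a time budget while counting hits on the side.
static int countWithTimeout(IndexSearcher searcher, Query query, Collector delegate,
                            long timeAllowedMs) throws IOException {
  Collector limited = new TimeLimitingCollector(delegate, TimeLimitingCollector.getGlobalCounter(), timeAllowedMs);
  TotalHitCountCollector counter = new TotalHitCountCollector();
  try {
    searcher.search(query, MultiCollector.wrap(limited, counter));
  } catch (TimeLimitingCollector.TimeExceededException e) {
    // the budget expired mid-search: the count below covers only the hits seen so far
  }
  return counter.getTotalHits();
}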