Search in sources :

Example 16 with TotalHitCountCollector

use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.

the class SimpleNaiveBayesClassifier method getWordFreqForClass.

/**
 * Counts the documents of the given class that contain the given word.
 *
 * <p>The word is searched as an optional (SHOULD) term in every field listed in
 * {@code textFieldNames}. That group of clauses, the class term and - when one is set - the
 * additional {@code query} are each added as required (MUST) clauses, so the count can be
 * restricted to a subset of the index.
 *
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of hits reported for the combined query
 * @throws IOException if a low level I/O problem happens
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
    // the word may show up in any of the configured text fields
    BooleanQuery.Builder anyTextField = new BooleanQuery.Builder();
    for (String fieldName : textFieldNames) {
        anyTextField.add(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD);
    }

    // word AND class (AND the optional restricting query)
    BooleanQuery.Builder wordInClass = new BooleanQuery.Builder();
    wordInClass.add(anyTextField.build(), BooleanClause.Occur.MUST);
    wordInClass.add(new TermQuery(term), BooleanClause.Occur.MUST);
    if (query != null) {
        wordInClass.add(query, BooleanClause.Occur.MUST);
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    indexSearcher.search(wordInClass.build(), hitCounter);
    return hitCounter.getTotalHits();
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Term(org.apache.lucene.index.Term)

Example 17 with TotalHitCountCollector

use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.

the class SimpleNaiveBayesClassifier method countDocsWithClass.

/**
 * Counts the documents in the index having at least one value for the 'class' field.
 *
 * <p>When the terms of the class field are available and report a doc count other than -1, that
 * count is returned directly. Otherwise the documents are counted by searching with a wildcard
 * query on the class field, combined with {@code query} when one is set.
 *
 * @return the no. of documents having a value for the 'class' field
 * @throws IOException if reading the terms or running the search fails
 */
protected int countDocsWithClass() throws IOException {
    Terms classTerms = MultiFields.getTerms(this.indexReader, this.classFieldName);
    // NOTE(review): this fast path does not consult 'query' - confirm that is intended
    if (classTerms != null && classTerms.getDocCount() != -1) {
        return classTerms.getDocCount();
    }

    // in case codec doesn't support getDocCount
    Term anyClassValue = new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING));
    BooleanQuery.Builder docsWithClass = new BooleanQuery.Builder();
    docsWithClass.add(new WildcardQuery(anyClassValue), BooleanClause.Occur.MUST);
    if (query != null) {
        docsWithClass.add(query, BooleanClause.Occur.MUST);
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    indexSearcher.search(docsWithClass.build(), hitCounter);
    return hitCounter.getTotalHits();
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Terms(org.apache.lucene.index.Terms) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Term(org.apache.lucene.index.Term)

Example 18 with TotalHitCountCollector

use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.

the class FacetsCollector method doSearch.

/**
 * Runs {@code q} on {@code searcher}, passing hits both to {@code fc} and to the collector that
 * produces the returned {@link TopDocs}.
 *
 * <p>The requested hit count is capped at the reader's {@code maxDoc()} (an empty reader is
 * treated as having a limit of 1). When the resulting count is 0 only the total number of hits
 * is collected and the returned {@link TopDocs} carries an empty {@link ScoreDoc} array and a
 * {@code NaN} max score.
 *
 * @param searcher the searcher to execute the query on
 * @param after if non-null, handed to the top-docs collector as the document to continue after
 * @param q the query to execute
 * @param n the maximum number of hits to return
 * @param sort if non-null a {@code TopFieldCollector} is used, otherwise a
 *     {@code TopScoreDocCollector}
 * @param doDocScores passed through to {@code TopFieldCollector.create}
 * @param doMaxScore passed through to {@code TopFieldCollector.create}
 * @param fc additional collector wrapped together with the hit collector
 * @return the top documents gathered by the search
 * @throws IllegalArgumentException if {@code after.doc} is not below the limit, or if
 *     {@code sort} is given and {@code after} is not a {@link FieldDoc}
 * @throws IOException if the search fails
 */
private static TopDocs doSearch(IndexSearcher searcher, ScoreDoc after, Query q, int n, Sort sort, boolean doDocScores, boolean doMaxScore, Collector fc) throws IOException {
    final int maxDoc = searcher.getIndexReader().maxDoc();
    // an empty reader still gets a limit of one
    final int limit = (maxDoc == 0) ? 1 : maxDoc;
    final int numHits = Math.min(n, limit);

    if (after != null && after.doc >= limit) {
        throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + after.doc + " limit=" + limit);
    }

    if (numHits == 0) {
        // no hits requested: only count the matches
        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        searcher.search(q, MultiCollector.wrap(hitCounter, fc));
        return new TopDocs(hitCounter.getTotalHits(), new ScoreDoc[0], Float.NaN);
    }

    TopDocsCollector<?> topCollector;
    if (sort == null) {
        topCollector = TopScoreDocCollector.create(numHits, after);
    } else {
        // guards the FieldDoc cast below
        if (after != null && !(after instanceof FieldDoc)) {
            throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
        }
        topCollector = TopFieldCollector.create(sort, numHits, (FieldDoc) after, true, doDocScores, doMaxScore);
    }
    searcher.search(q, MultiCollector.wrap(topCollector, fc));
    return topCollector.topDocs();
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) FieldDoc(org.apache.lucene.search.FieldDoc) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 19 with TotalHitCountCollector

use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.

the class TestJoinUtil method testMinMaxDocs.

/**
 * Indexes a random number of parents, each with a random number of children, and checks that a
 * join query created with min/max bounds hits exactly as many documents as there are parents
 * whose child count lies within {@code [min, max]}.
 */
public void testMinMaxDocs() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));

    final int minChildDocsPerParent = 2;
    final int maxChildDocsPerParent = 16;
    final int parentCount = RandomNumbers.randomIntBetween(random(), 16, 64);
    // remembers how many children were indexed for each parent
    final int[] childCounts = new int[parentCount];
    for (int parent = 0; parent < parentCount; parent++) {
        final String parentId = Integer.toString(parent);
        Document parentDoc = new Document();
        parentDoc.add(new StringField("id", parentId, Field.Store.YES));
        parentDoc.add(new StringField("type", "to", Field.Store.NO));
        parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
        writer.addDocument(parentDoc);

        final int childCount = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
        childCounts[parent] = childCount;
        for (int child = 0; child < childCount; child++) {
            Document childDoc = new Document();
            childDoc.add(new StringField("id", Integer.toString(parent + child), Field.Store.YES));
            childDoc.add(new StringField("type", "from", Field.Store.NO));
            // children point at their parent through the join field
            childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
            writer.addDocument(childDoc);
        }
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
    // one SortedDocValues per leaf, indexed by the leaf's ordinal
    SortedDocValues[] joinValues = new SortedDocValues[searcher.getIndexReader().leaves().size()];
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
        joinValues[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field");
    }
    MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, joinValues, PackedInts.DEFAULT);

    Query fromQuery = new TermQuery(new Term("type", "from"));
    Query toQuery = new TermQuery(new Term("type", "to"));

    final int iterations = RandomNumbers.randomIntBetween(random(), 3, 9);
    for (int iter = 1; iter <= iterations; iter++) {
        final ScoreMode[] scoreModes = ScoreMode.values();
        final ScoreMode scoreMode = scoreModes[random().nextInt(scoreModes.length)];
        final int min = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
        final int max = RandomNumbers.randomIntBetween(random(), min, maxChildDocsPerParent);
        if (VERBOSE) {
            System.out.println("iter=" + iter);
            System.out.println("scoreMode=" + scoreMode);
            System.out.println("min=" + min);
            System.out.println("max=" + max);
        }

        Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max);
        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        searcher.search(joinQuery, hitCounter);

        // a parent is expected to match when its child count is inside [min, max]
        int expectedCount = 0;
        for (int childCount : childCounts) {
            if (min <= childCount && childCount <= max) {
                expectedCount++;
            }
        }
        assertEquals(expectedCount, hitCounter.getTotalHits());
    }

    searcher.getIndexReader().close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) FieldValueQuery(org.apache.lucene.search.FieldValueQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MultiDocValues(org.apache.lucene.index.MultiDocValues) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) SortedDocValues(org.apache.lucene.index.SortedDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap)

Example 20 with TotalHitCountCollector

use of org.apache.lucene.search.TotalHitCountCollector in project lucene-solr by apache.

the class CommandHandler method searchWithTimeLimiter.

/**
 * Invokes search with the specified filter and collector.
 *
 * <p>The collector chain is assembled in this order: if the query command's allowed time is
 * greater than zero the collector is wrapped in a {@link TimeLimitingCollector}; if hit counting
 * is enabled a {@link TotalHitCountCollector} is added next to it; if the filter has a post
 * filter, that post filter becomes the outermost collector and delegates to the chain. A
 * non-null {@code filter.filter} is combined with the query as a FILTER clause.
 *
 * <p>A time-limit or exiting-reader exception is not propagated: {@code partialResults} is set
 * and a warning is logged instead. When hit counting is enabled, {@code totalHitCount} is
 * updated after the search.
 *
 * @param query the query to run
 * @param filter holder of the optional filter query and post filter
 * @param collector the collector receiving the hits
 * @throws IOException if the search fails for any other I/O reason
 */
private void searchWithTimeLimiter(Query query, ProcessedFilter filter, Collector collector) throws IOException {
    Collector chain = collector;
    if (queryCommand.getTimeAllowed() > 0) {
        chain = new TimeLimitingCollector(chain, TimeLimitingCollector.getGlobalCounter(), queryCommand.getTimeAllowed());
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    if (includeHitCount) {
        chain = MultiCollector.wrap(chain, hitCounter);
    }

    Query effectiveQuery = query;
    if (filter.filter != null) {
        BooleanQuery.Builder filtered = new BooleanQuery.Builder();
        filtered.add(query, Occur.MUST);
        filtered.add(filter.filter, Occur.FILTER);
        effectiveQuery = filtered.build();
    }

    if (filter.postFilter != null) {
        // the post filter runs first and hands surviving docs to the chain built above
        filter.postFilter.setLastDelegate(chain);
        chain = filter.postFilter;
    }

    try {
        searcher.search(effectiveQuery, chain);
    } catch (TimeLimitingCollector.TimeExceededException | ExitableDirectoryReader.ExitingReaderException e) {
        partialResults = true;
        logger.warn("Query: " + effectiveQuery + "; " + e.getMessage());
    }

    if (includeHitCount) {
        totalHitCount = hitCounter.getTotalHits();
    }
}
Also used : TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector)

Aggregations

TotalHitCountCollector (org.apache.lucene.search.TotalHitCountCollector): 32; TermQuery (org.apache.lucene.search.TermQuery): 17; Term (org.apache.lucene.index.Term): 13; BooleanQuery (org.apache.lucene.search.BooleanQuery): 12; IOException (java.io.IOException): 10; IndexSearcher (org.apache.lucene.search.IndexSearcher): 10; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 10; Query (org.apache.lucene.search.Query): 10; ArrayList (java.util.ArrayList): 7; Searcher (org.elasticsearch.index.engine.Engine.Searcher): 7; LongPoint (org.apache.lucene.document.LongPoint): 6; Test (org.junit.Test): 6; TopDocs (org.apache.lucene.search.TopDocs): 5; HashSet (java.util.HashSet): 4; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 4; Document (org.apache.lucene.document.Document): 4; IndexReader (org.apache.lucene.index.IndexReader): 4; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 4; BooleanClause (org.apache.lucene.search.BooleanClause): 4; ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 4