Search in sources :

Example 1 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project elasticsearch by elastic.

the class CollapsingTopDocsCollectorTests method assertSearchCollapse.

/**
 * Indexes between 1000 and 2000 random documents, each carrying a random collapse value plus two
 * numeric sort fields, and checks {@link CollapsingTopDocsCollector} against a plain
 * {@link TopFieldCollector} run with the same sort (sort1, sort2, collapse field).
 *
 * <p>Three things are verified: (1) walking the uncollapsed hits in sort order and keeping only
 * the first hit of every collapse value reproduces the collapsed hits exactly, (2) every
 * uncollapsed hit left over after that walk belongs to a collapse value already seen, and
 * (3) collapsing every leaf separately and merging with {@link CollapseTopFieldDocs#merge}
 * yields the same result as the single collapsed search.
 *
 * @param dvProducers supplies the random collapse values, adds them to each document and provides
 *                    the collapse {@link SortField}
 * @param numeric     {@code true} to build the collector with {@code createNumeric},
 *                    {@code false} to build it with {@code createKeyword}
 * @param multivalued forwarded unchanged to {@code dvProducers.add} and {@code dvProducers.sortField}
 * @throws IOException if indexing or searching fails
 */
private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
    final int numDocs = randomIntBetween(1000, 2000);
    int maxGroup = randomIntBetween(2, 500);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    // distinct collapse values actually indexed; its size is the expected number of groups
    Set<T> values = new HashSet<>();
    int totalHits = 0;
    for (int i = 0; i < numDocs; i++) {
        final T value = dvProducers.randomGroup(maxGroup);
        values.add(value);
        Document doc = new Document();
        dvProducers.add(doc, value, multivalued);
        doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
        doc.add(new NumericDocValuesField("sort2", randomLong()));
        w.addDocument(doc);
        totalHits++;
    }
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final SortField collapseField = dvProducers.sortField(multivalued);
    final SortField sort1 = new SortField("sort1", SortField.Type.INT);
    final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
    Sort sort = new Sort(sort1, sort2, collapseField);
    int expectedNumGroups = values.size();
    final CollapsingTopDocsCollector collapsingCollector;
    if (numeric) {
        collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
    } else {
        collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
    }
    // reference collector: every document, same sort, no collapsing
    TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    searcher.search(new MatchAllDocsQuery(), topFieldCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    TopFieldDocs topDocs = topFieldCollector.topDocs();
    assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
    assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(totalHits, collapseTopFieldDocs.totalHits);
    assertEquals(totalHits, topDocs.scoreDocs.length);
    assertEquals(totalHits, topDocs.totalHits);
    Set<Object> seen = new HashSet<>();
    // collapse field is the last sort
    int collapseIndex = sort.getSort().length - 1;
    int topDocsIndex = 0;
    for (int i = 0; i < expectedNumGroups; i++) {
        FieldDoc fieldDoc = null;
        // advance to the next uncollapsed hit whose collapse value has not been seen yet
        for (; topDocsIndex < totalHits; topDocsIndex++) {
            fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
            if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
                break;
            }
        }
        FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
        assertNotNull(fieldDoc);
        assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
        assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
        seen.add(fieldDoc.fields[collapseIndex]);
    }
    // whatever is left in the uncollapsed hits must belong to a group that was already reported
    for (; topDocsIndex < totalHits; topDocsIndex++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
        assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
    }
    // check merge: collapse each leaf on its own, then merge the per-leaf results
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final SegmentSearcher[] subSearchers;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new SegmentSearcher[1];
        subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new SegmentSearcher[size];
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX);
        }
    }
    final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
    final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
        final SegmentSearcher subSearcher = subSearchers[shardIDX];
        final CollapsingTopDocsCollector c;
        if (numeric) {
            c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
        } else {
            c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
        }
        subSearcher.search(weight, c);
        shardHits[shardIDX] = c.getTopDocs();
    }
    CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
    assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
    w.close();
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) Weight(org.apache.lucene.search.Weight) CompositeReaderContext(org.apache.lucene.index.CompositeReaderContext) IndexReader(org.apache.lucene.index.IndexReader) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 2 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project neo4j-mobile-android by neo4j-contrib.

the class HitDoc method getMoreDocs.

/**
   * Tries to add new documents to hitDocs.
   * Ensures that the hit numbered <code>min</code> has been retrieved.
   *
   * <p>The search is re-run asking for twice as many hits as the larger of <code>min</code> and
   * the number of hits already cached, and the newly returned hits are appended to hitDocs.
   * When the deletion count cannot be determined or has grown since the previous call, the
   * cached hits are re-aligned against the fresh results to find the exact append position.
   *
   * @param min index of the hit that must be available afterwards
   * @throws IOException if the underlying search fails
   */
private final void getMoreDocs(int min) throws IOException {
    if (hitDocs.size() > min) {
        min = hitDocs.size();
    }
    // double # retrieved
    int n = min * 2;
    TopDocs topDocs = null;
    if (sort == null) {
        topDocs = searcher.search(weight, filter, n);
    } else {
        if (this.score) {
            TopFieldCollector collector = LuceneDataSource.scoringCollector(sort, n);
            // Pass the filter here as well: the other two search paths honour it, and skipping
            // it would make scored, sorted searches return hits the filter should have excluded.
            searcher.search(weight, filter, collector);
            topDocs = collector.topDocs();
        } else {
            topDocs = searcher.search(weight, filter, n, sort);
        }
    }
    length = topDocs.totalHits;
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    // scores are only scaled down, never up: normalise when the best score exceeds 1.0
    float scoreNorm = 1.0f;
    if (length > 0 && topDocs.getMaxScore() > 1.0f) {
        scoreNorm = 1.0f / topDocs.getMaxScore();
    }
    int start = hitDocs.size() - nDeletedHits;
    // any new deletions?
    int nDels2 = countDeletions(searcher);
    debugCheckedForDeletions = false;
    if (nDeletions < 0 || nDels2 > nDeletions) {
        // either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
        nDeletedHits = 0;
        debugCheckedForDeletions = true;
        int i2 = 0;
        // walk cached hits and fresh results in parallel; a cached id missing from the fresh
        // results is counted as a deleted hit
        for (int i1 = 0; i1 < hitDocs.size() && i2 < scoreDocs.length; i1++) {
            int id1 = ((HitDoc) hitDocs.get(i1)).id;
            int id2 = scoreDocs[i2].doc;
            if (id1 == id2) {
                i2++;
            } else {
                nDeletedHits++;
            }
        }
        start = i2;
    }
    // never read past the returned array, nor past the reported total
    int end = Math.min(scoreDocs.length, length);
    length += nDeletedHits;
    for (int i = start; i < end; i++) {
        hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm, scoreDocs[i].doc));
    }
    nDeletions = nDels2;
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 3 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project jackrabbit by apache.

the class SortedLuceneQueryHits method getHits.

//-------------------------------< internal >-------------------------------
private void getHits() throws IOException {
    // Run the query, collecting up to numHits documents ordered by the configured sort.
    final TopFieldCollector topHits = TopFieldCollector.create(sort, numHits, false, true, false, false);
    searcher.search(query, topHits);
    // Move the window past the hits that were held before this call.
    offset += scoreDocs.length;
    size = topHits.getTotalHits();
    scoreDocs = topHits.topDocs(offset, numHits).scoreDocs;
    log.debug("getHits() {}/{}", scoreDocs.length, numHits);
    // the next call asks for twice as many hits
    numHits *= 2;
}
Also used : TopFieldCollector(org.apache.lucene.search.TopFieldCollector)

Example 4 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project lucene-solr by apache.

the class TestIndexSorting method testRandom3.

/**
 * Pits index time sorting against query time sorting.
 *
 * <p>The same random documents are written to two indexes: one without an index sort and one
 * whose {@code IndexWriterConfig} uses a random {@link Sort}. The same random subset of ids is
 * then deleted from both. For 100 random {@code numHits} values, a {@link TopFieldCollector} over
 * the unsorted index must return the same documents and sort values as a
 * {@link TopFieldCollector} wrapped in an {@link EarlyTerminatingSortingCollector} over the
 * sorted index.
 */
public void testRandom3() throws Exception {
    int numDocs;
    if (TEST_NIGHTLY) {
        numDocs = atLeast(100000);
    } else {
        numDocs = atLeast(1000);
    }
    List<RandomDoc> docs = new ArrayList<>();
    Sort sort = randomSort();
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort);
    }
    // no index sorting, all search-time sorting:
    Directory dir1 = newFSDirectory(createTempDir());
    IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w1 = new IndexWriter(dir1, iwc1);
    // use index sorting:
    Directory dir2 = newFSDirectory(createTempDir());
    IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc2.setIndexSort(sort);
    IndexWriter w2 = new IndexWriter(dir2, iwc2);
    Set<Integer> toDelete = new HashSet<>();
    double deleteChance = random().nextDouble();
    for (int id = 0; id < numDocs; id++) {
        RandomDoc docValues = new RandomDoc(id);
        docs.add(docValues);
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
            System.out.println("  int=" + docValues.intValue);
            System.out.println("  long=" + docValues.longValue);
            System.out.println("  float=" + docValues.floatValue);
            System.out.println("  double=" + docValues.doubleValue);
            System.out.println("  bytes=" + new BytesRef(docValues.bytesValue));
        }
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
        doc.add(new NumericDocValuesField("id", id));
        doc.add(new NumericDocValuesField("int", docValues.intValue));
        doc.add(new NumericDocValuesField("long", docValues.longValue));
        doc.add(new DoubleDocValuesField("double", docValues.doubleValue));
        doc.add(new FloatDocValuesField("float", docValues.floatValue));
        doc.add(new SortedDocValuesField("bytes", new BytesRef(docValues.bytesValue)));
        for (int value : docValues.intValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_int", value));
        }
        for (long value : docValues.longValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_long", value));
        }
        for (float value : docValues.floatValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_float", NumericUtils.floatToSortableInt(value)));
        }
        for (double value : docValues.doubleValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_double", NumericUtils.doubleToSortableLong(value)));
        }
        for (byte[] value : docValues.bytesValues) {
            doc.add(new SortedSetDocValuesField("multi_valued_bytes", new BytesRef(value)));
        }
        // the very same document goes into both indexes
        w1.addDocument(doc);
        w2.addDocument(doc);
        if (random().nextDouble() < deleteChance) {
            toDelete.add(id);
        }
    }
    // apply identical deletions to both indexes
    for (int id : toDelete) {
        w1.deleteDocuments(new Term("id", Integer.toString(id)));
        w2.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    DirectoryReader r1 = DirectoryReader.open(w1);
    IndexSearcher s1 = newSearcher(r1);
    if (random().nextBoolean()) {
        int maxSegmentCount = TestUtil.nextInt(random(), 1, 5);
        if (VERBOSE) {
            System.out.println("TEST: now forceMerge(" + maxSegmentCount + ")");
        }
        w2.forceMerge(maxSegmentCount);
    }
    DirectoryReader r2 = DirectoryReader.open(w2);
    IndexSearcher s2 = newSearcher(r2);
    for (int iter = 0; iter < 100; iter++) {
        int numHits = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter + " numHits=" + numHits);
        }
        // query-time sort over the unsorted index
        TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, true, true, true);
        s1.search(new MatchAllDocsQuery(), c1);
        TopDocs hits1 = c1.topDocs();
        // same collector type over the sorted index, wrapped so collection can stop early
        TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, true, true, true);
        EarlyTerminatingSortingCollector c3 = new EarlyTerminatingSortingCollector(c2, sort, numHits);
        s2.search(new MatchAllDocsQuery(), c3);
        TopDocs hits2 = c2.topDocs();
        if (VERBOSE) {
            System.out.println("  topDocs query-time sort: totalHits=" + hits1.totalHits);
            for (ScoreDoc scoreDoc : hits1.scoreDocs) {
                System.out.println("    " + scoreDoc.doc);
            }
            System.out.println("  topDocs index-time sort: totalHits=" + hits2.totalHits);
            for (ScoreDoc scoreDoc : hits2.scoreDocs) {
                System.out.println("    " + scoreDoc.doc);
            }
        }
        // NOTE(review): presumably the early-terminating search may count fewer total hits — confirm
        assertTrue(hits2.totalHits <= hits1.totalHits);
        assertEquals(hits2.scoreDocs.length, hits1.scoreDocs.length);
        for (int i = 0; i < hits2.scoreDocs.length; i++) {
            ScoreDoc hit1 = hits1.scoreDocs[i];
            ScoreDoc hit2 = hits2.scoreDocs[i];
            // internal doc ids differ between the two indexes, so compare the stored "id" field
            assertEquals(r1.document(hit1.doc).get("id"), r2.document(hit2.doc).get("id"));
            // sort values are Object[]; compare element-wise with the dedicated array assertion
            assertArrayEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields);
        }
    }
    IOUtils.close(r1, r2, w1, w2, dir1, dir2);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Sort(org.apache.lucene.search.Sort) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField)

Example 5 with TopFieldCollector

use of org.apache.lucene.search.TopFieldCollector in project lucene-solr by apache.

the class DrillSideways method search.

/**
   * Runs the drill-down query with results ordered by the given {@link Sort}, computing
   * drill down and sideways counts along the way. An optional filter is folded into the
   * query first; with no sort the call is handed to {@code search(after, query, topN)}.
   */
public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException {
    if (filter != null) {
        query = new DrillDownQuery(config, filter, query);
    }
    if (sort == null) {
        return search(after, query, topN);
    }
    // the collector does not allow numHits = 0, so an empty index still asks for one hit
    final int maxDoc = searcher.getIndexReader().maxDoc();
    final int fTopN = Math.min(topN, maxDoc == 0 ? 1 : maxDoc);
    if (executor == null) {
        final TopFieldCollector hitCollector = TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
        final DrillSidewaysResult single = search(query, hitCollector);
        return new DrillSidewaysResult(single.facets, hitCollector.topDocs());
    }
    // We have an executor, so use the multi-threaded version
    final CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {

        @Override
        public TopFieldCollector newCollector() throws IOException {
            return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
        }

        @Override
        public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
            // gather the hits of every collector, then merge them under the same sort
            final TopFieldDocs[] perCollector = new TopFieldDocs[collectors.size()];
            int next = 0;
            for (TopFieldCollector each : collectors) {
                perCollector[next] = each.topDocs();
                next++;
            }
            return TopDocs.merge(sort, topN, perCollector);
        }
    };
    final ConcurrentDrillSidewaysResult<TopFieldDocs> concurrent = search(query, collectorManager);
    return new DrillSidewaysResult(concurrent.facets, concurrent.collectorResult);
}
Also used : Collection(java.util.Collection) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) MultiCollectorManager(org.apache.lucene.search.MultiCollectorManager) CollectorManager(org.apache.lucene.search.CollectorManager) TopFieldCollector(org.apache.lucene.search.TopFieldCollector)

Aggregations

TopFieldCollector (org.apache.lucene.search.TopFieldCollector): 13, TopDocs (org.apache.lucene.search.TopDocs): 9, Sort (org.apache.lucene.search.Sort): 7, IndexSearcher (org.apache.lucene.search.IndexSearcher): 5, Document (org.apache.lucene.document.Document): 4, ScoreDoc (org.apache.lucene.search.ScoreDoc): 4, Directory (org.apache.lucene.store.Directory): 4, ArrayList (java.util.ArrayList): 3, Collector (org.apache.lucene.search.Collector): 3, TopFieldDocs (org.apache.lucene.search.TopFieldDocs): 3, HashSet (java.util.HashSet): 2, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 2, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField): 2, StringField (org.apache.lucene.document.StringField): 2, IndexReader (org.apache.lucene.index.IndexReader): 2, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 2, EarlyTerminatingSortingCollector (org.apache.lucene.search.EarlyTerminatingSortingCollector): 2, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 2, TopScoreDocCollector (org.apache.lucene.search.TopScoreDocCollector): 2, Bits (org.apache.lucene.util.Bits): 2