
Example 1 with CompositeReaderContext

Use of org.apache.lucene.index.CompositeReaderContext in project elasticsearch by elastic.

From the class ShardUtilsTests, the method testExtractShardId:

public void testExtractShardId() throws IOException {
    BaseDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
    writer.commit();
    ShardId id = new ShardId("foo", "_na_", random().nextInt());
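    // Even before any documents are indexed, the wrapped reader must expose the shard id.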
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        ElasticsearchDirectoryReader wrap = ElasticsearchDirectoryReader.wrap(reader, id);
        assertEquals(id, ShardUtils.extractShardId(wrap));
    }
    final int numDocs = 1 + random().nextInt(5);
    for (int i = 0; i < numDocs; i++) {
        Document d = new Document();
        d.add(newField("name", "foobar", StringField.TYPE_STORED));
        writer.addDocument(d);
        if (random().nextBoolean()) {
            writer.commit();
        }
    }
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        ElasticsearchDirectoryReader wrap = ElasticsearchDirectoryReader.wrap(reader, id);
        assertEquals(id, ShardUtils.extractShardId(wrap));
        CompositeReaderContext context = wrap.getContext();
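        // A DirectoryReader's context is composite: every leaf (segment) must resolve to the same shard id.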
        for (LeafReaderContext leaf : context.leaves()) {
            assertEquals(id, ShardUtils.extractShardId(leaf.reader()));
        }
    }
    IOUtils.close(writer, dir);
}
Also used: ElasticsearchDirectoryReader (org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader), CompositeReaderContext (org.apache.lucene.index.CompositeReaderContext), IndexWriter (org.apache.lucene.index.IndexWriter), DirectoryReader (org.apache.lucene.index.DirectoryReader), BaseDirectoryWrapper (org.apache.lucene.store.BaseDirectoryWrapper), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), Document (org.apache.lucene.document.Document)
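
This example, and the three that follow, hinge on the same traversal: get the searcher's top-level context and, when it is composite, walk its leaves while accumulating each segment's maxDoc() as the next doc-id offset. A minimal standalone sketch of that pattern (the helper name walkLeaves is ours, not from either project):

import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;

static void walkLeaves(IndexSearcher searcher) {
    IndexReaderContext ctx = searcher.getTopReaderContext();
    if (ctx instanceof LeafReaderContext) {
        // Single-segment case: the top-level context is itself the only leaf.
        System.out.println("single leaf, docBase=0");
    } else {
        // Multi-segment case: the top-level context is a CompositeReaderContext.
        CompositeReaderContext composite = (CompositeReaderContext) ctx;
        int docBase = 0;
        for (LeafReaderContext leaf : composite.leaves()) {
            System.out.println("leaf docBase=" + docBase + " maxDoc=" + leaf.reader().maxDoc());
            docBase += leaf.reader().maxDoc();
        }
    }
}

The docStarts arrays in Examples 2 and 3 are exactly this accumulation, one offset per sub-searcher.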

Example 2 with CompositeReaderContext

Use of org.apache.lucene.index.CompositeReaderContext in project elasticsearch by elastic.

From the class CollapsingTopDocsCollectorTests, the method assertSearchCollapse:

private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
    final int numDocs = randomIntBetween(1000, 2000);
    int maxGroup = randomIntBetween(2, 500);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Set<T> values = new HashSet<>();
    int totalHits = 0;
    for (int i = 0; i < numDocs; i++) {
        final T value = dvProducers.randomGroup(maxGroup);
        values.add(value);
        Document doc = new Document();
        dvProducers.add(doc, value, multivalued);
        doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
        doc.add(new NumericDocValuesField("sort2", randomLong()));
        w.addDocument(doc);
        totalHits++;
    }
    List<T> valueList = new ArrayList<>(values);
    Collections.sort(valueList);
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final SortField collapseField = dvProducers.sortField(multivalued);
    final SortField sort1 = new SortField("sort1", SortField.Type.INT);
    final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
    Sort sort = new Sort(sort1, sort2, collapseField);
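    // The collapse field is appended last so that FieldDoc.fields[collapseIndex] below yields each hit's group value.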
    int expectedNumGroups = values.size();
    final CollapsingTopDocsCollector collapsingCollector;
    if (numeric) {
        collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
    } else {
        collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
    }
    TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    searcher.search(new MatchAllDocsQuery(), topFieldCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    TopFieldDocs topDocs = topFieldCollector.topDocs();
    assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
    assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(totalHits, collapseTopFieldDocs.totalHits);
    assertEquals(totalHits, topDocs.scoreDocs.length);
    assertEquals(totalHits, topDocs.totalHits);
    Set<Object> seen = new HashSet<>();
    // collapse field is the last sort
    int collapseIndex = sort.getSort().length - 1;
    int topDocsIndex = 0;
    for (int i = 0; i < expectedNumGroups; i++) {
        FieldDoc fieldDoc = null;
        for (; topDocsIndex < totalHits; topDocsIndex++) {
            fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
            if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
                break;
            }
        }
        FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
        assertNotNull(fieldDoc);
        assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
        assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
        seen.add(fieldDoc.fields[fieldDoc.fields.length - 1]);
    }
    for (; topDocsIndex < totalHits; topDocsIndex++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
        assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
    }
    // check merge
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final SegmentSearcher[] subSearchers;
    final int[] docStarts;
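    // A single-segment searcher exposes a LeafReaderContext directly; otherwise the CompositeReaderContext lists one leaf per segment.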
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new SegmentSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new SegmentSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
    final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
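    // Re-run the same collapse per segment with a shared Weight, then merge; the merged result must match the single-pass collapse.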
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
        final SegmentSearcher subSearcher = subSearchers[shardIDX];
        final CollapsingTopDocsCollector c;
        if (numeric) {
            c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
        } else {
            c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
        }
        subSearcher.search(weight, c);
        shardHits[shardIDX] = c.getTopDocs();
    }
    CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
    assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
    w.close();
    reader.close();
    dir.close();
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), FieldDoc (org.apache.lucene.search.FieldDoc), ArrayList (java.util.ArrayList), CollapseTopFieldDocs (org.apache.lucene.search.grouping.CollapseTopFieldDocs), TopFieldDocs (org.apache.lucene.search.TopFieldDocs), SortField (org.apache.lucene.search.SortField), SortedSetSortField (org.apache.lucene.search.SortedSetSortField), SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField), Document (org.apache.lucene.document.Document), SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), Sort (org.apache.lucene.search.Sort), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), TopFieldCollector (org.apache.lucene.search.TopFieldCollector), Directory (org.apache.lucene.store.Directory), HashSet (java.util.HashSet), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), IndexReaderContext (org.apache.lucene.index.IndexReaderContext), Weight (org.apache.lucene.search.Weight), CompositeReaderContext (org.apache.lucene.index.CompositeReaderContext), IndexReader (org.apache.lucene.index.IndexReader), CollapsingTopDocsCollector (org.apache.lucene.search.grouping.CollapsingTopDocsCollector), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)

Example 3 with CompositeReaderContext

Use of org.apache.lucene.index.CompositeReaderContext in project lucene-solr by apache.

From the class TestTopDocsMerge, the method testSort:

void testSort(boolean useFrom) throws Exception {
    IndexReader reader = null;
    Directory dir = null;
    final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
    final String[] tokens = new String[] { "a", "b", "c", "d", "e" };
    if (VERBOSE) {
        System.out.println("TEST: make index");
    }
    {
        dir = newDirectory();
        final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
        // w.setDoRandomForceMerge(false);
        // w.w.getConfig().setMaxBufferedDocs(atLeast(100));
        final String[] content = new String[atLeast(20)];
        for (int contentIDX = 0; contentIDX < content.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            final int numTokens = TestUtil.nextInt(random(), 1, 10);
            for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) {
                sb.append(tokens[random().nextInt(tokens.length)]).append(' ');
            }
            content[contentIDX] = sb.toString();
        }
        for (int docIDX = 0; docIDX < numDocs; docIDX++) {
            final Document doc = new Document();
            doc.add(new SortedDocValuesField("string", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
            doc.add(newTextField("text", content[random().nextInt(content.length)], Field.Store.NO));
            doc.add(new FloatDocValuesField("float", random().nextFloat()));
            final int intValue;
            if (random().nextInt(100) == 17) {
                intValue = Integer.MIN_VALUE;
            } else if (random().nextInt(100) == 17) {
                intValue = Integer.MAX_VALUE;
            } else {
                intValue = random().nextInt();
            }
            doc.add(new NumericDocValuesField("int", intValue));
            if (VERBOSE) {
                System.out.println("  doc=" + doc);
            }
            w.addDocument(doc);
        }
        reader = w.getReader();
        w.close();
    }
    // NOTE: sometimes reader has just one segment, which is
    // important to test
    final IndexSearcher searcher = newSearcher(reader);
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final ShardSearcher[] subSearchers;
    final int[] docStarts;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new ShardSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new ShardSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final List<SortField> sortFields = new ArrayList<>();
    sortFields.add(new SortField("string", SortField.Type.STRING, true));
    sortFields.add(new SortField("string", SortField.Type.STRING, false));
    sortFields.add(new SortField("int", SortField.Type.INT, true));
    sortFields.add(new SortField("int", SortField.Type.INT, false));
    sortFields.add(new SortField("float", SortField.Type.FLOAT, true));
    sortFields.add(new SortField("float", SortField.Type.FLOAT, false));
    sortFields.add(new SortField(null, SortField.Type.SCORE, true));
    sortFields.add(new SortField(null, SortField.Type.SCORE, false));
    sortFields.add(new SortField(null, SortField.Type.DOC, true));
    sortFields.add(new SortField(null, SortField.Type.DOC, false));
    int numIters = atLeast(300);
    for (int iter = 0; iter < numIters; iter++) {
        // TODO: custom FieldComp...
        final Query query = new TermQuery(new Term("text", tokens[random().nextInt(tokens.length)]));
        final Sort sort;
        if (random().nextInt(10) == 4) {
            // Sort by score
            sort = null;
        } else {
            final SortField[] randomSortFields = new SortField[TestUtil.nextInt(random(), 1, 3)];
            for (int sortIDX = 0; sortIDX < randomSortFields.length; sortIDX++) {
                randomSortFields[sortIDX] = sortFields.get(random().nextInt(sortFields.size()));
            }
            sort = new Sort(randomSortFields);
        }
        final int numHits = TestUtil.nextInt(random(), 1, numDocs + 5);
        if (VERBOSE) {
            System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
        }
        int from = -1;
        int size = -1;
        // First search on whole index:
        final TopDocs topHits;
        if (sort == null) {
            if (useFrom) {
                TopScoreDocCollector c = TopScoreDocCollector.create(numHits);
                searcher.search(query, c);
                from = TestUtil.nextInt(random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.topDocs();
                if (from < tempTopHits.scoreDocs.length) {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
                    System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
                    tempTopHits.scoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                } else {
                    topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
                }
            } else {
                topHits = searcher.search(query, numHits);
            }
        } else {
            final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
            searcher.search(query, c);
            if (useFrom) {
                from = TestUtil.nextInt(random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.topDocs();
                if (from < tempTopHits.scoreDocs.length) {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
                    System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
                    tempTopHits.scoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                } else {
                    topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
                }
            } else {
                topHits = c.topDocs(0, numHits);
            }
        }
        if (VERBOSE) {
            if (useFrom) {
                System.out.println("from=" + from + " size=" + size);
            }
            System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length + " maxScore=" + topHits.getMaxScore()));
            if (topHits.scoreDocs != null) {
                for (int hitIDX = 0; hitIDX < topHits.scoreDocs.length; hitIDX++) {
                    final ScoreDoc sd = topHits.scoreDocs[hitIDX];
                    System.out.println("    doc=" + sd.doc + " score=" + sd.score);
                }
            }
        }
        // ... then all shards:
        final Weight w = searcher.createNormalizedWeight(query, true);
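        // The Weight is built once from the top-level searcher, so every shard scores with index-wide statistics.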
        final TopDocs[] shardHits;
        if (sort == null) {
            shardHits = new TopDocs[subSearchers.length];
        } else {
            shardHits = new TopFieldDocs[subSearchers.length];
        }
        for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
            final TopDocs subHits;
            final ShardSearcher subSearcher = subSearchers[shardIDX];
            if (sort == null) {
                subHits = subSearcher.search(w, numHits);
            } else {
                final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
                subSearcher.search(w, c);
                subHits = c.topDocs(0, numHits);
            }
            shardHits[shardIDX] = subHits;
            if (VERBOSE) {
                System.out.println("  shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
                if (subHits.scoreDocs != null) {
                    for (ScoreDoc sd : subHits.scoreDocs) {
                        System.out.println("    doc=" + sd.doc + " score=" + sd.score);
                    }
                }
            }
        }
        // Merge:
        final TopDocs mergedHits;
        if (useFrom) {
            if (sort == null) {
                mergedHits = TopDocs.merge(from, size, shardHits, true);
            } else {
                mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
            }
        } else {
            if (sort == null) {
                mergedHits = TopDocs.merge(numHits, shardHits);
            } else {
                mergedHits = TopDocs.merge(sort, numHits, (TopFieldDocs[]) shardHits);
            }
        }
        if (mergedHits.scoreDocs != null) {
            // Make sure the returned shards are correct:
            for (int hitIDX = 0; hitIDX < mergedHits.scoreDocs.length; hitIDX++) {
                final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
                assertEquals("doc=" + sd.doc + " wrong shard", ReaderUtil.subIndex(sd.doc, docStarts), sd.shardIndex);
            }
        }
        TestUtil.assertEquals(topHits, mergedHits);
    }
    reader.close();
    dir.close();
}
Also used: ArrayList (java.util.ArrayList), FloatDocValuesField (org.apache.lucene.document.FloatDocValuesField), Document (org.apache.lucene.document.Document), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), BytesRef (org.apache.lucene.util.BytesRef), Directory (org.apache.lucene.store.Directory), Term (org.apache.lucene.index.Term), IndexReaderContext (org.apache.lucene.index.IndexReaderContext), CompositeReaderContext (org.apache.lucene.index.CompositeReaderContext), IndexReader (org.apache.lucene.index.IndexReader), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)

Example 4 with CompositeReaderContext

Use of org.apache.lucene.index.CompositeReaderContext in project elasticsearch by elastic.

From the class AggregatorTestCase, the method searchAndReduce:

/**
 * Divides the provided {@link IndexSearcher} into sub-searchers, one for each segment,
 * builds an aggregator for each sub-searcher filtered by the provided {@link Query}, and
 * returns the reduced {@link InternalAggregation}.
 */
protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(IndexSearcher searcher, Query query, AggregationBuilder builder, MappedFieldType... fieldTypes) throws IOException {
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final ShardSearcher[] subSearchers;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new ShardSearcher[1];
        subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new ShardSearcher[size];
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
        }
    }
    List<InternalAggregation> aggs = new ArrayList<>();
    Query rewritten = searcher.rewrite(query);
    Weight weight = searcher.createWeight(rewritten, true);
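    // The Weight comes from the top-level searcher so every per-segment aggregator collects with index-wide statistics.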
    C root = createAggregator(builder, searcher, fieldTypes);
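    // The root aggregator is created only to supply the BigArrays used by the reduce contexts below.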
    try {
        for (ShardSearcher subSearcher : subSearchers) {
            C a = createAggregator(builder, subSearcher, fieldTypes);
            a.preCollection();
            subSearcher.search(weight, a);
            a.postCollection();
            aggs.add(a.buildAggregation(0L));
        }
        if (aggs.isEmpty()) {
            return null;
        } else {
            if (randomBoolean()) {
                // sometimes do an incremental reduce
                List<InternalAggregation> internalAggregations = randomSubsetOf(randomIntBetween(1, aggs.size()), aggs);
                A internalAgg = (A) aggs.get(0).doReduce(internalAggregations, new InternalAggregation.ReduceContext(root.context().bigArrays(), null, false));
                aggs.removeAll(internalAggregations);
                aggs.add(internalAgg);
            }
            // now do the final reduce
            @SuppressWarnings("unchecked") A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null, true));
            return internalAgg;
        }
    } finally {
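        // 'releasables' is a field declared elsewhere in AggregatorTestCase; it tracks resources opened by createAggregator.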
        Releasables.close(releasables);
        releasables.clear();
    }
}
Also used: Query (org.apache.lucene.search.Query), ArrayList (java.util.ArrayList), IndexReaderContext (org.apache.lucene.index.IndexReaderContext), Weight (org.apache.lucene.search.Weight), CompositeReaderContext (org.apache.lucene.index.CompositeReaderContext), LeafReaderContext (org.apache.lucene.index.LeafReaderContext)

Aggregations

Classes shared across the examples above, with the number of examples using each:

CompositeReaderContext (org.apache.lucene.index.CompositeReaderContext): 4
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 4
ArrayList (java.util.ArrayList): 3
Document (org.apache.lucene.document.Document): 3
IndexReaderContext (org.apache.lucene.index.IndexReaderContext): 3
NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 2
IndexReader (org.apache.lucene.index.IndexReader): 2
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 2
Weight (org.apache.lucene.search.Weight): 2
Directory (org.apache.lucene.store.Directory): 2
HashSet (java.util.HashSet): 1
FloatDocValuesField (org.apache.lucene.document.FloatDocValuesField): 1
SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 1
SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField): 1
DirectoryReader (org.apache.lucene.index.DirectoryReader): 1
IndexWriter (org.apache.lucene.index.IndexWriter): 1
Term (org.apache.lucene.index.Term): 1
FieldDoc (org.apache.lucene.search.FieldDoc): 1
IndexSearcher (org.apache.lucene.search.IndexSearcher): 1
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 1