Examples with CollapsingTopDocsCollector - org.apache.lucene.search.grouping.CollapsingTopDocsCollector

Example 1 with CollapsingTopDocsCollector

use of org.apache.lucene.search.grouping.CollapsingTopDocsCollector in project elasticsearch by elastic.

the class CollapsingTopDocsCollectorTests method assertSearchCollapse.

private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
    final int numDocs = randomIntBetween(1000, 2000);
    int maxGroup = randomIntBetween(2, 500);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Set<T> values = new HashSet<>();
    int totalHits = 0;
    for (int i = 0; i < numDocs; i++) {
        final T value = dvProducers.randomGroup(maxGroup);
        values.add(value);
        Document doc = new Document();
        dvProducers.add(doc, value, multivalued);
        doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
        doc.add(new NumericDocValuesField("sort2", randomLong()));
        w.addDocument(doc);
        totalHits++;
    }
    List<T> valueList = new ArrayList<>(values);
    Collections.sort(valueList);
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final SortField collapseField = dvProducers.sortField(multivalued);
    final SortField sort1 = new SortField("sort1", SortField.Type.INT);
    final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
    Sort sort = new Sort(sort1, sort2, collapseField);
    int expectedNumGroups = values.size();
    final CollapsingTopDocsCollector collapsingCollector;
    if (numeric) {
        collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
    } else {
        collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
    }
    TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    searcher.search(new MatchAllDocsQuery(), topFieldCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    TopFieldDocs topDocs = topFieldCollector.topDocs();
    assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
    assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(totalHits, collapseTopFieldDocs.totalHits);
    assertEquals(totalHits, topDocs.scoreDocs.length);
    assertEquals(totalHits, topDocs.totalHits);
    Set<Object> seen = new HashSet<>();
    // collapse field is the last sort
    int collapseIndex = sort.getSort().length - 1;
    int topDocsIndex = 0;
    for (int i = 0; i < expectedNumGroups; i++) {
        FieldDoc fieldDoc = null;
        for (; topDocsIndex < totalHits; topDocsIndex++) {
            fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
            if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
                break;
            }
        }
        FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
        assertNotNull(fieldDoc);
        assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
        assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
        seen.add(fieldDoc.fields[fieldDoc.fields.length - 1]);
    }
    for (; topDocsIndex < totalHits; topDocsIndex++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
        assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
    }
    // check merge
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final SegmentSearcher[] subSearchers;
    final int[] docStarts;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new SegmentSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new SegmentSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
    final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
        final SegmentSearcher subSearcher = subSearchers[shardIDX];
        final CollapsingTopDocsCollector c;
        if (numeric) {
            c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
        } else {
            c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
        }
        subSearcher.search(weight, c);
        shardHits[shardIDX] = c.getTopDocs();
    }
    CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
    assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
    w.close();
    reader.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) Weight(org.apache.lucene.search.Weight) CompositeReaderContext(org.apache.lucene.index.CompositeReaderContext) IndexReader(org.apache.lucene.index.IndexReader) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 2 with CollapsingTopDocsCollector

use of org.apache.lucene.search.grouping.CollapsingTopDocsCollector in project elasticsearch by elastic.

the class QueryPhase method execute.

/**
     * In a package-private method so that it can be tested without having to
     * wire everything (mapperService, etc.)
     * @return whether the rescoring phase should be executed
     */
static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
    QuerySearchResult queryResult = searchContext.queryResult();
    queryResult.searchTimedOut(false);
    final boolean doProfile = searchContext.getProfilers() != null;
    final SearchType searchType = searchContext.searchType();
    boolean rescore = false;
    try {
        queryResult.from(searchContext.from());
        queryResult.size(searchContext.size());
        Query query = searchContext.query();
        final int totalNumDocs = searcher.getIndexReader().numDocs();
        int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);
        Collector collector;
        Callable<TopDocs> topDocsCallable;
        DocValueFormat[] sortValueFormats = new DocValueFormat[0];
        // already rewritten
        assert query == searcher.rewrite(query);
        if (searchContext.size() == 0) {
            // no matter what the value of from is
            final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
            collector = totalHitCountCollector;
            if (searchContext.getProfilers() != null) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {

                @Override
                public TopDocs call() throws Exception {
                    return new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
                }
            };
        } else {
            // Perhaps have a dedicated scroll phase?
            final ScrollContext scrollContext = searchContext.scrollContext();
            assert (scrollContext != null) == (searchContext.request().scroll() != null);
            final Collector topDocsCollector;
            ScoreDoc after = null;
            if (searchContext.request().scroll() != null) {
                numDocs = Math.min(searchContext.size(), totalNumDocs);
                after = scrollContext.lastEmittedDoc;
                if (returnsDocsInOrder(query, searchContext.sort())) {
                    if (scrollContext.totalHits == -1) {
                        // first round
                        assert scrollContext.lastEmittedDoc == null;
                    // there is not much that we can optimize here since we want to collect all
                    // documents in order to get the total number of hits
                    } else {
                        // skip to the desired doc and stop collecting after ${size} matches
                        if (scrollContext.lastEmittedDoc != null) {
                            BooleanQuery bq = new BooleanQuery.Builder().add(query, BooleanClause.Occur.MUST).add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER).build();
                            query = bq;
                        }
                        searchContext.terminateAfter(numDocs);
                    }
                }
            } else {
                after = searchContext.searchAfter();
            }
            if (totalNumDocs == 0) {
                // top collectors don't like a size of 0
                numDocs = 1;
            }
            assert numDocs > 0;
            if (searchContext.collapse() == null) {
                if (searchContext.sort() != null) {
                    SortAndFormats sf = searchContext.sort();
                    topDocsCollector = TopFieldCollector.create(sf.sort, numDocs, (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
                    sortValueFormats = sf.formats;
                } else {
                    rescore = !searchContext.rescore().isEmpty();
                    for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
                        numDocs = Math.max(rescoreContext.window(), numDocs);
                    }
                    topDocsCollector = TopScoreDocCollector.create(numDocs, after);
                }
            } else {
                Sort sort = Sort.RELEVANCE;
                if (searchContext.sort() != null) {
                    sort = searchContext.sort().sort;
                }
                CollapseContext collapse = searchContext.collapse();
                topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
                if (searchContext.sort() == null) {
                    sortValueFormats = new DocValueFormat[] { DocValueFormat.RAW };
                } else {
                    sortValueFormats = searchContext.sort().formats;
                }
            }
            collector = topDocsCollector;
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
            }
            topDocsCallable = new Callable<TopDocs>() {

                @Override
                public TopDocs call() throws Exception {
                    final TopDocs topDocs;
                    if (topDocsCollector instanceof TopDocsCollector) {
                        topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
                    } else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
                        topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
                    } else {
                        throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
                    }
                    if (scrollContext != null) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            scrollContext.totalHits = topDocs.totalHits;
                            scrollContext.maxScore = topDocs.getMaxScore();
                        } else {
                            // subsequent round: the total number of hits and
                            // the maximum score were computed on the first round
                            topDocs.totalHits = scrollContext.totalHits;
                            topDocs.setMaxScore(scrollContext.maxScore);
                        }
                        if (searchContext.request().numberOfShards() == 1) {
                            // if we fetch the document in the same roundtrip, we already know the last emitted doc
                            if (topDocs.scoreDocs.length > 0) {
                                // set the last emitted doc
                                scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
                            }
                        }
                    }
                    return topDocs;
                }
            };
        }
        final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
        if (terminateAfterSet) {
            final Collector child = collector;
            // throws Lucene.EarlyTerminationException when given count is reached
            collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (searchContext.parsedPostFilter() != null) {
            final Collector child = collector;
            // this will only get applied to the actual search collector and not
            // to any scoped collectors, also, it will only be applied to the main collector
            // since that is where the filter should only work
            final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
            collector = new FilteredCollector(collector, filterWeight);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        // plug in additional collectors, like aggregations
        final List<Collector> subCollectors = new ArrayList<>();
        subCollectors.add(collector);
        subCollectors.addAll(searchContext.queryCollectors().values());
        collector = MultiCollector.wrap(subCollectors);
        if (doProfile && collector instanceof InternalProfileCollector == false) {
            // When there is a single collector to wrap, MultiCollector returns it
            // directly, so only wrap in the case that there are several sub collectors
            final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {

                @Override
                public InternalProfileCollector get(int index) {
                    return (InternalProfileCollector) subCollectors.get(index);
                }

                @Override
                public int size() {
                    return subCollectors.size();
                }
            };
            collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
        }
        // apply the minimum score after multi collector so we filter aggs as well
        if (searchContext.minimumScore() != null) {
            final Collector child = collector;
            collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector.getClass() == TotalHitCountCollector.class) {
            // instead of using a collector
            while (true) {
                // a constant_score query
                if (query instanceof ConstantScoreQuery) {
                    query = ((ConstantScoreQuery) query).getQuery();
                } else {
                    break;
                }
            }
            if (query.getClass() == MatchAllDocsQuery.class) {
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {

                    @Override
                    public TopDocs call() throws Exception {
                        int count = searcher.getIndexReader().numDocs();
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
                final Term term = ((TermQuery) query).getTerm();
                collector = null;
                topDocsCallable = new Callable<TopDocs>() {

                    @Override
                    public TopDocs call() throws Exception {
                        int count = 0;
                        for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                            count += context.reader().docFreq(term);
                        }
                        return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                    }
                };
            }
        }
        final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
        if (timeoutSet && collector != null) {
            // collector might be null if no collection is actually needed
            final Collector child = collector;
            // TODO: change to use our own counter that uses the scheduler in ThreadPool
            // throws TimeLimitingCollector.TimeExceededException when timeout has reached
            collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        if (collector != null) {
            final Collector child = collector;
            collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
            if (doProfile) {
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED, Collections.singletonList((InternalProfileCollector) child));
            }
        }
        try {
            if (collector != null) {
                if (doProfile) {
                    searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
                }
                searcher.search(query, collector);
            }
        } catch (TimeLimitingCollector.TimeExceededException e) {
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            queryResult.searchTimedOut(true);
        } catch (Lucene.EarlyTerminationException e) {
            assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
            queryResult.terminatedEarly(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        if (terminateAfterSet && queryResult.terminatedEarly() == null) {
            queryResult.terminatedEarly(false);
        }
        queryResult.topDocs(topDocsCallable.call(), sortValueFormats);
        if (searchContext.getProfilers() != null) {
            ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(searchContext.getProfilers());
            searchContext.queryResult().profileResults(shardResults);
        }
        return rescore;
    } catch (Exception e) {
        throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
    }
}

Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) MinDocQuery(org.apache.lucene.queries.MinDocQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) FieldDoc(org.apache.lucene.search.FieldDoc) RescoreSearchContext(org.elasticsearch.search.rescore.RescoreSearchContext) ArrayList(java.util.ArrayList) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) Lucene(org.elasticsearch.common.lucene.Lucene) Callable(java.util.concurrent.Callable) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MinDocQuery(org.apache.lucene.queries.MinDocQuery) FilteredCollector(org.elasticsearch.common.lucene.search.FilteredCollector) MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) FilteredCollector(org.elasticsearch.common.lucene.search.FilteredCollector) MultiCollector(org.apache.lucene.search.MultiCollector) InternalProfileCollector(org.elasticsearch.search.profile.query.InternalProfileCollector) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) MinimumScoreCollector(org.elasticsearch.common.lucene.MinimumScoreCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) Sort(org.apache.lucene.search.Sort) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) SearchType(org.elasticsearch.action.search.SearchType) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) ProfileShardResult(org.elasticsearch.search.profile.ProfileShardResult) AbstractList(java.util.AbstractList) DocValueFormat(org.elasticsearch.search.DocValueFormat) ScrollContext(org.elasticsearch.search.internal.ScrollContext) Term(org.apache.lucene.index.Term) SortAndFormats(org.elasticsearch.search.sort.SortAndFormats) Weight(org.apache.lucene.search.Weight) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) InternalProfileCollector(org.elasticsearch.search.profile.query.InternalProfileCollector) CollapseContext(org.elasticsearch.search.collapse.CollapseContext)

Example 3 with CollapsingTopDocsCollector

use of org.apache.lucene.search.grouping.CollapsingTopDocsCollector in project elasticsearch by elastic.

the class CollapsingTopDocsCollectorTests method testEmptyNumericSegment.

public void testEmptyNumericSegment() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new NumericDocValuesField("group", 0));
    w.addDocument(doc);
    doc.clear();
    doc.add(new NumericDocValuesField("group", 1));
    w.addDocument(doc);
    w.commit();
    doc.clear();
    doc.add(new NumericDocValuesField("group", 10));
    w.addDocument(doc);
    w.commit();
    doc.clear();
    doc.add(new NumericDocValuesField("category", 0));
    w.addDocument(doc);
    w.commit();
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    SortField sortField = new SortField("group", SortField.Type.LONG);
    sortField.setMissingValue(Long.MAX_VALUE);
    Sort sort = new Sort(sortField);
    final CollapsingTopDocsCollector collapsingCollector = CollapsingTopDocsCollector.createNumeric("group", sort, 10, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    assertEquals(4, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(4, collapseTopFieldDocs.collapseValues.length);
    assertEquals(0L, collapseTopFieldDocs.collapseValues[0]);
    assertEquals(1L, collapseTopFieldDocs.collapseValues[1]);
    assertEquals(10L, collapseTopFieldDocs.collapseValues[2]);
    assertNull(collapseTopFieldDocs.collapseValues[3]);
    w.close();
    reader.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 4 with CollapsingTopDocsCollector

use of org.apache.lucene.search.grouping.CollapsingTopDocsCollector in project elasticsearch by elastic.

the class CollapsingTopDocsCollectorTests method testEmptySortedSegment.

public void testEmptySortedSegment() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new SortedDocValuesField("group", new BytesRef("0")));
    w.addDocument(doc);
    doc.clear();
    doc.add(new SortedDocValuesField("group", new BytesRef("1")));
    w.addDocument(doc);
    w.commit();
    doc.clear();
    doc.add(new SortedDocValuesField("group", new BytesRef("10")));
    w.addDocument(doc);
    w.commit();
    doc.clear();
    doc.add(new NumericDocValuesField("category", 0));
    w.addDocument(doc);
    w.commit();
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    Sort sort = new Sort(new SortField("group", SortField.Type.STRING_VAL));
    final CollapsingTopDocsCollector collapsingCollector = CollapsingTopDocsCollector.createKeyword("group", sort, 10, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    assertEquals(4, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(4, collapseTopFieldDocs.collapseValues.length);
    assertNull(collapseTopFieldDocs.collapseValues[0]);
    assertEquals(new BytesRef("0"), collapseTopFieldDocs.collapseValues[1]);
    assertEquals(new BytesRef("1"), collapseTopFieldDocs.collapseValues[2]);
    assertEquals(new BytesRef("10"), collapseTopFieldDocs.collapseValues[3]);
    w.close();
    reader.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) Sort(org.apache.lucene.search.Sort) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)4 Sort (org.apache.lucene.search.Sort)4 CollapsingTopDocsCollector (org.apache.lucene.search.grouping.CollapsingTopDocsCollector)4 Document (org.apache.lucene.document.Document)3 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)3 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)3 IndexReader (org.apache.lucene.index.IndexReader)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)3 IndexSearcher (org.apache.lucene.search.IndexSearcher)3 SortField (org.apache.lucene.search.SortField)3 SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField)3 SortedSetSortField (org.apache.lucene.search.SortedSetSortField)3 CollapseTopFieldDocs (org.apache.lucene.search.grouping.CollapseTopFieldDocs)3 Directory (org.apache.lucene.store.Directory)3 ArrayList (java.util.ArrayList)2 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)2 FieldDoc (org.apache.lucene.search.FieldDoc)2 TopFieldCollector (org.apache.lucene.search.TopFieldCollector)2 Weight (org.apache.lucene.search.Weight)2 AbstractList (java.util.AbstractList)1