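CachingCollector wraps another Collector and records the matching doc IDs (and optionally scores) during a search, so the same hits can later be replayed into further collectors without re-executing the query. The three examples below, all from the apache/lucene-solr project, show it used for two-pass result grouping.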

Example 1 with CachingCollector

Use of org.apache.lucene.search.CachingCollector in the apache/lucene-solr project.

From the class TestGrouping, method testRandom:

public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        //final int numDocs = _TestUtil.nextInt(random, 5, 20);
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // Because of the DV-based impl we can't tell an empty string
                // apart from a null value, so we don't generate empty-string
                // groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
        doc.add(idvGroupField);
        docNoGroup.add(idvGroupField);
        Field group = newStringField("group", "", Field.Store.NO);
        doc.add(group);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the case where a doc doesn't have the grouped field:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            if (groupDoc.group != null) {
                group.setStringValue(groupDoc.group.utf8ToString());
                idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
            } else {
                // TODO: not true
                // Must explicitly set an empty string; else, e.g. if the segment
                // has all docs missing the field, we get null back instead of an
                // empty BytesRef:
                idvGroupField.setBytesValue(new BytesRef());
            }
            sort1.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort1));
            sort2.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort2));
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
        System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
        final DirectoryReader r = w.getReader();
        w.close();
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        int[] docIDToID = new int[r.maxDoc()];
        for (int i = 0; i < r.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToID[i] = (int) values.longValue();
        }
        DirectoryReader rBlocks = null;
        Directory dirBlocks = null;
        final IndexSearcher s = newSearcher(r);
        if (VERBOSE) {
            System.out.println("\nTEST: searcher=" + s);
        }
        final ShardState shards = new ShardState(s);
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToID[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                seenIDs.add(idValue);
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
                assertEquals(gd.id, idValue);
            }
        }
        // make sure all groups were seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        // Build 2nd index, where docs are added in blocks by
        // group, so we can use single pass collector
        dirBlocks = newDirectory();
        rBlocks = getDocBlockReader(dirBlocks, groupDocs);
        final Query lastDocInBlock = new TermQuery(new Term("groupend", "x"));
        final IndexSearcher sBlocks = newSearcher(rBlocks);
        final ShardState shardsBlocks = new ShardState(sBlocks);
        // rBlocks only increases maxDoc() vs the original reader, which
        // means a monotonic shift in scores, so we can reliably remap
        // them with a Map:
        final Map<String, Map<Float, Float>> scoreMap = new HashMap<>();
        values = MultiDocValues.getNumericValues(rBlocks, "id");
        assertNotNull(values);
        int[] docIDToIDBlocks = new int[rBlocks.maxDoc()];
        for (int i = 0; i < rBlocks.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToIDBlocks[i] = (int) values.longValue();
        }
        //System.out.println("fixup score2");
        for (int contentID = 0; contentID < 3; contentID++) {
            //System.out.println("  term=real" + contentID);
            final Map<Float, Float> termScoreMap = new HashMap<>();
            scoreMap.put("real" + contentID, termScoreMap);
            //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
            //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
            final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
                assertTrue(gd.score2 == 0.0);
                gd.score2 = hit.score;
                assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
                //System.out.println("    score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
                termScoreMap.put(gd.score, gd.score2);
            }
        }
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            final boolean fillFields = random().nextBoolean();
            boolean getScores = random().nextBoolean();
            final boolean getMaxScores = random().nextBoolean();
            final Sort groupSort = getRandomSort();
            //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
            final Sort docSort = getRandomSort();
            getScores |= (groupSort.needsScores() || docSort.needsScores());
            final int topNGroups = TestUtil.nextInt(random(), 1, 30);
            //final int topNGroups = 10;
            final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
            final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
            //final int groupOffset = 0;
            final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
            //final int docOffset = 0;
            final boolean doCache = random().nextBoolean();
            final boolean doAllGroups = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
            }
            String groupField = "group";
            if (VERBOSE) {
                System.out.println("  groupField=" + groupField);
            }
            final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
            final CachingCollector cCache;
            final Collector c;
            final AllGroupsCollector<?> allGroupsCollector;
            if (doAllGroups) {
                allGroupsCollector = createAllGroupsCollector(c1, groupField);
            } else {
                allGroupsCollector = null;
            }
            final boolean useWrappingCollector = random().nextBoolean();
            if (doCache) {
                final double maxCacheMB = random().nextDouble();
                if (VERBOSE) {
                    System.out.println("TEST: maxCacheMB=" + maxCacheMB);
                }
                if (useWrappingCollector) {
                    if (doAllGroups) {
                        cCache = CachingCollector.create(c1, true, maxCacheMB);
                        c = MultiCollector.wrap(cCache, allGroupsCollector);
                    } else {
                        c = cCache = CachingCollector.create(c1, true, maxCacheMB);
                    }
                } else {
                    // Collect only into cache, then replay multiple times:
                    c = cCache = CachingCollector.create(true, maxCacheMB);
                }
            } else {
                cCache = null;
                if (doAllGroups) {
                    c = MultiCollector.wrap(c1, allGroupsCollector);
                } else {
                    c = c1;
                }
            }
            // Search top reader:
            final Query query = new TermQuery(new Term("content", searchTerm));
            s.search(query, c);
            if (doCache && !useWrappingCollector) {
                if (cCache.isCached()) {
                    // Replay for first-pass grouping
                    cCache.replay(c1);
                    if (doAllGroups) {
                        // Replay for all groups:
                        cCache.replay(allGroupsCollector);
                    }
                } else {
                    // Replay by re-running search:
                    s.search(query, c1);
                    if (doAllGroups) {
                        s.search(query, allGroupsCollector);
                    }
                }
            }
            // Get 1st pass top groups
            final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
            final TopGroups<BytesRef> groupsResult;
            if (VERBOSE) {
                System.out.println("TEST: first pass topGroups");
                if (topGroups == null) {
                    System.out.println("  null");
                } else {
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
            }
            // Get 1st pass top groups using shards
            final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, true);
            final TopGroupsCollector<?> c2;
            if (topGroups != null) {
                if (VERBOSE) {
                    System.out.println("TEST: topGroups");
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
                c2 = createSecondPassCollector(c1, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
                if (doCache) {
                    if (cCache.isCached()) {
                        if (VERBOSE) {
                            System.out.println("TEST: cache is intact");
                        }
                        cCache.replay(c2);
                    } else {
                        if (VERBOSE) {
                            System.out.println("TEST: cache was too large");
                        }
                        s.search(query, c2);
                    }
                } else {
                    s.search(query, c2);
                }
                if (doAllGroups) {
                    TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
                    groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
                } else {
                    groupsResult = getTopGroups(c2, docOffset);
                }
            } else {
                c2 = null;
                groupsResult = null;
                if (VERBOSE) {
                    System.out.println("TEST:   no results");
                }
            }
            final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
            if (VERBOSE) {
                if (expectedGroups == null) {
                    System.out.println("TEST: no expected groups");
                } else {
                    System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : expectedGroups.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + sd.doc + " score=" + sd.score);
                        }
                    }
                }
                if (groupsResult == null) {
                    System.out.println("TEST: no matched groups");
                } else {
                    System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : groupsResult.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                    if (searchIter == 14) {
                        for (int docIDX = 0; docIDX < s.getIndexReader().maxDoc(); docIDX++) {
                            System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
                        }
                    }
                }
                if (topGroupsShards == null) {
                    System.out.println("TEST: no matched-merged groups");
                } else {
                    System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : topGroupsShards.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                }
            }
            assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
            // Confirm merged shards match:
            assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
            if (topGroupsShards != null) {
                verifyShards(shards.docStarts, topGroupsShards);
            }
            final boolean needsScores = getScores || getMaxScores || docSort == null;
            final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, sBlocks.createNormalizedWeight(lastDocInBlock, false));
            final AllGroupsCollector<BytesRef> allGroupsCollector2;
            final Collector c4;
            if (doAllGroups) {
                // NOTE: must be "group" and not "group_dv"
                // (groupField) because we didn't index doc
                // values in the block index:
                allGroupsCollector2 = new AllGroupsCollector<>(new TermGroupSelector("group"));
                c4 = MultiCollector.wrap(c3, allGroupsCollector2);
            } else {
                allGroupsCollector2 = null;
                c4 = c3;
            }
            // Get block grouping result:
            sBlocks.search(query, c4);
            @SuppressWarnings({ "unchecked", "rawtypes" }) final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields);
            final TopGroups<BytesRef> groupsResultBlocks;
            if (doAllGroups && tempTopGroupsBlocks != null) {
                assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
                groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
            } else {
                groupsResultBlocks = tempTopGroupsBlocks;
            }
            if (VERBOSE) {
                if (groupsResultBlocks == null) {
                    System.out.println("TEST: no block groups");
                } else {
                    System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
                    boolean first = true;
                    for (GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
                            if (first) {
                                System.out.println("explain: " + sBlocks.explain(query, sd.doc));
                                first = false;
                            }
                        }
                    }
                }
            }
            // Get shard'd block grouping result:
            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
            if (expectedGroups != null) {
                // Fixup scores for reader2
                for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                    for (ScoreDoc hit : groupDocsHits.scoreDocs) {
                        final GroupDoc gd = groupDocsByID[hit.doc];
                        assertEquals(gd.id, hit.doc);
                        //System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
                        hit.score = gd.score2;
                    }
                }
                final SortField[] sortFields = groupSort.getSort();
                final Map<Float, Float> termScoreMap = scoreMap.get(searchTerm);
                for (int groupSortIDX = 0; groupSortIDX < sortFields.length; groupSortIDX++) {
                    if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            if (groupDocsHits.groupSortValues != null) {
                                //System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
                                groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
                                assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
                            }
                        }
                    }
                }
                final SortField[] docSortFields = docSort.getSort();
                for (int docSortIDX = 0; docSortIDX < docSortFields.length; docSortIDX++) {
                    if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            for (ScoreDoc _hit : groupDocsHits.scoreDocs) {
                                FieldDoc hit = (FieldDoc) _hit;
                                if (hit.fields != null) {
                                    hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
                                    assertNotNull(hit.fields[docSortIDX]);
                                }
                            }
                        }
                    }
                }
            }
            assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
            assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
        }
        r.close();
        dir.close();
        rBlocks.close();
        dirBlocks.close();
    }
}
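The branch to note above is doCache without useWrappingCollector: the search collects only into the cache, and the cached hits are then replayed into several consumers (c1, allGroupsCollector, and later c2). Below is a minimal sketch of that collect-once, replay-many pattern, assuming a Lucene 7.x classpath like the test above; the method name and parameters are illustrative.

import java.io.IOException;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

static void collectOnceReplayMany(IndexSearcher searcher, Query query,
                                  Collector firstPass, Collector allGroups) throws IOException {
    // Collect only into the cache (no wrapped collector), capped at 4 MB of RAM:
    CachingCollector cache = CachingCollector.create(/* cacheScores= */ true, /* maxRAMMB= */ 4.0);
    searcher.search(query, cache);
    if (cache.isCached()) {
        // One pass over the index feeds any number of collectors:
        cache.replay(firstPass);
        cache.replay(allGroups);
    } else {
        // The hits exceeded the RAM budget; fall back to re-running the query:
        searcher.search(query, firstPass);
        searcher.search(query, allGroups);
    }
}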

Example 2 with CachingCollector

Use of org.apache.lucene.search.CachingCollector in the apache/lucene-solr project.

From the class Grouping, method execute:

public void execute() throws IOException {
    if (commands.isEmpty()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify at least one field, function or query to group by.");
    }
    DocListAndSet out = new DocListAndSet();
    qr.setDocListAndSet(out);
    SolrIndexSearcher.ProcessedFilter pf = searcher.getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
    final Filter luceneFilter = pf.filter;
    maxDoc = searcher.maxDoc();
    needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
    boolean cacheScores = false;
    // NOTE: Change this when withinGroupSort can be specified per group
    if (!needScores && !commands.isEmpty()) {
        Sort withinGroupSort = commands.get(0).withinGroupSort;
        cacheScores = withinGroupSort == null || withinGroupSort.needsScores();
    } else if (needScores) {
        cacheScores = needScores;
    }
    getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0;
    getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;
    query = QueryUtils.makeQueryable(cmd.getQuery());
    for (Command cmd : commands) {
        cmd.prepare();
    }
    AllGroupHeadsCollector<?> allGroupHeadsCollector = null;
    List<Collector> collectors = new ArrayList<>(commands.size());
    for (Command cmd : commands) {
        Collector collector = cmd.createFirstPassCollector();
        if (collector != null) {
            collectors.add(collector);
        }
        if (getGroupedDocSet && allGroupHeadsCollector == null) {
            collectors.add(allGroupHeadsCollector = cmd.createAllGroupCollector());
        }
    }
    DocSetCollector setCollector = null;
    if (getDocSet && allGroupHeadsCollector == null) {
        setCollector = new DocSetCollector(maxDoc);
        collectors.add(setCollector);
    }
    Collector allCollectors = MultiCollector.wrap(collectors);
    CachingCollector cachedCollector = null;
    if (cacheSecondPassSearch && allCollectors != null) {
        int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
        // Maybe we should have a minimum and a maximum that define the window we would like caching for.
        if (maxDocsToCache > 0) {
            allCollectors = cachedCollector = CachingCollector.create(allCollectors, cacheScores, maxDocsToCache);
        }
    }
    if (pf.postFilter != null) {
        pf.postFilter.setLastDelegate(allCollectors);
        allCollectors = pf.postFilter;
    }
    if (allCollectors != null) {
        searchWithTimeLimiter(luceneFilter, allCollectors);
        if (allCollectors instanceof DelegatingCollector) {
            ((DelegatingCollector) allCollectors).finish();
        }
    }
    if (getGroupedDocSet && allGroupHeadsCollector != null) {
        qr.setDocSet(new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(maxDoc)));
    } else if (getDocSet) {
        qr.setDocSet(setCollector.getDocSet());
    }
    collectors.clear();
    for (Command cmd : commands) {
        Collector collector = cmd.createSecondPassCollector();
        if (collector != null)
            collectors.add(collector);
    }
    if (!collectors.isEmpty()) {
        Collector secondPhaseCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
        if (collectors.size() > 0) {
            if (cachedCollector != null) {
                if (cachedCollector.isCached()) {
                    cachedCollector.replay(secondPhaseCollectors);
                } else {
                    signalCacheWarning = true;
                    logger.warn(String.format(Locale.ROOT, "The grouping cache is active, but not used because it exceeded the max cache limit of %d percent", maxDocsPercentageToCache));
                    logger.warn("Please increase cache size or disable group caching.");
                    searchWithTimeLimiter(luceneFilter, secondPhaseCollectors);
                }
            } else {
                if (pf.postFilter != null) {
                    pf.postFilter.setLastDelegate(secondPhaseCollectors);
                    secondPhaseCollectors = pf.postFilter;
                }
                searchWithTimeLimiter(luceneFilter, secondPhaseCollectors);
            }
            if (secondPhaseCollectors instanceof DelegatingCollector) {
                ((DelegatingCollector) secondPhaseCollectors).finish();
            }
        }
    }
    for (Command cmd : commands) {
        cmd.finish();
    }
    qr.groupedResults = grouped;
    if (getDocList) {
        int sz = idSet.size();
        int[] ids = new int[sz];
        int idx = 0;
        for (int val : idSet) {
            ids[idx++] = val;
        }
        qr.setDocList(new DocSlice(0, sz, ids, null, maxMatches, maxScore));
    }
}
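Unlike the other two examples, Solr caps the cache by document count rather than RAM, deriving the cap from a percentage of maxDoc; with 1,000,000 docs and a 20% setting, the cap works out to 200,000 documents. A sketch of just that computation, with made-up values and a hypothetical method name:

import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;

// Illustrative only; the 20% figure and the method name are made up:
static Collector capByDocCount(Collector allCollectors, boolean cacheScores, int maxDoc) {
    double maxDocsPercentageToCache = 20.0;
    // 1,000,000 docs -> 200,000 cached docs:
    int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
    // The int overload of CachingCollector.create caps the cache by hit count instead of RAM:
    return maxDocsToCache > 0 ? CachingCollector.create(allCollectors, cacheScores, maxDocsToCache) : allCollectors;
}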

Example 3 with CachingCollector

Use of org.apache.lucene.search.CachingCollector in the apache/lucene-solr project.

From the class GroupingSearch, method groupByFieldOrFunction:

@SuppressWarnings({ "unchecked", "rawtypes" })
protected TopGroups groupByFieldOrFunction(IndexSearcher searcher, Query query, int groupOffset, int groupLimit) throws IOException {
    int topN = groupOffset + groupLimit;
    final FirstPassGroupingCollector firstPassCollector = new FirstPassGroupingCollector(grouper, groupSort, topN);
    final AllGroupsCollector allGroupsCollector = allGroups ? new AllGroupsCollector(grouper) : null;
    final AllGroupHeadsCollector allGroupHeadsCollector = allGroupHeads ? AllGroupHeadsCollector.newCollector(grouper, sortWithinGroup) : null;
    final Collector firstRound = MultiCollector.wrap(firstPassCollector, allGroupsCollector, allGroupHeadsCollector);
    CachingCollector cachedCollector = null;
    if (maxCacheRAMMB != null || maxDocsToCache != null) {
        if (maxCacheRAMMB != null) {
            cachedCollector = CachingCollector.create(firstRound, cacheScores, maxCacheRAMMB);
        } else {
            cachedCollector = CachingCollector.create(firstRound, cacheScores, maxDocsToCache);
        }
        searcher.search(query, cachedCollector);
    } else {
        searcher.search(query, firstRound);
    }
    matchingGroups = allGroups ? allGroupsCollector.getGroups() : Collections.emptyList();
    matchingGroupHeads = allGroupHeads ? allGroupHeadsCollector.retrieveGroupHeads(searcher.getIndexReader().maxDoc()) : new Bits.MatchNoBits(searcher.getIndexReader().maxDoc());
    Collection<SearchGroup> topSearchGroups = firstPassCollector.getTopGroups(groupOffset, fillSortFields);
    if (topSearchGroups == null) {
        return new TopGroups(new SortField[0], new SortField[0], 0, 0, new GroupDocs[0], Float.NaN);
    }
    int topNInsideGroup = groupDocsOffset + groupDocsLimit;
    TopGroupsCollector secondPassCollector = new TopGroupsCollector(grouper, topSearchGroups, groupSort, sortWithinGroup, topNInsideGroup, includeScores, includeMaxScore, fillSortFields);
    if (cachedCollector != null && cachedCollector.isCached()) {
        cachedCollector.replay(secondPassCollector);
    } else {
        searcher.search(query, secondPassCollector);
    }
    if (allGroups) {
        return new TopGroups(secondPassCollector.getTopGroups(groupDocsOffset), matchingGroups.size());
    } else {
        return secondPassCollector.getTopGroups(groupDocsOffset);
    }
}
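Distilling the three examples, here is a small self-contained sketch of the whole cache-then-replay flow. The class name and toy index are made up; it assumes Lucene 7.x (matching lucene-solr above), where RAMDirectory still exists:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.RAMDirectory;

public class CachingCollectorDemo {
    public static void main(String[] args) throws Exception {
        // Build a tiny in-memory index:
        RAMDirectory dir = new RAMDirectory();
        try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (String text : new String[] { "real one", "real two", "fake" }) {
                Document doc = new Document();
                doc.add(new TextField("content", text, Field.Store.NO));
                w.addDocument(doc);
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("content", "real"));
            // First pass: count hits while the wrapping collector fills the cache.
            TotalHitCountCollector counter = new TotalHitCountCollector();
            CachingCollector cache = CachingCollector.create(counter, /* cacheScores= */ true, /* maxRAMMB= */ 1.0);
            searcher.search(query, cache);
            // Second pass: replay the cached hits into a top-docs collector,
            // or re-run the query if the cache overflowed.
            TopScoreDocCollector top = TopScoreDocCollector.create(10);
            if (cache.isCached()) {
                cache.replay(top);
            } else {
                searcher.search(query, top);
            }
            System.out.println(counter.getTotalHits() + " hits; "
                + top.topDocs().scoreDocs.length + " replayed");
        }
    }
}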

Aggregations

Classes shared across the examples above (the count is the number of examples using each):

CachingCollector (org.apache.lucene.search.CachingCollector): 3
Collector (org.apache.lucene.search.Collector): 3
MultiCollector (org.apache.lucene.search.MultiCollector): 3
ArrayList (java.util.ArrayList): 2
Sort (org.apache.lucene.search.Sort): 2
HashMap (java.util.HashMap): 1
HashSet (java.util.HashSet): 1
Map (java.util.Map): 1
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 1
Document (org.apache.lucene.document.Document): 1
Field (org.apache.lucene.document.Field): 1
NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 1
SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 1
StringField (org.apache.lucene.document.StringField): 1
TextField (org.apache.lucene.document.TextField): 1
DirectoryReader (org.apache.lucene.index.DirectoryReader): 1
NumericDocValues (org.apache.lucene.index.NumericDocValues): 1
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 1
Term (org.apache.lucene.index.Term): 1
FieldDoc (org.apache.lucene.search.FieldDoc): 1