Example 1 with Collector

use of org.apache.lucene.search.Collector in project elasticsearch by elastic.

the class QueryPhaseTests method countTestCase.

/**
 * Runs the query phase for {@code query} over {@code reader} and asserts two things: the total
 * hit count stored in the context's query result equals {@code searcher.count(query)}, and the
 * {@code collected} flag equals {@code shouldCollect}.
 *
 * NOTE(review): this copy of the method looks truncated — closing braces are absent, the body of
 * the overridden {@code search} is incomplete, and no visible statement ever sets
 * {@code collected}. Confirm against the upstream QueryPhaseTests before relying on it.
 *
 * @param query the query that is executed and counted
 * @param reader the index reader both searchers are created from
 * @param shouldCollect the value the {@code collected} flag is expected to have afterwards
 * @throws Exception declared by the method; the visible code does not catch anything
 */
private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
    TestSearchContext context = new TestSearchContext(null);
    context.parsedQuery(new ParsedQuery(query));
    context.setTask(new SearchTask(123L, "", "", "", null));
    // Plain searcher, used only to compute the expected count for the first assertion.
    IndexSearcher searcher = new IndexSearcher(reader);
    final AtomicBoolean collected = new AtomicBoolean();
    // Searcher handed to QueryPhase.execute; it overrides the leaf-level search method.
    IndexSearcher contextSearcher = new IndexSearcher(reader) {

        protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
            // NOTE(review): the start of the next statement is missing in this copy; it is
            // presumably a delegating call that takes (leaves, weight, collector) — verify.
  , weight, collector);
    // The returned rescore flag is not used by the assertions below.
    final boolean rescore = QueryPhase.execute(context, contextSearcher);
    assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits);
    assertEquals(shouldCollect, collected.get());
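Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TestSearchContext(org.elasticsearch.test.TestSearchContext) ParsedQuery(org.elasticsearch.index.query.ParsedQuery) SearchTask(org.elasticsearch.action.search.SearchTask) Collector(org.apache.lucene.search.Collector) List(java.util.List) Weight(org.apache.lucene.search.Weight)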

Example 2 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class CommandHandler method computeDocSet.

/**
 * Runs {@code query} with {@code filter} through {@code searchWithTimeLimiter}, collecting into a
 * {@code MultiCollector} wrapper around a copy of {@code collectors}, and returns the doc set that
 * {@code DocSetUtil.getDocSet} derives from a {@code DocSetCollector} sized to
 * {@code searcher.maxDoc()}.
 *
 * NOTE(review): in this copy {@code docSetCollector} is never added to {@code allCollectors}, so
 * it would not take part in the search; a statement was presumably lost — confirm against the
 * upstream CommandHandler. The closing brace is also missing.
 *
 * @param query the query to execute
 * @param filter the processed filter passed along to the time-limited search
 * @param collectors the caller's collectors; copied, so the caller's list is not modified here
 * @return the doc set obtained from the doc-set collector
 * @throws IOException declared by the method; the visible code does not catch anything
 */
private DocSet computeDocSet(Query query, ProcessedFilter filter, List<Collector> collectors) throws IOException {
    int maxDoc = searcher.maxDoc();
    final DocSetCollector docSetCollector = new DocSetCollector(maxDoc);
    // Work on a copy so the list passed in by the caller stays untouched.
    List<Collector> allCollectors = new ArrayList<>(collectors);
    searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
    return DocSetUtil.getDocSet(docSetCollector, searcher);
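Also used : ArrayList(java.util.ArrayList) DocSetCollector(org.apache.solr.search.DocSetCollector) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) AllGroupHeadsCollector(org.apache.lucene.search.grouping.AllGroupHeadsCollector) MultiCollector(org.apache.lucene.search.MultiCollector) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Collector(org.apache.lucene.search.Collector) DocSetCollector(org.apache.solr.search.DocSetCollector)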

Example 3 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class AnalyzingInfixSuggester method lookup.

  /**
   * This is an advanced method providing the capability to send down to the suggester any
   * arbitrary Lucene query to be used to filter the result of the suggester.
   * @param key the keyword being looked for
   * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
   * @param num number of items to return
   * @param allTermsRequired all searched terms must match or not
   * @param doHighlight if true, the matching term will be highlighted in the search result
   * @return the result of the suggester
   * @throws IOException if there is an IO exception while reading data from the index
   */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    // NOTE(review): this copy of the method has lost closing braces and several statements;
    // the notes below mark the gaps that are visible. Confirm against the upstream source.
    // Fail fast when no SearcherManager is available.
    if (searcherMgr == null) {
        throw new IllegalStateException("suggester was not built");
    // Term clauses are combined with MUST when all terms are required, SHOULD otherwise.
    final BooleanClause.Occur occur;
    if (allTermsRequired) {
        occur = BooleanClause.Occur.MUST;
    } else {
        occur = BooleanClause.Occur.SHOULD;
    BooleanQuery.Builder query;
    Set<String> matchedTokens;
    String prefixToken = null;
    // Analyze the key: inside the loop each previously seen token is added as a TermQuery,
    // so the final token is left over and handled separately after the loop.
    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
        //long t0 = System.currentTimeMillis();
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        String lastToken = null;
        query = new BooleanQuery.Builder();
        // Largest end offset seen across all tokens.
        int maxEndOffset = -1;
        // NOTE(review): nothing visible ever adds to matchedTokens; presumably a lost statement.
        matchedTokens = new HashSet<>();
        while (ts.incrementToken()) {
            if (lastToken != null) {
                query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            lastToken = termAtt.toString();
            if (lastToken != null) {
                maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
        if (lastToken != null) {
            Query lastQuery;
            if (maxEndOffset == offsetAtt.endOffset()) {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = getLastTokenQuery(lastToken);
                prefixToken = lastToken;
            } else {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            if (lastQuery != null) {
                query.add(lastQuery, occur);
        // Fold the optional contextQuery into the main query; how depends on whether all of
        // its clauses are MUST_NOT and on allTermsRequired.
        if (contextQuery != null) {
            boolean allMustNot = true;
            for (BooleanClause clause : contextQuery.clauses()) {
                if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
                    allMustNot = false;
            if (allMustNot) {
                // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
                // NOTE(review): the body of this loop is missing in this copy.
                for (BooleanClause clause : contextQuery.clauses()) {
            } else if (allTermsRequired == false) {
                // We must carefully upgrade the query clauses to MUST:
                BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
                // NOTE(review): the first argument of the next add(...) call is missing in this copy.
                newQuery.add(, BooleanClause.Occur.MUST);
                newQuery.add(contextQuery, BooleanClause.Occur.MUST);
                query = newQuery;
            } else {
                // Add contextQuery as sub-query
                query.add(contextQuery, BooleanClause.Occur.MUST);
    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:
    Query finalQuery = finishQuery(query, allTermsRequired);
    //System.out.println("finalQuery=" + finalQuery);
    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
        // acquire & release on same SearcherManager, via local reference
        mgr = searcherMgr;
        searcher = mgr.acquire();
    try {
        // NOTE(review): no visible statement runs finalQuery with c2; the trailing ", c2);" on
        // the comment line below suggests the search call was fused into it — verify.
        //System.out.println("got searcher=" + searcher);, c2);
        TopFieldDocs hits = c.topDocs();
        // Slower way if postings are not pre-sorted by weight:
        // hits =, null, num, SORT);
        results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    // NOTE(review): the finally body is empty in this copy, so the acquired searcher is not
    // visibly released through mgr — confirm against the upstream source.
    } finally {
    return results;
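Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SearcherManager(org.apache.lucene.search.SearcherManager) StringReader(java.io.StringReader) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TermQuery(org.apache.lucene.search.TermQuery) Occur(org.apache.lucene.search.BooleanClause.Occur) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)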

Example 4 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class TestGrouping method testRandom.

/**
 * Randomized grouping test. Each outer iteration picks random group values and content strings,
 * builds an array of {@code GroupDoc}s, opens a reader on a {@code RandomIndexWriter} plus a
 * second "blocks" reader from {@code getDocBlockReader}, and then runs 100 search iterations.
 * Every search iteration draws random sort / offset / caching options and compares the expected
 * result from {@code slowGrouping} against: the two-pass collector result, the sharded result
 * from {@code searchShards}, the {@code BlockGroupingCollector} result, and the sharded block
 * result.
 *
 * NOTE(review): this copy of the method has lost closing braces, whole statements (for example
 * no document is visibly added to the writer) and parts of expressions. Inline notes mark some
 * of the visible gaps; confirm everything against the upstream TestGrouping.
 *
 * @throws Exception declared by the method; the visible code does not catch anything
 */
public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        //final int numDocs = _TestUtil.nextInt(random, 5, 20);
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        // Generate numGroups random, non-empty group values.
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // B/c of DV based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string
                // groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        // Each content string is "real<0..2> " followed by zero to nine "fake " words.
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
        Field group = newStringField("group", "", Field.Store.NO);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        Field content = newTextField("content", "", Field.Store.NO);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        // Create one GroupDoc per document; roughly 1 in 24 gets a null group value.
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                // NOTE(review): operands of the null check in the next line are missing in this copy.
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + ( == null ? "null" : + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
            groupDocs[i] = groupDoc;
            // NOTE(review): the two conditions below have lost their left operand and most of
            // their branch bodies in this copy.
            if ( != null) {
            } else {
                // TODO: not true
                // Must explicitly set empty string, else eg if
                // the segment has all docs missing the field then
                // we get null back instead of empty BytesRef:
                idvGroupField.setBytesValue(new BytesRef());
            if ( == null) {
            } else {
        // Second array with the same GroupDoc references, used below for lookups by id.
        final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
        System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
        final DirectoryReader r = w.getReader();
        // Map each Lucene doc id of the top reader to the value of its "id" doc-values field.
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        int[] docIDToID = new int[r.maxDoc()];
        for (int i = 0; i < r.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToID[i] = (int) values.longValue();
        DirectoryReader rBlocks = null;
        Directory dirBlocks = null;
        final IndexSearcher s = newSearcher(r);
        if (VERBOSE) {
            System.out.println("\nTEST: searcher=" + s);
        final ShardState shards = new ShardState(s);
        Set<Integer> seenIDs = new HashSet<>();
        // Record the score of every hit for the three "real<N>" terms on its GroupDoc.
        for (int contentID = 0; contentID < 3; contentID++) {
            // NOTE(review): the beginning of the search expression is missing in the next line.
            final ScoreDoc[] hits = TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToID[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
                // NOTE(review): the first assertEquals argument is missing in this copy.
                assertEquals(, idValue);
        // make sure all groups were seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        for (GroupDoc gd : groupDocs) {
            assertTrue(gd.score >= 0.0);
        // Build 2nd index, where docs are added in blocks by
        // group, so we can use single pass collector
        dirBlocks = newDirectory();
        rBlocks = getDocBlockReader(dirBlocks, groupDocs);
        final Query lastDocInBlock = new TermQuery(new Term("groupend", "x"));
        final IndexSearcher sBlocks = newSearcher(rBlocks);
        final ShardState shardsBlocks = new ShardState(sBlocks);
        // ReaderBlocks only increases maxDoc() vs reader, which
        // means a monotonic shift in scores, so we can
        // reliably remap them w/ Map:
        final Map<String, Map<Float, Float>> scoreMap = new HashMap<>();
        values = MultiDocValues.getNumericValues(rBlocks, "id");
        int[] docIDToIDBlocks = new int[rBlocks.maxDoc()];
        for (int i = 0; i < rBlocks.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToIDBlocks[i] = (int) values.longValue();
        //System.out.println("fixup score2");
        // Per term, remember how each top-reader score maps to the block-reader score.
        for (int contentID = 0; contentID < 3; contentID++) {
            //System.out.println("  term=real" + contentID);
            final Map<Float, Float> termScoreMap = new HashMap<>();
            scoreMap.put("real" + contentID, termScoreMap);
            //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
            //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
            final ScoreDoc[] hits = TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
                assertTrue(gd.score2 == 0.0);
                gd.score2 = hit.score;
                assertEquals(, docIDToIDBlocks[hit.doc]);
                //System.out.println("    score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
                termScoreMap.put(gd.score, gd.score2);
        // Main randomized comparison loop.
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: searchIter=" + searchIter);
            final String searchTerm = "real" + random().nextInt(3);
            final boolean fillFields = random().nextBoolean();
            boolean getScores = random().nextBoolean();
            final boolean getMaxScores = random().nextBoolean();
            final Sort groupSort = getRandomSort();
            //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
            final Sort docSort = getRandomSort();
            // Scores are forced on whenever either sort needs them.
            getScores |= (groupSort.needsScores() || docSort.needsScores());
            final int topNGroups = TestUtil.nextInt(random(), 1, 30);
            //final int topNGroups = 10;
            final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
            final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
            //final int groupOffset = 0;
            final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
            //final int docOffset = 0;
            final boolean doCache = random().nextBoolean();
            final boolean doAllGroups = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
            String groupField = "group";
            if (VERBOSE) {
                System.out.println("  groupField=" + groupField);
            final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
            final CachingCollector cCache;
            final Collector c;
            final AllGroupsCollector<?> allGroupsCollector;
            if (doAllGroups) {
                allGroupsCollector = createAllGroupsCollector(c1, groupField);
            } else {
                allGroupsCollector = null;
            // Choose the collector c for the first search: optionally cached, optionally
            // wrapped together with the all-groups collector.
            final boolean useWrappingCollector = random().nextBoolean();
            if (doCache) {
                final double maxCacheMB = random().nextDouble();
                if (VERBOSE) {
                    System.out.println("TEST: maxCacheMB=" + maxCacheMB);
                if (useWrappingCollector) {
                    if (doAllGroups) {
                        cCache = CachingCollector.create(c1, true, maxCacheMB);
                        c = MultiCollector.wrap(cCache, allGroupsCollector);
                    } else {
                        c = cCache = CachingCollector.create(c1, true, maxCacheMB);
                } else {
                    // Collect only into cache, then replay multiple times:
                    c = cCache = CachingCollector.create(true, maxCacheMB);
            } else {
                cCache = null;
                if (doAllGroups) {
                    c = MultiCollector.wrap(c1, allGroupsCollector);
                } else {
                    c = c1;
            // Search top reader:
            final Query query = new TermQuery(new Term("content", searchTerm));
            // NOTE(review): here and in the similar ", <collector>);" fragments further down,
            // the start of the statement is missing in this copy.
  , c);
            if (doCache && !useWrappingCollector) {
                if (cCache.isCached()) {
                    // Replay for first-pass grouping
                    if (doAllGroups) {
                        // Replay for all groups:
                } else {
                    // Replay by re-running search:
          , c1);
                    if (doAllGroups) {
              , allGroupsCollector);
            // Get 1st pass top groups
            final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
            final TopGroups<BytesRef> groupsResult;
            if (VERBOSE) {
                System.out.println("TEST: first pass topGroups");
                if (topGroups == null) {
                    System.out.println("  null");
                } else {
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
            // Get 1st pass top groups using shards
            final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, true);
            final TopGroupsCollector<?> c2;
            if (topGroups != null) {
                if (VERBOSE) {
                    System.out.println("TEST: topGroups");
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
                // Second pass: collect the documents of the top groups found in the first pass.
                c2 = createSecondPassCollector(c1, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
                if (doCache) {
                    if (cCache.isCached()) {
                        if (VERBOSE) {
                            System.out.println("TEST: cache is intact");
                    } else {
                        if (VERBOSE) {
                            System.out.println("TEST: cache was too large");
              , c2);
                } else {
          , c2);
                if (doAllGroups) {
                    TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
                    groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
                } else {
                    groupsResult = getTopGroups(c2, docOffset);
            } else {
                c2 = null;
                groupsResult = null;
                if (VERBOSE) {
                    System.out.println("TEST:   no results");
            // Expected result, computed by the slowGrouping helper from the GroupDoc array.
            final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
            if (VERBOSE) {
                if (expectedGroups == null) {
                    System.out.println("TEST: no expected groups");
                } else {
                    System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : expectedGroups.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + sd.doc + " score=" + sd.score);
                if (groupsResult == null) {
                    System.out.println("TEST: no matched groups");
                } else {
                    System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : groupsResult.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                    if (searchIter == 14) {
                        for (int docIDX = 0; docIDX < s.getIndexReader().maxDoc(); docIDX++) {
                            System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
                if (topGroupsShards == null) {
                    System.out.println("TEST: no matched-merged groups");
                } else {
                    System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : topGroupsShards.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
            // Compare the two-pass result on the top reader with the expected groups.
            assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
            // Confirm merged shards match:
            assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
            if (topGroupsShards != null) {
                verifyShards(shards.docStarts, topGroupsShards);
            final boolean needsScores = getScores || getMaxScores || docSort == null;
            final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, sBlocks.createNormalizedWeight(lastDocInBlock, false));
            final AllGroupsCollector<BytesRef> allGroupsCollector2;
            final Collector c4;
            if (doAllGroups) {
                // NOTE: must be "group" and not "group_dv"
                // (groupField) because we didn't index doc
                // values in the block index:
                allGroupsCollector2 = new AllGroupsCollector<>(new TermGroupSelector("group"));
                c4 = MultiCollector.wrap(c3, allGroupsCollector2);
            } else {
                allGroupsCollector2 = null;
                c4 = c3;
            // Get block grouping result:
  , c4);
            @SuppressWarnings({ "unchecked", "rawtypes" }) final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields);
            final TopGroups<BytesRef> groupsResultBlocks;
            if (doAllGroups && tempTopGroupsBlocks != null) {
                assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
                groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
            } else {
                groupsResultBlocks = tempTopGroupsBlocks;
            if (VERBOSE) {
                if (groupsResultBlocks == null) {
                    System.out.println("TEST: no block groups");
                } else {
                    System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
                    boolean first = true;
                    for (GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
                            if (first) {
                                System.out.println("explain: " + sBlocks.explain(query, sd.doc));
                                first = false;
            // Get shard'd block grouping result:
            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
            if (expectedGroups != null) {
                // Fixup scores for reader2
                for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                    for (ScoreDoc hit : groupDocsHits.scoreDocs) {
                        final GroupDoc gd = groupDocsByID[hit.doc];
                        assertEquals(, hit.doc);
                        //System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
                        hit.score = gd.score2;
                // Remap SCORE-typed sort values of the expected groups through the per-term score map.
                final SortField[] sortFields = groupSort.getSort();
                final Map<Float, Float> termScoreMap = scoreMap.get(searchTerm);
                for (int groupSortIDX = 0; groupSortIDX < sortFields.length; groupSortIDX++) {
                    if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            if (groupDocsHits.groupSortValues != null) {
                                //System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
                                groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
                final SortField[] docSortFields = docSort.getSort();
                for (int docSortIDX = 0; docSortIDX < docSortFields.length; docSortIDX++) {
                    if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            for (ScoreDoc _hit : groupDocsHits.scoreDocs) {
                                FieldDoc hit = (FieldDoc) _hit;
                                if (hit.fields != null) {
                                    hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
            // Compare the block-index results (direct and sharded) with the remapped expectation.
            assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
            assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
Also used : Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) CachingCollector(org.apache.lucene.search.CachingCollector) HashSet(java.util.HashSet) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) StringField(org.apache.lucene.document.StringField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) MultiCollector(org.apache.lucene.search.MultiCollector) Collector(org.apache.lucene.search.Collector) CachingCollector(org.apache.lucene.search.CachingCollector) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery)

Example 5 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class ExpandComponent method process.

/**
 * Builds the "expanded" section of the response: for the collapse groups that
 * appear on the current page of results, runs a second search whose collector
 * gathers documents per group, then adds one DocSlice per group to
 * {@code rb.rsp} under the key "expanded".
 *
 * NOTE(review): this listing is an excerpt in which closing braces and several
 * statements are not visible (early return, counter increments, set/list adds,
 * the actual search call). Comments below describe only what is visible and
 * flag the gaps.
 *
 * @param rb response builder; supplies the request ({@code rb.req}), the main
 *           query and filters, the current result page ({@code rb.getResults()})
 *           and the response that receives the "expanded" entry
 * @throws IOException if no expand field can be determined, or wrapping any
 *                     exception raised while parsing the expand query / filter
 *                     queries or while building the group query
 */
public void process(ResponseBuilder rb) throws IOException {
    // Guard on the expand flag. NOTE(review): the guard's body is not visible
    // in this excerpt — presumably an early return; confirm against the full source.
    if (!rb.doExpand) {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    // No explicit expand field: fall back to the field (and hint) of a
    // CollapsingPostFilter found among the request's filters.
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                    hint = cp.hint;
    // Still no field after the fallback: expansion cannot proceed.
    if (field == null) {
        throw new IOException("Expand field is null.");
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    // 5 is the second argument to getInt — presumably the default used when
    // EXPAND_ROWS is absent; confirm against SolrParams.
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    // Query used for the expansion search: the main query unless an explicit
    // expand query string was supplied. Parse failures are wrapped in IOException.
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
    // Filters for the expansion search. Without explicit expand filter queries
    // the request's own filters are examined, singling out those that are not
    // the collapse post filter; otherwise each non-blank expand fq other than
    // "*:*" is parsed.
    // NOTE(review): the statements that add to newFilters are not visible in
    // this excerpt.
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, req);
        } catch (Exception e) {
            throw new IOException(e);
    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();
    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();
    // 'values' stays null for non-StrField types; later code branches on
    // (values != null) to choose between the ordinal-based and numeric paths.
    SortedDocValues values = null;
    long nullValue = 0L;
    if (fieldType instanceof StrField) {
        //Get The Top Level SortedDocValues
        // With the HINT_TOP_FC hint the values are read through an
        // UninvertingReader (field mapped to Type.SORTED); otherwise straight
        // from DocValues.getSorted.
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
    } else {
        //Get the nullValue for the numeric collapse field
        String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
        final NumberType numType = fieldType.getNumberType();
        // we don't need to handle invalid 64-bit field types here.
        // Only INTEGER and FLOAT are handled; a float default is stored as its
        // raw int bit pattern (Float.floatToIntBits) in the long nullValue.
        if (defaultValue != null) {
            if (numType == NumberType.INTEGER) {
                nullValue = Long.parseLong(defaultValue);
            } else if (numType == NumberType.FLOAT) {
                nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
        } else if (NumberType.FLOAT.equals(numType)) {
            // Integer case already handled by nullValue defaulting to 0
            nullValue = Float.floatToIntBits(0.0f);
    // groupBits is used on the SortedDocValues path, groupSet on the numeric path.
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    // NOTE(review): collapsedSet is only created and handed to the collectors
    // here; where documents are added to it is not visible in this excerpt.
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
    //Gather the groups for the current page of documents
    // Copy the current page's document ids into an array.
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
        globalDocs[++docsIndex] = idit.nextDoc();
    Query groupQuery = null;
    // NOTE(review): the next line is the remnant of a block comment whose
    // opening/closing delimiters are missing in this excerpt.
    * This code gathers the group information for the current page.
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    if (contexts.size() == 0) {
        //When no context is available we can skip the expanding
    // Track the doc base of the current leaf and of the following one so a
    // top-level doc id can be converted to a leaf-local id
    // (globalDoc - currentDocBase) further down.
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
        groupBits = new FixedBitSet(values.getValueCount());
        MultiDocValues.OrdinalMap ordinalMap = null;
        SortedDocValues[] sortedDocValues = null;
        LongValues segmentOrdinalMap = null;
        SortedDocValues currentValues = null;
        // Multi-segment doc values: read ords from the per-segment values and
        // translate them through the ordinal map's per-segment mapping
        // (getGlobalOrds).
        if (values instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
            sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
        // NOTE(review): no increment of 'count' (nor any update of groupBits /
        // collapsedSet) is visible in this excerpt; the checks on count below
        // depend on statements that are not shown.
        int count = 0;
        ordBytes = new IntObjectHashMap<>();
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            // Re-read the leaf bookkeeping while the doc lies at or beyond the
            // next leaf's doc base.
            // NOTE(review): the increment of currentContext is not visible here.
            while (globalDoc >= nextDocBase) {
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                if (ordinalMap != null) {
                    currentValues = sortedDocValues[currentContext];
                    segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
            int contextDoc = globalDoc - currentDocBase;
            if (ordinalMap != null) {
                // NOTE(review): body of the next guard is not visible —
                // presumably it advances currentValues to contextDoc.
                if (contextDoc > currentValues.docID()) {
                if (contextDoc == currentValues.docID()) {
                    // Segment ord -> term bytes, then map the ord through
                    // segmentOrdinalMap and store a private copy of the bytes
                    // under the mapped ord.
                    int ord = currentValues.ordValue();
                    BytesRef ref = currentValues.lookupOrd(ord);
                    ord = (int) segmentOrdinalMap.get(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
            } else {
                // No ordinal map: 'values' is consulted directly with the
                // top-level doc id and its ords are used as-is.
                if (globalDoc > values.docID()) {
                if (globalDoc == values.docID()) {
                    int ord = values.ordValue();
                    BytesRef ref = values.lookupOrd(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
        // A group query is built only when count is between 1 and 199.
        if (count > 0 && count < 200) {
            try {
                groupQuery = getGroupQuery(field, count, ordBytes);
            } catch (Exception e) {
                throw new IOException(e);
    } else {
        // Numeric path: read per-leaf NumericDocValues for the field.
        groupSet = new LongHashSet(docList.size());
        NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
        // NOTE(review): as above, the increment of 'count' and the add to
        // groupSet are not visible in this excerpt.
        int count = 0;
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            // Same leaf bookkeeping as in the SortedDocValues branch, fetching
            // the leaf's NumericDocValues each time.
            while (globalDoc >= nextDocBase) {
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
            int contextDoc = globalDoc - currentDocBase;
            // Advance the iterator only when it is behind the target doc.
            int valueDocID = collapseValues.docID();
            if (valueDocID < contextDoc) {
                valueDocID = collapseValues.advance(contextDoc);
            // The value is 0 when the iterator is not positioned on this doc.
            long value;
            if (valueDocID == contextDoc) {
                value = collapseValues.longValue();
            } else {
                value = 0;
            // Values equal to nullValue are singled out by this check.
            // NOTE(review): the body of the check is not visible here.
            if (value != nullValue) {
        // Same 1..199 bound as above; point fields use a dedicated query builder.
        if (count > 0 && count < 200) {
            if (fieldType.isPointField()) {
                groupQuery = getPointGroupQuery(schemaField, count, groupSet);
            } else {
                groupQuery = getGroupQuery(field, fieldType, count, groupSet);
    Collector collector;
    // Rewrite the sort against the searcher before handing it to the collector.
    if (sort != null)
        sort = sort.rewrite(searcher);
    Collector groupExpandCollector = null;
    if (values != null) {
        //Get The Top Level SortedDocValues again so we can re-iterate:
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
    } else {
        groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
    // NOTE(review): what is done with a non-null groupQuery is not visible in
    // this excerpt; only the original comment below remains.
    if (groupQuery != null) {
        //Limits the results to documents that are in the same group as the documents in the page.
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    // When the processed filter carries a post filter, that post filter is the
    // collector; otherwise the group-expand collector is used directly.
    // NOTE(review): how groupExpandCollector is attached to the post filter is
    // not visible here.
    if (pfilter.postFilter != null) {
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    // NOTE(review): the two search invocations below are truncated in this
    // excerpt (only ", collector);" remains). With a filter present, the query
    // is combined with it as MUST + FILTER clauses of a BooleanQuery.
    if (pfilter.filter == null) {, collector);
    } else {
        Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();, collector);
    // Read the per-group collectors back from the group-expand collector
    // (cast to GroupCollector), keyed by a long group value.
    LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
    NamedList outMap = new SimpleOrderedMap();
    CharsRefBuilder charsRef = new CharsRefBuilder();
    for (LongObjectCursor<Collector> cursor : groups) {
        long groupValue = cursor.key;
        TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        // Groups without any collected docs are not added to the output.
        if (scoreDocs.length > 0) {
            // Split the ScoreDocs into parallel doc-id / score arrays for DocSlice.
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
            // Output key: for StrField the group value is treated as an ord,
            // looked up in ordBytes and converted with indexedToReadable;
            // otherwise it is formatted via numericToString.
            if (fieldType instanceof StrField) {
                final BytesRef bytesRef = ordBytes.get((int) groupValue);
                fieldType.indexedToReadable(bytesRef, charsRef);
                String group = charsRef.toString();
                outMap.add(group, slice);
            } else {
                outMap.add(numericToString(fieldType, groupValue), slice);
    rb.rsp.add("expanded", outMap);
Also used : StrField(org.apache.solr.schema.StrField) BooleanQuery( Query( TermInSetQuery( SolrConstantScoreQuery( BooleanQuery( HashMap(java.util.HashMap) LongObjectHashMap(com.carrotsearch.hppc.LongObjectHashMap) IntObjectHashMap(com.carrotsearch.hppc.IntObjectHashMap) ArrayList(java.util.ArrayList) IntHashSet(com.carrotsearch.hppc.IntHashSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) DocSlice( ScoreDoc( FixedBitSet(org.apache.lucene.util.FixedBitSet) Sort( LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SortedDocValues(org.apache.lucene.index.SortedDocValues) LongHashSet(com.carrotsearch.hppc.LongHashSet) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) NumberType(org.apache.solr.schema.NumberType) QParser( SolrParams(org.apache.solr.common.params.SolrParams) NumericDocValues(org.apache.lucene.index.NumericDocValues) DocIterator( SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) UninvertingReader(org.apache.solr.uninverting.UninvertingReader) TopDocs( TopFieldCollector( LeafCollector( Collector( TopScoreDocCollector( TopDocsCollector( BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) FilterLeafReader(org.apache.lucene.index.FilterLeafReader) NamedList(org.apache.solr.common.util.NamedList) IOException( SolrIndexSearcher( IOException( FieldType(org.apache.solr.schema.FieldType) CollapsingQParserPlugin( SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) FieldType(org.apache.solr.schema.FieldType) DocValuesType(org.apache.lucene.index.DocValuesType) LongValues(org.apache.lucene.util.LongValues) DocList(


