Examples with Collector - org.apache.lucene.search.Collector

Example 1 with Collector

use of org.apache.lucene.search.Collector in project elasticsearch by elastic.

the class QueryPhaseTests method countTestCase.

private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception {
    TestSearchContext context = new TestSearchContext(null);
    context.parsedQuery(new ParsedQuery(query));
    context.setSize(0);
    context.setTask(new SearchTask(123L, "", "", "", null));
    IndexSearcher searcher = new IndexSearcher(reader);
    final AtomicBoolean collected = new AtomicBoolean();
    IndexSearcher contextSearcher = new IndexSearcher(reader) {

        protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
            collected.set(true);
            super.search(leaves, weight, collector);
        }
    };
    final boolean rescore = QueryPhase.execute(context, contextSearcher);
    assertFalse(rescore);
    assertEquals(searcher.count(query), context.queryResult().topDocs().totalHits);
    assertEquals(shouldCollect, collected.get());
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TestSearchContext(org.elasticsearch.test.TestSearchContext) ParsedQuery(org.elasticsearch.index.query.ParsedQuery) SearchTask(org.elasticsearch.action.search.SearchTask) Collector(org.apache.lucene.search.Collector) List(java.util.List) Weight(org.apache.lucene.search.Weight)

Example 2 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class CommandHandler method computeDocSet.

private DocSet computeDocSet(Query query, ProcessedFilter filter, List<Collector> collectors) throws IOException {
    int maxDoc = searcher.maxDoc();
    final DocSetCollector docSetCollector = new DocSetCollector(maxDoc);
    List<Collector> allCollectors = new ArrayList<>(collectors);
    allCollectors.add(docSetCollector);
    searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
    return DocSetUtil.getDocSet(docSetCollector, searcher);
}

Also used : ArrayList(java.util.ArrayList) DocSetCollector(org.apache.solr.search.DocSetCollector) TimeLimitingCollector(org.apache.lucene.search.TimeLimitingCollector) AllGroupHeadsCollector(org.apache.lucene.search.grouping.AllGroupHeadsCollector) MultiCollector(org.apache.lucene.search.MultiCollector) TotalHitCountCollector(org.apache.lucene.search.TotalHitCountCollector) Collector(org.apache.lucene.search.Collector) DocSetCollector(org.apache.solr.search.DocSetCollector)

Example 3 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class AnalyzingInfixSuggester method lookup.

/**
   * This is an advanced method providing the capability to send down to the suggester any 
   * arbitrary lucene query to be used to filter the result of the suggester
   * 
   * @param key the keyword being looked for
   * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
   * @param num number of items to return
   * @param allTermsRequired all searched terms must match or not
   * @param doHighlight if true, the matching term will be highlighted in the search result
   * @return the result of the suggester
   * @throws IOException f the is IO exception while reading data from the index
   */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    if (searcherMgr == null) {
        throw new IllegalStateException("suggester was not built");
    }
    final BooleanClause.Occur occur;
    if (allTermsRequired) {
        occur = BooleanClause.Occur.MUST;
    } else {
        occur = BooleanClause.Occur.SHOULD;
    }
    BooleanQuery.Builder query;
    Set<String> matchedTokens;
    String prefixToken = null;
    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
        //long t0 = System.currentTimeMillis();
        ts.reset();
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        String lastToken = null;
        query = new BooleanQuery.Builder();
        int maxEndOffset = -1;
        matchedTokens = new HashSet<>();
        while (ts.incrementToken()) {
            if (lastToken != null) {
                matchedTokens.add(lastToken);
                query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.toString();
            if (lastToken != null) {
                maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
            }
        }
        ts.end();
        if (lastToken != null) {
            Query lastQuery;
            if (maxEndOffset == offsetAtt.endOffset()) {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = getLastTokenQuery(lastToken);
                prefixToken = lastToken;
            } else {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }
            if (lastQuery != null) {
                query.add(lastQuery, occur);
            }
        }
        if (contextQuery != null) {
            boolean allMustNot = true;
            for (BooleanClause clause : contextQuery.clauses()) {
                if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
                    allMustNot = false;
                    break;
                }
            }
            if (allMustNot) {
                // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
                for (BooleanClause clause : contextQuery.clauses()) {
                    query.add(clause);
                }
            } else if (allTermsRequired == false) {
                // We must carefully upgrade the query clauses to MUST:
                BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
                newQuery.add(query.build(), BooleanClause.Occur.MUST);
                newQuery.add(contextQuery, BooleanClause.Occur.MUST);
                query = newQuery;
            } else {
                // Add contextQuery as sub-query
                query.add(contextQuery, BooleanClause.Occur.MUST);
            }
        }
    }
    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:
    Query finalQuery = finishQuery(query, allTermsRequired);
    //System.out.println("finalQuery=" + finalQuery);
    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
        // acquire & release on same SearcherManager, via local reference
        mgr = searcherMgr;
        searcher = mgr.acquire();
    }
    try {
        //System.out.println("got searcher=" + searcher);
        searcher.search(finalQuery, c2);
        TopFieldDocs hits = c.topDocs();
        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
        mgr.release(searcher);
    }
    return results;
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SearcherManager(org.apache.lucene.search.SearcherManager) StringReader(java.io.StringReader) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TermQuery(org.apache.lucene.search.TermQuery) Occur(org.apache.lucene.search.BooleanClause.Occur) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)

Example 4 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class TestGrouping method testRandom.

public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        //final int numDocs = _TestUtil.nextInt(random, 5, 20);
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // B/c of DV based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string
                // groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
        doc.add(idvGroupField);
        docNoGroup.add(idvGroupField);
        Field group = newStringField("group", "", Field.Store.NO);
        doc.add(group);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            if (groupDoc.group != null) {
                group.setStringValue(groupDoc.group.utf8ToString());
                idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
            } else {
                // TODO: not true
                // Must explicitly set empty string, else eg if
                // the segment has all docs missing the field then
                // we get null back instead of empty BytesRef:
                idvGroupField.setBytesValue(new BytesRef());
            }
            sort1.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort1));
            sort2.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort2));
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
        System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
        final DirectoryReader r = w.getReader();
        w.close();
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        int[] docIDToID = new int[r.maxDoc()];
        for (int i = 0; i < r.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToID[i] = (int) values.longValue();
        }
        DirectoryReader rBlocks = null;
        Directory dirBlocks = null;
        final IndexSearcher s = newSearcher(r);
        if (VERBOSE) {
            System.out.println("\nTEST: searcher=" + s);
        }
        final ShardState shards = new ShardState(s);
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToID[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                seenIDs.add(idValue);
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
                assertEquals(gd.id, idValue);
            }
        }
        // make sure all groups were seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        // Build 2nd index, where docs are added in blocks by
        // group, so we can use single pass collector
        dirBlocks = newDirectory();
        rBlocks = getDocBlockReader(dirBlocks, groupDocs);
        final Query lastDocInBlock = new TermQuery(new Term("groupend", "x"));
        final IndexSearcher sBlocks = newSearcher(rBlocks);
        final ShardState shardsBlocks = new ShardState(sBlocks);
        // ReaderBlocks only increases maxDoc() vs reader, which
        // means a monotonic shift in scores, so we can
        // reliably remap them w/ Map:
        final Map<String, Map<Float, Float>> scoreMap = new HashMap<>();
        values = MultiDocValues.getNumericValues(rBlocks, "id");
        assertNotNull(values);
        int[] docIDToIDBlocks = new int[rBlocks.maxDoc()];
        for (int i = 0; i < rBlocks.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToIDBlocks[i] = (int) values.longValue();
        }
        //System.out.println("fixup score2");
        for (int contentID = 0; contentID < 3; contentID++) {
            //System.out.println("  term=real" + contentID);
            final Map<Float, Float> termScoreMap = new HashMap<>();
            scoreMap.put("real" + contentID, termScoreMap);
            //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
            //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
            final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
                assertTrue(gd.score2 == 0.0);
                gd.score2 = hit.score;
                assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
                //System.out.println("    score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
                termScoreMap.put(gd.score, gd.score2);
            }
        }
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            final boolean fillFields = random().nextBoolean();
            boolean getScores = random().nextBoolean();
            final boolean getMaxScores = random().nextBoolean();
            final Sort groupSort = getRandomSort();
            //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
            final Sort docSort = getRandomSort();
            getScores |= (groupSort.needsScores() || docSort.needsScores());
            final int topNGroups = TestUtil.nextInt(random(), 1, 30);
            //final int topNGroups = 10;
            final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
            final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
            //final int groupOffset = 0;
            final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
            //final int docOffset = 0;
            final boolean doCache = random().nextBoolean();
            final boolean doAllGroups = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
            }
            String groupField = "group";
            if (VERBOSE) {
                System.out.println("  groupField=" + groupField);
            }
            final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
            final CachingCollector cCache;
            final Collector c;
            final AllGroupsCollector<?> allGroupsCollector;
            if (doAllGroups) {
                allGroupsCollector = createAllGroupsCollector(c1, groupField);
            } else {
                allGroupsCollector = null;
            }
            final boolean useWrappingCollector = random().nextBoolean();
            if (doCache) {
                final double maxCacheMB = random().nextDouble();
                if (VERBOSE) {
                    System.out.println("TEST: maxCacheMB=" + maxCacheMB);
                }
                if (useWrappingCollector) {
                    if (doAllGroups) {
                        cCache = CachingCollector.create(c1, true, maxCacheMB);
                        c = MultiCollector.wrap(cCache, allGroupsCollector);
                    } else {
                        c = cCache = CachingCollector.create(c1, true, maxCacheMB);
                    }
                } else {
                    // Collect only into cache, then replay multiple times:
                    c = cCache = CachingCollector.create(true, maxCacheMB);
                }
            } else {
                cCache = null;
                if (doAllGroups) {
                    c = MultiCollector.wrap(c1, allGroupsCollector);
                } else {
                    c = c1;
                }
            }
            // Search top reader:
            final Query query = new TermQuery(new Term("content", searchTerm));
            s.search(query, c);
            if (doCache && !useWrappingCollector) {
                if (cCache.isCached()) {
                    // Replay for first-pass grouping
                    cCache.replay(c1);
                    if (doAllGroups) {
                        // Replay for all groups:
                        cCache.replay(allGroupsCollector);
                    }
                } else {
                    // Replay by re-running search:
                    s.search(query, c1);
                    if (doAllGroups) {
                        s.search(query, allGroupsCollector);
                    }
                }
            }
            // Get 1st pass top groups
            final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
            final TopGroups<BytesRef> groupsResult;
            if (VERBOSE) {
                System.out.println("TEST: first pass topGroups");
                if (topGroups == null) {
                    System.out.println("  null");
                } else {
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
            }
            // Get 1st pass top groups using shards
            final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, true);
            final TopGroupsCollector<?> c2;
            if (topGroups != null) {
                if (VERBOSE) {
                    System.out.println("TEST: topGroups");
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
                c2 = createSecondPassCollector(c1, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
                if (doCache) {
                    if (cCache.isCached()) {
                        if (VERBOSE) {
                            System.out.println("TEST: cache is intact");
                        }
                        cCache.replay(c2);
                    } else {
                        if (VERBOSE) {
                            System.out.println("TEST: cache was too large");
                        }
                        s.search(query, c2);
                    }
                } else {
                    s.search(query, c2);
                }
                if (doAllGroups) {
                    TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
                    groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
                } else {
                    groupsResult = getTopGroups(c2, docOffset);
                }
            } else {
                c2 = null;
                groupsResult = null;
                if (VERBOSE) {
                    System.out.println("TEST:   no results");
                }
            }
            final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
            if (VERBOSE) {
                if (expectedGroups == null) {
                    System.out.println("TEST: no expected groups");
                } else {
                    System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : expectedGroups.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + sd.doc + " score=" + sd.score);
                        }
                    }
                }
                if (groupsResult == null) {
                    System.out.println("TEST: no matched groups");
                } else {
                    System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : groupsResult.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                    if (searchIter == 14) {
                        for (int docIDX = 0; docIDX < s.getIndexReader().maxDoc(); docIDX++) {
                            System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
                        }
                    }
                }
                if (topGroupsShards == null) {
                    System.out.println("TEST: no matched-merged groups");
                } else {
                    System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : topGroupsShards.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                }
            }
            assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
            // Confirm merged shards match:
            assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
            if (topGroupsShards != null) {
                verifyShards(shards.docStarts, topGroupsShards);
            }
            final boolean needsScores = getScores || getMaxScores || docSort == null;
            final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, sBlocks.createNormalizedWeight(lastDocInBlock, false));
            final AllGroupsCollector<BytesRef> allGroupsCollector2;
            final Collector c4;
            if (doAllGroups) {
                // NOTE: must be "group" and not "group_dv"
                // (groupField) because we didn't index doc
                // values in the block index:
                allGroupsCollector2 = new AllGroupsCollector<>(new TermGroupSelector("group"));
                c4 = MultiCollector.wrap(c3, allGroupsCollector2);
            } else {
                allGroupsCollector2 = null;
                c4 = c3;
            }
            // Get block grouping result:
            sBlocks.search(query, c4);
            @SuppressWarnings({ "unchecked", "rawtypes" }) final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields);
            final TopGroups<BytesRef> groupsResultBlocks;
            if (doAllGroups && tempTopGroupsBlocks != null) {
                assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
                groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
            } else {
                groupsResultBlocks = tempTopGroupsBlocks;
            }
            if (VERBOSE) {
                if (groupsResultBlocks == null) {
                    System.out.println("TEST: no block groups");
                } else {
                    System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
                    boolean first = true;
                    for (GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
                            if (first) {
                                System.out.println("explain: " + sBlocks.explain(query, sd.doc));
                                first = false;
                            }
                        }
                    }
                }
            }
            // Get shard'd block grouping result:
            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
            if (expectedGroups != null) {
                // Fixup scores for reader2
                for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                    for (ScoreDoc hit : groupDocsHits.scoreDocs) {
                        final GroupDoc gd = groupDocsByID[hit.doc];
                        assertEquals(gd.id, hit.doc);
                        //System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
                        hit.score = gd.score2;
                    }
                }
                final SortField[] sortFields = groupSort.getSort();
                final Map<Float, Float> termScoreMap = scoreMap.get(searchTerm);
                for (int groupSortIDX = 0; groupSortIDX < sortFields.length; groupSortIDX++) {
                    if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            if (groupDocsHits.groupSortValues != null) {
                                //System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
                                groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
                                assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
                            }
                        }
                    }
                }
                final SortField[] docSortFields = docSort.getSort();
                for (int docSortIDX = 0; docSortIDX < docSortFields.length; docSortIDX++) {
                    if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            for (ScoreDoc _hit : groupDocsHits.scoreDocs) {
                                FieldDoc hit = (FieldDoc) _hit;
                                if (hit.fields != null) {
                                    hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
                                    assertNotNull(hit.fields[docSortIDX]);
                                }
                            }
                        }
                    }
                }
            }
            assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
            assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
        }
        r.close();
        dir.close();
        rBlocks.close();
        dirBlocks.close();
    }
}

Also used : Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) CachingCollector(org.apache.lucene.search.CachingCollector) HashSet(java.util.HashSet) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) HashMap(java.util.HashMap) Map(java.util.Map) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) StringField(org.apache.lucene.document.StringField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) MultiCollector(org.apache.lucene.search.MultiCollector) Collector(org.apache.lucene.search.Collector) CachingCollector(org.apache.lucene.search.CachingCollector) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery)

Example 5 with Collector

use of org.apache.lucene.search.Collector in project lucene-solr by apache.

the class ExpandComponent method process.

@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doExpand) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                    hint = cp.hint;
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();
    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();
    SortedDocValues values = null;
    long nullValue = 0L;
    if (fieldType instanceof StrField) {
        //Get The Top Level SortedDocValues
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
    } else {
        //Get the nullValue for the numeric collapse field
        String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
        final NumberType numType = fieldType.getNumberType();
        // we don't need to handle invalid 64-bit field types here.
        if (defaultValue != null) {
            if (numType == NumberType.INTEGER) {
                nullValue = Long.parseLong(defaultValue);
            } else if (numType == NumberType.FLOAT) {
                nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
            }
        } else if (NumberType.FLOAT.equals(numType)) {
            // Integer case already handled by nullValue defaulting to 0
            nullValue = Float.floatToIntBits(0.0f);
        }
    }
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
    //Gather the groups for the current page of documents
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
        globalDocs[++docsIndex] = idit.nextDoc();
    }
    Arrays.sort(globalDocs);
    Query groupQuery = null;
    /*
    * This code gathers the group information for the current page.
    */
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    if (contexts.size() == 0) {
        //When no context is available we can skip the expanding
        return;
    }
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
        groupBits = new FixedBitSet(values.getValueCount());
        MultiDocValues.OrdinalMap ordinalMap = null;
        SortedDocValues[] sortedDocValues = null;
        LongValues segmentOrdinalMap = null;
        SortedDocValues currentValues = null;
        if (values instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
            sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
        }
        int count = 0;
        ordBytes = new IntObjectHashMap<>();
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                if (ordinalMap != null) {
                    currentValues = sortedDocValues[currentContext];
                    segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
                }
            }
            int contextDoc = globalDoc - currentDocBase;
            if (ordinalMap != null) {
                if (contextDoc > currentValues.docID()) {
                    currentValues.advance(contextDoc);
                }
                if (contextDoc == currentValues.docID()) {
                    int ord = currentValues.ordValue();
                    ++count;
                    BytesRef ref = currentValues.lookupOrd(ord);
                    ord = (int) segmentOrdinalMap.get(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            } else {
                if (globalDoc > values.docID()) {
                    values.advance(globalDoc);
                }
                if (globalDoc == values.docID()) {
                    int ord = values.ordValue();
                    ++count;
                    BytesRef ref = values.lookupOrd(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            }
        }
        if (count > 0 && count < 200) {
            try {
                groupQuery = getGroupQuery(field, count, ordBytes);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    } else {
        groupSet = new LongHashSet(docList.size());
        NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
        int count = 0;
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
            }
            int contextDoc = globalDoc - currentDocBase;
            int valueDocID = collapseValues.docID();
            if (valueDocID < contextDoc) {
                valueDocID = collapseValues.advance(contextDoc);
            }
            long value;
            if (valueDocID == contextDoc) {
                value = collapseValues.longValue();
            } else {
                value = 0;
            }
            if (value != nullValue) {
                ++count;
                groupSet.add(value);
                collapsedSet.add(globalDoc);
            }
        }
        if (count > 0 && count < 200) {
            if (fieldType.isPointField()) {
                groupQuery = getPointGroupQuery(schemaField, count, groupSet);
            } else {
                groupQuery = getGroupQuery(field, fieldType, count, groupSet);
            }
        }
    }
    Collector collector;
    if (sort != null)
        sort = sort.rewrite(searcher);
    Collector groupExpandCollector = null;
    if (values != null) {
        //Get The Top Level SortedDocValues again so we can re-iterate:
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
        groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
    } else {
        groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
    }
    if (groupQuery != null) {
        //Limits the results to documents that are in the same group as the documents in the page.
        newFilters.add(groupQuery);
    }
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    if (pfilter.filter == null) {
        searcher.search(query, collector);
    } else {
        Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
        searcher.search(q, collector);
    }
    LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
    NamedList outMap = new SimpleOrderedMap();
    CharsRefBuilder charsRef = new CharsRefBuilder();
    for (LongObjectCursor<Collector> cursor : groups) {
        long groupValue = cursor.key;
        TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
            if (fieldType instanceof StrField) {
                final BytesRef bytesRef = ordBytes.get((int) groupValue);
                fieldType.indexedToReadable(bytesRef, charsRef);
                String group = charsRef.toString();
                outMap.add(group, slice);
            } else {
                outMap.add(numericToString(fieldType, groupValue), slice);
            }
        }
    }
    rb.rsp.add("expanded", outMap);
}

Also used : StrField(org.apache.solr.schema.StrField) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) LongObjectHashMap(com.carrotsearch.hppc.LongObjectHashMap) IntObjectHashMap(com.carrotsearch.hppc.IntObjectHashMap) ArrayList(java.util.ArrayList) IntHashSet(com.carrotsearch.hppc.IntHashSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) DocSlice(org.apache.solr.search.DocSlice) ScoreDoc(org.apache.lucene.search.ScoreDoc) FixedBitSet(org.apache.lucene.util.FixedBitSet) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) SortedDocValues(org.apache.lucene.index.SortedDocValues) LongHashSet(com.carrotsearch.hppc.LongHashSet) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) NumberType(org.apache.solr.schema.NumberType) QParser(org.apache.solr.search.QParser) SolrParams(org.apache.solr.common.params.SolrParams) NumericDocValues(org.apache.lucene.index.NumericDocValues) DocIterator(org.apache.solr.search.DocIterator) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) UninvertingReader(org.apache.solr.uninverting.UninvertingReader) TopDocs(org.apache.lucene.search.TopDocs) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) LeafCollector(org.apache.lucene.search.LeafCollector) Collector(org.apache.lucene.search.Collector) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) TopDocsCollector(org.apache.lucene.search.TopDocsCollector) BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) FilterLeafReader(org.apache.lucene.index.FilterLeafReader) NamedList(org.apache.solr.common.util.NamedList) IOException(java.io.IOException) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) FieldType(org.apache.solr.schema.FieldType) CollapsingQParserPlugin(org.apache.solr.search.CollapsingQParserPlugin) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) FieldType(org.apache.solr.schema.FieldType) DocValuesType(org.apache.lucene.index.DocValuesType) LongValues(org.apache.lucene.util.LongValues) DocList(org.apache.solr.search.DocList)

Aggregations

Collector (org.apache.lucene.search.Collector)26 ArrayList (java.util.ArrayList)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)11 Query (org.apache.lucene.search.Query)9 IOException (java.io.IOException)8 TopFieldCollector (org.apache.lucene.search.TopFieldCollector)8 LeafCollector (org.apache.lucene.search.LeafCollector)7 Sort (org.apache.lucene.search.Sort)7 Document (org.apache.lucene.document.Document)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 MultiCollector (org.apache.lucene.search.MultiCollector)6 ScoreDoc (org.apache.lucene.search.ScoreDoc)6 BytesRef (org.apache.lucene.util.BytesRef)5 HashMap (java.util.HashMap)4 HashSet (java.util.HashSet)4 StringField (org.apache.lucene.document.StringField)4 BooleanQuery (org.apache.lucene.search.BooleanQuery)4 Directory (org.apache.lucene.store.Directory)4 List (java.util.List)3 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)3