Example 11 with FieldDoc

Use of org.apache.lucene.search.FieldDoc in project lucene-solr by Apache.

The class TestDemoExpressions, method testDistanceSort:

public void testDistanceSort() throws Exception {
    Expression distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
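    // The two bindings below map the expression's free variables to per-document
    // double doc values; the computed distance then becomes the hit's sort value,
    // surfaced as a Double in FieldDoc.fields[0].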
    SimpleBindings bindings = new SimpleBindings();
    bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
    bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
    Sort sort = new Sort(distance.getSortField(bindings, false));
    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
    FieldDoc d = (FieldDoc) td.scoreDocs[0];
    assertEquals(0.4621D, (Double) d.fields[0], 1E-4);
    d = (FieldDoc) td.scoreDocs[1];
    assertEquals(1.055D, (Double) d.fields[0], 1E-4);
    d = (FieldDoc) td.scoreDocs[2];
    assertEquals(5.2859D, (Double) d.fields[0], 1E-4);
}
Also used: FieldDoc (org.apache.lucene.search.FieldDoc), Sort (org.apache.lucene.search.Sort), TopFieldDocs (org.apache.lucene.search.TopFieldDocs), SortField (org.apache.lucene.search.SortField), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)
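
The test relies on a searcher field built in the suite's setUp, which this page does not show. A minimal sketch of the setup it assumes (the use of DoubleDocValuesField and the sample coordinates are assumptions inferred from the expression above, not part of this example):

Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
// Each document stores its position as double doc values so the compiled
// expression can read "latitude" and "longitude" per hit:
doc.add(new DoubleDocValuesField("latitude", 40.759011));
doc.add(new DoubleDocValuesField("longitude", -73.9844722));
iw.addDocument(doc);
IndexSearcher searcher = newSearcher(iw.getReader());
iw.close();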

Example 12 with FieldDoc

Use of org.apache.lucene.search.FieldDoc in project lucene-solr by Apache.

The class TestDemoExpressions, method doTestLotsOfBindings:

private void doTestLotsOfBindings(int n) throws Exception {
    SimpleBindings bindings = new SimpleBindings();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < n; i++) {
        if (i > 0) {
            sb.append("+");
        }
        sb.append("x" + i);
        bindings.add(new SortField("x" + i, SortField.Type.SCORE));
    }
    Expression expr = JavascriptCompiler.compile(sb.toString());
    Sort sort = new Sort(expr.getSortField(bindings, true));
    Query query = new TermQuery(new Term("body", "contents"));
    TopFieldDocs td = searcher.search(query, 3, sort, true, true);
    for (int i = 0; i < 3; i++) {
        FieldDoc d = (FieldDoc) td.scoreDocs[i];
        float expected = n * d.score;
        float actual = ((Double) d.fields[0]).floatValue();
        assertEquals(expected, actual, CheckHits.explainToleranceDelta(expected, actual));
    }
}
Also used: TermQuery (org.apache.lucene.search.TermQuery), Query (org.apache.lucene.search.Query), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), FieldDoc (org.apache.lucene.search.FieldDoc), TopFieldDocs (org.apache.lucene.search.TopFieldDocs), SortField (org.apache.lucene.search.SortField), Term (org.apache.lucene.index.Term), Sort (org.apache.lucene.search.Sort)
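
As a smaller illustration of the same binding trick, here is a hypothetical n = 1 case (a sketch, not part of the test): binding a single variable to SortField.Type.SCORE makes the expression's sort value equal the hit's relevance score.

SimpleBindings one = new SimpleBindings();
one.add(new SortField("x0", SortField.Type.SCORE));
Expression identity = JavascriptCompiler.compile("x0");
Sort byScoreExpr = new Sort(identity.getSortField(one, true));
TopFieldDocs top = searcher.search(new TermQuery(new Term("body", "contents")), 1, byScoreExpr, true, true);
FieldDoc fd = (FieldDoc) top.scoreDocs[0];
// fd.fields[0] is a Double equal to fd.score, within float precision.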

Example 13 with FieldDoc

Use of org.apache.lucene.search.FieldDoc in project lucene-solr by Apache.

The class TestGrouping, method slowGrouping:

private TopGroups<BytesRef> slowGrouping(GroupDoc[] groupDocs, String searchTerm, boolean fillFields, boolean getScores, boolean getMaxScores, boolean doAllGroups, Sort groupSort, Sort docSort, int topNGroups, int docsPerGroup, int groupOffset, int docOffset) {
    final Comparator<GroupDoc> groupSortComp = getComparator(groupSort);
    Arrays.sort(groupDocs, groupSortComp);
    final HashMap<BytesRef, List<GroupDoc>> groups = new HashMap<>();
    final List<BytesRef> sortedGroups = new ArrayList<>();
    final List<Comparable<?>[]> sortedGroupFields = new ArrayList<>();
    int totalHitCount = 0;
    Set<BytesRef> knownGroups = new HashSet<>();
    //System.out.println("TEST: slowGrouping");
    for (GroupDoc d : groupDocs) {
        // TODO: would be better to filter by searchTerm before sorting!
        if (!d.content.startsWith(searchTerm)) {
            continue;
        }
        totalHitCount++;
        if (doAllGroups) {
            if (!knownGroups.contains(d.group)) {
                knownGroups.add(d.group);
            //System.out.println("    add group=" + groupToString(d.group));
            }
        }
        List<GroupDoc> l = groups.get(d.group);
        if (l == null) {
            //System.out.println("    add sortedGroup=" + groupToString(d.group));
            sortedGroups.add(d.group);
            if (fillFields) {
                sortedGroupFields.add(fillFields(d, groupSort));
            }
            l = new ArrayList<>();
            groups.put(d.group, l);
        }
        l.add(d);
    }
    if (groupOffset >= sortedGroups.size()) {
        // slice is out of bounds
        return null;
    }
    final int limit = Math.min(groupOffset + topNGroups, groups.size());
    final Comparator<GroupDoc> docSortComp = getComparator(docSort);
    @SuppressWarnings({ "unchecked", "rawtypes" }) final GroupDocs<BytesRef>[] result = new GroupDocs[limit - groupOffset];
    int totalGroupedHitCount = 0;
    for (int idx = groupOffset; idx < limit; idx++) {
        final BytesRef group = sortedGroups.get(idx);
        final List<GroupDoc> docs = groups.get(group);
        totalGroupedHitCount += docs.size();
        Collections.sort(docs, docSortComp);
        final ScoreDoc[] hits;
        if (docs.size() > docOffset) {
            final int docIDXLimit = Math.min(docOffset + docsPerGroup, docs.size());
            hits = new ScoreDoc[docIDXLimit - docOffset];
            for (int docIDX = docOffset; docIDX < docIDXLimit; docIDX++) {
                final GroupDoc d = docs.get(docIDX);
                final FieldDoc fd;
                if (fillFields) {
                    fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN, fillFields(d, docSort));
                } else {
                    fd = new FieldDoc(d.id, getScores ? d.score : Float.NaN);
                }
                hits[docIDX - docOffset] = fd;
            }
        } else {
            hits = new ScoreDoc[0];
        }
        result[idx - groupOffset] = new GroupDocs<>(Float.NaN, 0.0f, docs.size(), hits, group, fillFields ? sortedGroupFields.get(idx) : null);
    }
    if (doAllGroups) {
        return new TopGroups<>(new TopGroups<>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result, Float.NaN), knownGroups.size());
    } else {
        return new TopGroups<>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result, Float.NaN);
    }
}
Also used: FieldDoc (org.apache.lucene.search.FieldDoc), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ScoreDoc (org.apache.lucene.search.ScoreDoc), List (java.util.List), BytesRef (org.apache.lucene.util.BytesRef), HashSet (java.util.HashSet)
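
This brute-force reference leans on the GroupDoc holder and a fillFields helper, both defined elsewhere in TestGrouping and not shown on this page. Plausible sketches, under the assumption that the random sorts only touch the fields seen in these examples (score, sort1, sort2, and an id tie-break):

// Holder for one synthetic document (field names inferred from usage):
private static class GroupDoc {
    final int id;
    final BytesRef group;   // may be null: the "doc has no group" case
    final BytesRef sort1;
    final BytesRef sort2;
    final String content;
    float score;            // filled in from the main index's hits
    float score2;           // filled in from the block index's hits
    GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, String content) {
        this.id = id;
        this.group = group;
        this.sort1 = sort1;
        this.sort2 = sort2;
        this.content = content;
    }
}

// Extracts the per-doc sort values that fillFields=true asks for:
private Comparable<?>[] fillFields(GroupDoc d, Sort sort) {
    final SortField[] sortFields = sort.getSort();
    final Comparable<?>[] fields = new Comparable<?>[sortFields.length];
    for (int i = 0; i < sortFields.length; i++) {
        if (sortFields[i].getType() == SortField.Type.SCORE) {
            fields[i] = d.score;
        } else if ("sort1".equals(sortFields[i].getField())) {
            fields[i] = d.sort1;
        } else if ("sort2".equals(sortFields[i].getField())) {
            fields[i] = d.sort2;
        } else {
            fields[i] = d.id;  // assumed "id" tie-break field
        }
    }
    return fields;
}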

Example 14 with FieldDoc

Use of org.apache.lucene.search.FieldDoc in project lucene-solr by Apache.

The class TestGrouping, method testRandom:

public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter);
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        //final int numDocs = _TestUtil.nextInt(random, 5, 20);
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // Because of the DV-based impl we can't tell an empty string from a
                // null value, so we don't generate empty-string groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field idvGroupField = new SortedDocValuesField("group", new BytesRef());
        doc.add(idvGroupField);
        docNoGroup.add(idvGroupField);
        Field group = newStringField("group", "", Field.Store.NO);
        doc.add(group);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the group'd
                // field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            if (groupDoc.group != null) {
                group.setStringValue(groupDoc.group.utf8ToString());
                idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
            } else {
                // TODO: not true
                // Must explicitly set empty string, else eg if
                // the segment has all docs missing the field then
                // we get null back instead of empty BytesRef:
                idvGroupField.setBytesValue(new BytesRef());
            }
            sort1.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort1));
            sort2.setBytesValue(BytesRef.deepCopyOf(groupDoc.sort2));
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
        System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);
        final DirectoryReader r = w.getReader();
        w.close();
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        int[] docIDToID = new int[r.maxDoc()];
        for (int i = 0; i < r.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToID[i] = (int) values.longValue();
        }
        DirectoryReader rBlocks = null;
        Directory dirBlocks = null;
        final IndexSearcher s = newSearcher(r);
        if (VERBOSE) {
            System.out.println("\nTEST: searcher=" + s);
        }
        final ShardState shards = new ShardState(s);
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToID[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                seenIDs.add(idValue);
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
                assertEquals(gd.id, idValue);
            }
        }
        // make sure every doc was seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        // Build 2nd index, where docs are added in blocks by
        // group, so we can use single pass collector
        dirBlocks = newDirectory();
        rBlocks = getDocBlockReader(dirBlocks, groupDocs);
        final Query lastDocInBlock = new TermQuery(new Term("groupend", "x"));
        final IndexSearcher sBlocks = newSearcher(rBlocks);
        final ShardState shardsBlocks = new ShardState(sBlocks);
        // ReaderBlocks only increases maxDoc() vs reader, which
        // means a monotonic shift in scores, so we can
        // reliably remap them w/ Map:
        final Map<String, Map<Float, Float>> scoreMap = new HashMap<>();
        values = MultiDocValues.getNumericValues(rBlocks, "id");
        assertNotNull(values);
        int[] docIDToIDBlocks = new int[rBlocks.maxDoc()];
        for (int i = 0; i < rBlocks.maxDoc(); i++) {
            assertEquals(i, values.nextDoc());
            docIDToIDBlocks[i] = (int) values.longValue();
        }
        //System.out.println("fixup score2");
        for (int contentID = 0; contentID < 3; contentID++) {
            //System.out.println("  term=real" + contentID);
            final Map<Float, Float> termScoreMap = new HashMap<>();
            scoreMap.put("real" + contentID, termScoreMap);
            //System.out.println("term=real" + contentID + " dfold=" + s.docFreq(new Term("content", "real"+contentID)) +
            //" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
            final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
                assertTrue(gd.score2 == 0.0);
                gd.score2 = hit.score;
                assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
                //System.out.println("    score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
                termScoreMap.put(gd.score, gd.score2);
            }
        }
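        // Each search iteration cross-checks four paths against each other: the
        // two-pass grouping collectors on the main index, a sharded search that
        // merges per-shard top groups, a single-pass BlockGroupingCollector on
        // the block index, and the slowGrouping reference from Example 13.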
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            final boolean fillFields = random().nextBoolean();
            boolean getScores = random().nextBoolean();
            final boolean getMaxScores = random().nextBoolean();
            final Sort groupSort = getRandomSort();
            //final Sort groupSort = new Sort(new SortField[] {new SortField("sort1", SortField.STRING), new SortField("id", SortField.INT)});
            final Sort docSort = getRandomSort();
            getScores |= (groupSort.needsScores() || docSort.needsScores());
            final int topNGroups = TestUtil.nextInt(random(), 1, 30);
            //final int topNGroups = 10;
            final int docsPerGroup = TestUtil.nextInt(random(), 1, 50);
            final int groupOffset = TestUtil.nextInt(random(), 0, (topNGroups - 1) / 2);
            //final int groupOffset = 0;
            final int docOffset = TestUtil.nextInt(random(), 0, docsPerGroup - 1);
            //final int docOffset = 0;
            final boolean doCache = random().nextBoolean();
            final boolean doAllGroups = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) + " dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
            }
            String groupField = "group";
            if (VERBOSE) {
                System.out.println("  groupField=" + groupField);
            }
            final FirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
            final CachingCollector cCache;
            final Collector c;
            final AllGroupsCollector<?> allGroupsCollector;
            if (doAllGroups) {
                allGroupsCollector = createAllGroupsCollector(c1, groupField);
            } else {
                allGroupsCollector = null;
            }
            final boolean useWrappingCollector = random().nextBoolean();
            if (doCache) {
                final double maxCacheMB = random().nextDouble();
                if (VERBOSE) {
                    System.out.println("TEST: maxCacheMB=" + maxCacheMB);
                }
                if (useWrappingCollector) {
                    if (doAllGroups) {
                        cCache = CachingCollector.create(c1, true, maxCacheMB);
                        c = MultiCollector.wrap(cCache, allGroupsCollector);
                    } else {
                        c = cCache = CachingCollector.create(c1, true, maxCacheMB);
                    }
                } else {
                    // Collect only into cache, then replay multiple times:
                    c = cCache = CachingCollector.create(true, maxCacheMB);
                }
            } else {
                cCache = null;
                if (doAllGroups) {
                    c = MultiCollector.wrap(c1, allGroupsCollector);
                } else {
                    c = c1;
                }
            }
            // Search top reader:
            final Query query = new TermQuery(new Term("content", searchTerm));
            s.search(query, c);
            if (doCache && !useWrappingCollector) {
                if (cCache.isCached()) {
                    // Replay for first-pass grouping
                    cCache.replay(c1);
                    if (doAllGroups) {
                        // Replay for all groups:
                        cCache.replay(allGroupsCollector);
                    }
                } else {
                    // Replay by re-running search:
                    s.search(query, c1);
                    if (doAllGroups) {
                        s.search(query, allGroupsCollector);
                    }
                }
            }
            // Get 1st pass top groups
            final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(c1, groupOffset, fillFields);
            final TopGroups<BytesRef> groupsResult;
            if (VERBOSE) {
                System.out.println("TEST: first pass topGroups");
                if (topGroups == null) {
                    System.out.println("  null");
                } else {
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
            }
            // Get 1st pass top groups using shards
            final TopGroups<BytesRef> topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, true, true);
            final TopGroupsCollector<?> c2;
            if (topGroups != null) {
                if (VERBOSE) {
                    System.out.println("TEST: topGroups");
                    for (SearchGroup<BytesRef> searchGroup : topGroups) {
                        System.out.println("  " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
                    }
                }
                c2 = createSecondPassCollector(c1, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
                if (doCache) {
                    if (cCache.isCached()) {
                        if (VERBOSE) {
                            System.out.println("TEST: cache is intact");
                        }
                        cCache.replay(c2);
                    } else {
                        if (VERBOSE) {
                            System.out.println("TEST: cache was too large");
                        }
                        s.search(query, c2);
                    }
                } else {
                    s.search(query, c2);
                }
                if (doAllGroups) {
                    TopGroups<BytesRef> tempTopGroups = getTopGroups(c2, docOffset);
                    groupsResult = new TopGroups<>(tempTopGroups, allGroupsCollector.getGroupCount());
                } else {
                    groupsResult = getTopGroups(c2, docOffset);
                }
            } else {
                c2 = null;
                groupsResult = null;
                if (VERBOSE) {
                    System.out.println("TEST:   no results");
                }
            }
            final TopGroups<BytesRef> expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, doAllGroups, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
            if (VERBOSE) {
                if (expectedGroups == null) {
                    System.out.println("TEST: no expected groups");
                } else {
                    System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : expectedGroups.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + sd.doc + " score=" + sd.score);
                        }
                    }
                }
                if (groupsResult == null) {
                    System.out.println("TEST: no matched groups");
                } else {
                    System.out.println("TEST: matched groups totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : groupsResult.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                    if (searchIter == 14) {
                        for (int docIDX = 0; docIDX < s.getIndexReader().maxDoc(); docIDX++) {
                            System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
                        }
                    }
                }
                if (topGroupsShards == null) {
                    System.out.println("TEST: no matched-merged groups");
                } else {
                    System.out.println("TEST: matched-merged groups totalGroupedHitCount=" + topGroupsShards.totalGroupedHitCount);
                    for (GroupDocs<BytesRef> gd : topGroupsShards.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToID[sd.doc] + " score=" + sd.score);
                        }
                    }
                }
            }
            assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, true);
            // Confirm merged shards match:
            assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, true);
            if (topGroupsShards != null) {
                verifyShards(shards.docStarts, topGroupsShards);
            }
            final boolean needsScores = getScores || getMaxScores || docSort == null;
            final BlockGroupingCollector c3 = new BlockGroupingCollector(groupSort, groupOffset + topNGroups, needsScores, sBlocks.createNormalizedWeight(lastDocInBlock, false));
            final AllGroupsCollector<BytesRef> allGroupsCollector2;
            final Collector c4;
            if (doAllGroups) {
                // NOTE: must be "group" and not "group_dv"
                // (groupField) because we didn't index doc
                // values in the block index:
                allGroupsCollector2 = new AllGroupsCollector<>(new TermGroupSelector("group"));
                c4 = MultiCollector.wrap(c3, allGroupsCollector2);
            } else {
                allGroupsCollector2 = null;
                c4 = c3;
            }
            // Get block grouping result:
            sBlocks.search(query, c4);
            @SuppressWarnings({ "unchecked", "rawtypes" }) final TopGroups<BytesRef> tempTopGroupsBlocks = (TopGroups<BytesRef>) c3.getTopGroups(docSort, groupOffset, docOffset, docOffset + docsPerGroup, fillFields);
            final TopGroups<BytesRef> groupsResultBlocks;
            if (doAllGroups && tempTopGroupsBlocks != null) {
                assertEquals((int) tempTopGroupsBlocks.totalGroupCount, allGroupsCollector2.getGroupCount());
                groupsResultBlocks = new TopGroups<>(tempTopGroupsBlocks, allGroupsCollector2.getGroupCount());
            } else {
                groupsResultBlocks = tempTopGroupsBlocks;
            }
            if (VERBOSE) {
                if (groupsResultBlocks == null) {
                    System.out.println("TEST: no block groups");
                } else {
                    System.out.println("TEST: block groups totalGroupedHitCount=" + groupsResultBlocks.totalGroupedHitCount);
                    boolean first = true;
                    for (GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
                        System.out.println("  group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
                        for (ScoreDoc sd : gd.scoreDocs) {
                            System.out.println("    id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
                            if (first) {
                                System.out.println("explain: " + sBlocks.explain(query, sd.doc));
                                first = false;
                            }
                        }
                    }
                }
            }
            // Get shard'd block grouping result:
            final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false);
            if (expectedGroups != null) {
                // Fixup scores for reader2
                for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                    for (ScoreDoc hit : groupDocsHits.scoreDocs) {
                        final GroupDoc gd = groupDocsByID[hit.doc];
                        assertEquals(gd.id, hit.doc);
                        //System.out.println("fixup score " + hit.score + " to " + gd.score2 + " vs " + gd.score);
                        hit.score = gd.score2;
                    }
                }
                final SortField[] sortFields = groupSort.getSort();
                final Map<Float, Float> termScoreMap = scoreMap.get(searchTerm);
                for (int groupSortIDX = 0; groupSortIDX < sortFields.length; groupSortIDX++) {
                    if (sortFields[groupSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            if (groupDocsHits.groupSortValues != null) {
                                //System.out.println("remap " + groupDocsHits.groupSortValues[groupSortIDX] + " to " + termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]));
                                groupDocsHits.groupSortValues[groupSortIDX] = termScoreMap.get(groupDocsHits.groupSortValues[groupSortIDX]);
                                assertNotNull(groupDocsHits.groupSortValues[groupSortIDX]);
                            }
                        }
                    }
                }
                final SortField[] docSortFields = docSort.getSort();
                for (int docSortIDX = 0; docSortIDX < docSortFields.length; docSortIDX++) {
                    if (docSortFields[docSortIDX].getType() == SortField.Type.SCORE) {
                        for (GroupDocs<?> groupDocsHits : expectedGroups.groups) {
                            for (ScoreDoc _hit : groupDocsHits.scoreDocs) {
                                FieldDoc hit = (FieldDoc) _hit;
                                if (hit.fields != null) {
                                    hit.fields[docSortIDX] = termScoreMap.get(hit.fields[docSortIDX]);
                                    assertNotNull(hit.fields[docSortIDX]);
                                }
                            }
                        }
                    }
                }
            }
            assertEquals(docIDToIDBlocks, expectedGroups, groupsResultBlocks, false, true, true, getScores, false);
            assertEquals(docIDToIDBlocks, expectedGroups, topGroupsBlockShards, false, false, fillFields, getScores, false);
        }
        r.close();
        dir.close();
        rBlocks.close();
        dirBlocks.close();
    }
}
Also used: Query (org.apache.lucene.search.Query), TermQuery (org.apache.lucene.search.TermQuery), FieldDoc (org.apache.lucene.search.FieldDoc), HashMap (java.util.HashMap), Map (java.util.Map), ArrayList (java.util.ArrayList), HashSet (java.util.HashSet), SortField (org.apache.lucene.search.SortField), Sort (org.apache.lucene.search.Sort), Document (org.apache.lucene.document.Document), Field (org.apache.lucene.document.Field), StringField (org.apache.lucene.document.StringField), TextField (org.apache.lucene.document.TextField), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField), ScoreDoc (org.apache.lucene.search.ScoreDoc), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), CachingCollector (org.apache.lucene.search.CachingCollector), MultiCollector (org.apache.lucene.search.MultiCollector), Collector (org.apache.lucene.search.Collector), DirectoryReader (org.apache.lucene.index.DirectoryReader), Term (org.apache.lucene.index.Term), NumericDocValues (org.apache.lucene.index.NumericDocValues), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), IndexSearcher (org.apache.lucene.search.IndexSearcher), BytesRef (org.apache.lucene.util.BytesRef), Directory (org.apache.lucene.store.Directory)
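
The block index that the single-pass collector needs is built by getDocBlockReader, which this page does not show. A rough sketch of its shape (assumed, not the real implementation; the essential points are that each group's docs are added contiguously via addDocuments and that the last doc of each block carries the "groupend" marker matched by lastDocInBlock):

private DirectoryReader getDocBlockReader(Directory dir, GroupDoc[] groupDocs) throws IOException {
    RandomIndexWriter w = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new MockAnalyzer(random())));
    // Gather docs by group value (a null group is allowed as a key):
    Map<BytesRef, List<GroupDoc>> byGroup = new HashMap<>();
    for (GroupDoc gd : groupDocs) {
        byGroup.computeIfAbsent(gd.group, k -> new ArrayList<>()).add(gd);
    }
    for (List<GroupDoc> block : byGroup.values()) {
        List<Document> docs = new ArrayList<>();
        for (GroupDoc gd : block) {
            Document doc = new Document();
            doc.add(newTextField("content", gd.content, Field.Store.NO));
            doc.add(new NumericDocValuesField("id", gd.id));
            // ... group/sort1/sort2 fields as in the main index ...
            docs.add(doc);
        }
        // Marker on the last doc of each block; BlockGroupingCollector uses
        // the lastDocInBlock query to find block boundaries:
        docs.get(docs.size() - 1).add(newStringField("groupend", "x", Field.Store.NO));
        w.addDocuments(docs);
    }
    DirectoryReader r = w.getReader();
    w.close();
    return r;
}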

Example 15 with FieldDoc

Use of org.apache.lucene.search.FieldDoc in project lucene-solr by Apache.

The class TestNearest, method testNearestNeighborRandom:

public void testNearestNeighborRandom() throws Exception {
    int numPoints = atLeast(5000);
    Directory dir;
    if (numPoints > 100000) {
        dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
    } else {
        dir = newDirectory();
    }
    double[] lats = new double[numPoints];
    double[] lons = new double[numPoints];
    IndexWriterConfig iwc = getIndexWriterConfig();
    iwc.setMergePolicy(newLogMergePolicy());
    iwc.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    for (int id = 0; id < numPoints; id++) {
        lats[id] = quantizeLat(GeoTestUtil.nextLatitude());
        lons[id] = quantizeLon(GeoTestUtil.nextLongitude());
        Document doc = new Document();
        doc.add(new LatLonPoint("point", lats[id], lons[id]));
        doc.add(new LatLonDocValuesField("point", lats[id], lons[id]));
        doc.add(new StoredField("id", id));
        w.addDocument(doc);
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    DirectoryReader r = w.getReader();
    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }
    // can't wrap because we require Lucene60PointsFormat directly but e.g. ParallelReader wraps with its own points impl:
    IndexSearcher s = newSearcher(r, false);
    int iters = atLeast(100);
    for (int iter = 0; iter < iters; iter++) {
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter);
        }
        double pointLat = GeoTestUtil.nextLatitude();
        double pointLon = GeoTestUtil.nextLongitude();
        // dumb brute force search to get the expected result:
        NearestHit[] expectedHits = new NearestHit[lats.length];
        for (int id = 0; id < lats.length; id++) {
            NearestHit hit = new NearestHit();
            hit.distanceMeters = SloppyMath.haversinMeters(pointLat, pointLon, lats[id], lons[id]);
            hit.docID = id;
            expectedHits[id] = hit;
        }
        Arrays.sort(expectedHits, new Comparator<NearestHit>() {

            @Override
            public int compare(NearestHit a, NearestHit b) {
                int cmp = Double.compare(a.distanceMeters, b.distanceMeters);
                if (cmp != 0) {
                    return cmp;
                }
                // tie break by smaller docID:
                return a.docID - b.docID;
            }
        });
        int topN = TestUtil.nextInt(random(), 1, lats.length);
        if (VERBOSE) {
            System.out.println("\nhits for pointLat=" + pointLat + " pointLon=" + pointLon);
        }
        // Also test with MatchAllDocsQuery, sorting by distance:
        TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonDocValuesField.newDistanceSort("point", pointLat, pointLon)));
        ScoreDoc[] hits = LatLonPoint.nearest(s, "point", pointLat, pointLon, topN).scoreDocs;
        for (int i = 0; i < topN; i++) {
            NearestHit expected = expectedHits[i];
            FieldDoc expected2 = (FieldDoc) fieldDocs.scoreDocs[i];
            FieldDoc actual = (FieldDoc) hits[i];
            Document actualDoc = r.document(actual.doc);
            if (VERBOSE) {
                System.out.println("hit " + i);
                System.out.println("  expected id=" + expected.docID + " lat=" + lats[expected.docID] + " lon=" + lons[expected.docID] + " distance=" + expected.distanceMeters + " meters");
                System.out.println("  actual id=" + actualDoc.getField("id") + " distance=" + actual.fields[0] + " meters");
            }
            assertEquals(expected.docID, actual.doc);
            assertEquals(expected.distanceMeters, ((Double) actual.fields[0]).doubleValue(), 0.0);
            assertEquals(expected.docID, expected2.doc);
            assertEquals(((Double) expected2.fields[0]).doubleValue(), expected.distanceMeters, 0.0);
        }
    }
    r.close();
    w.close();
    dir.close();
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), FieldDoc (org.apache.lucene.search.FieldDoc), DirectoryReader (org.apache.lucene.index.DirectoryReader), NearestHit (org.apache.lucene.document.NearestNeighbor.NearestHit), TopFieldDocs (org.apache.lucene.search.TopFieldDocs), MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery), ScoreDoc (org.apache.lucene.search.ScoreDoc), SerialMergeScheduler (org.apache.lucene.index.SerialMergeScheduler), Sort (org.apache.lucene.search.Sort), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter), Directory (org.apache.lucene.store.Directory), IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)
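
Outside the test framework, the two paths this test cross-checks look roughly like the following standalone sketch (hypothetical setup; RAMDirectory and StandardAnalyzer are assumptions, and LatLonPoint.nearest lives in the sandbox module in this codebase):

Directory dir = new RAMDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
Document doc = new Document();
doc.add(new LatLonPoint("point", 40.7143528, -74.0059731));          // indexed for nearest()
doc.add(new LatLonDocValuesField("point", 40.7143528, -74.0059731)); // doc values for distance sort
w.addDocument(doc);
w.close();
IndexSearcher s = new IndexSearcher(DirectoryReader.open(dir));
// Path 1: BKD-based nearest-neighbor search:
FieldDoc nearest = (FieldDoc) LatLonPoint.nearest(s, "point", 40.75, -73.99, 1).scoreDocs[0];
// Path 2: match-all query sorted by doc-values distance:
Sort byDistance = new Sort(LatLonDocValuesField.newDistanceSort("point", 40.75, -73.99));
FieldDoc sorted = (FieldDoc) s.search(new MatchAllDocsQuery(), 1, byDistance).scoreDocs[0];
// Both report the haversine distance in meters in fields[0]:
double meters1 = (Double) nearest.fields[0];
double meters2 = (Double) sorted.fields[0];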

Aggregations

FieldDoc (org.apache.lucene.search.FieldDoc): 50 usages
Sort (org.apache.lucene.search.Sort): 26 usages
TopFieldDocs (org.apache.lucene.search.TopFieldDocs): 23 usages
SortField (org.apache.lucene.search.SortField): 21 usages
ScoreDoc (org.apache.lucene.search.ScoreDoc): 20 usages
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 18 usages
IndexSearcher (org.apache.lucene.search.IndexSearcher): 17 usages
ArrayList (java.util.ArrayList): 14 usages
Directory (org.apache.lucene.store.Directory): 14 usages
TopDocs (org.apache.lucene.search.TopDocs): 12 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 10 usages
TermQuery (org.apache.lucene.search.TermQuery): 10 usages
Document (org.apache.lucene.document.Document): 9 usages
Term (org.apache.lucene.index.Term): 9 usages
BytesRef (org.apache.lucene.util.BytesRef): 9 usages
HashMap (java.util.HashMap): 8 usages
Query (org.apache.lucene.search.Query): 8 usages
IndexReader (org.apache.lucene.index.IndexReader): 7 usages
HashSet (java.util.HashSet): 5 usages
DirectoryReader (org.apache.lucene.index.DirectoryReader): 5 usages