Search in sources :

Example 36 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class TestFieldCacheSortRandom method testRandomStringSort.

/**
 * Indexes random string values into the "stringdv" field (about one document in ten is left
 * without a value), then repeatedly searches with a sort on that field through an
 * {@code UninvertingReader} and checks that the sort values of the returned hits match the
 * matched values sorted independently in Java.
 *
 * @param type the sort field type used when building the {@link SortField} on "stringdv"
 * @throws Exception if indexing, searching or closing a resource fails
 */
private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    // Only consulted when duplicates are disallowed.
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    // Value indexed for each document, in insertion order; null marks a missing value.
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();
        // 10% of the time, the document is missing the value:
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }
            if (!allowDups) {
                if (seen.contains(s)) {
                    // Duplicate value: retry without adding a document or advancing numDocs.
                    continue;
                }
                seen.add(s);
            }
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }
            doc.add(new StringField("stringdv", s, Field.Store.NO));
            docValues.add(new BytesRef(s));
        } else {
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }
        doc.add(new IntPoint("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;
        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }
    // Tell the UninvertingReader how each field should be exposed.
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    mapping.put("id", Type.INTEGER_POINT);
    final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }
    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();
        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        // Always true in this test, so the "map null to empty BytesRef" branches below never run.
        final boolean missingIsNull;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        missingIsNull = true;
        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }
        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        // May exceed the number of documents so that "ask for more than exists" is exercised too.
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        int queryType = random.nextInt(2);
        if (queryType == 0) {
            hits = s.search(new ConstantScoreQuery(f), hitCount, sort, random.nextBoolean(), random.nextBoolean());
        } else {
            hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        }
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }
        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {

            @Override
            public int compare(BytesRef a, BytesRef b) {
                // Nulls (missing values) go last or first depending on sortMissingLast;
                // everything else uses BytesRef's natural order.
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });
        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                if (br == null && missingIsNull == false) {
                    br = new BytesRef();
                }
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }
        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];
                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        // Compare the primary sort value of every returned hit with the expected list.
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            if (br == null && missingIsNull == false) {
                br = new BytesRef();
            }
            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null!  So we must map
            // null here, too:
            BytesRef br2 = (BytesRef) fd.fields[0];
            if (br2 == null && missingIsNull == false) {
                br2 = new BytesRef();
            }
            assertEquals(br, br2);
        }
    }
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldDoc(org.apache.lucene.search.FieldDoc) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) Random(java.util.Random) Sort(org.apache.lucene.search.Sort) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) IntPoint(org.apache.lucene.document.IntPoint) IntPoint(org.apache.lucene.document.IntPoint) Type(org.apache.solr.uninverting.UninvertingReader.Type) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 37 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class TopGroups method merge.

/**
 * Merges an array of TopGroups, for example obtained from the second-pass collector across
 * multiple shards. Each TopGroups must have been sorted by the same groupSort and docSort,
 * and the top groups passed to all second-pass collectors must be the same.
 *
 * <p><b>NOTE</b>: We can't always compute an exact totalGroupCount. Documents belonging to a
 * group may occur on more than one shard and thus the merged totalGroupCount can be higher
 * than the actual totalGroupCount. In this case the totalGroupCount represents an upper
 * bound. If the documents of one group reside in only one shard then the totalGroupCount is
 * exact.
 *
 * <p><b>NOTE</b>: the topDocs in each GroupDocs is actually an instance of TopDocsAndShards.
 *
 * <p>The per-group and overall maximum scores are the true maxima of the shard values, so
 * zero or negative scores are reported as such. A NaN shard maxScore still yields NaN, as
 * {@link Math#max(float, float)} dictates. If there are no groups at all, the overall
 * maximum score is {@link Float#NEGATIVE_INFINITY}.
 *
 * @param shardGroups per-shard results; every entry must hold the same groups in the same order
 * @param groupSort sort that was used to order the groups
 * @param docSort sort that was used to order documents within each group
 * @param docOffset number of leading merged documents to skip within each group
 * @param docTopN number of documents to keep per group after the offset
 * @param scoreMergeMode how the per-shard group scores are combined into one group score
 * @return the merged groups, or {@code null} if {@code shardGroups} is empty
 * @throws IllegalArgumentException if the shards disagree on the number of groups or on a
 *         group value, or if {@code scoreMergeMode} is not handled
 */
public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN, ScoreMergeMode scoreMergeMode) {
    if (shardGroups.length == 0) {
        return null;
    }
    int totalHitCount = 0;
    int totalGroupedHitCount = 0;
    // Optionally merge the totalGroupCount: stays null unless at least one shard reports it.
    Integer totalGroupCount = null;
    final int numGroups = shardGroups[0].groups.length;
    for (TopGroups<T> shard : shardGroups) {
        if (numGroups != shard.groups.length) {
            throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
        }
        totalHitCount += shard.totalHitCount;
        totalGroupedHitCount += shard.totalGroupedHitCount;
        if (shard.totalGroupCount != null) {
            if (totalGroupCount == null) {
                totalGroupCount = 0;
            }
            totalGroupCount += shard.totalGroupCount;
        }
    }
    @SuppressWarnings({ "unchecked", "rawtypes" }) final GroupDocs<T>[] mergedGroupDocs = new GroupDocs[numGroups];
    // Loop-invariant: decides which TopDocs flavour and which merge overload is used below.
    final boolean sortByRelevance = docSort.equals(Sort.RELEVANCE);
    // The runtime array type must be TopFieldDocs[] for the cast in the field-sorted merge.
    final TopDocs[] shardTopDocs;
    if (sortByRelevance) {
        shardTopDocs = new TopDocs[shardGroups.length];
    } else {
        shardTopDocs = new TopFieldDocs[shardGroups.length];
    }
    // Float.MIN_VALUE is the smallest POSITIVE float, so it must not seed a running maximum;
    // NEGATIVE_INFINITY is below every real score.
    float totalMaxScore = Float.NEGATIVE_INFINITY;
    for (int groupIDX = 0; groupIDX < numGroups; groupIDX++) {
        final T groupValue = shardGroups[0].groups[groupIDX].groupValue;
        float maxScore = Float.NEGATIVE_INFINITY;
        int totalHits = 0;
        double scoreSum = 0.0;
        for (int shardIDX = 0; shardIDX < shardGroups.length; shardIDX++) {
            final TopGroups<T> shard = shardGroups[shardIDX];
            final GroupDocs<?> shardGroupDocs = shard.groups[groupIDX];
            // Every shard must report the same group value at this position (null matches null).
            if (groupValue == null) {
                if (shardGroupDocs.groupValue != null) {
                    throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
                }
            } else if (!groupValue.equals(shardGroupDocs.groupValue)) {
                throw new IllegalArgumentException("group values differ across shards; you must pass same top groups to all shards' second-pass collector");
            }
            if (sortByRelevance) {
                shardTopDocs[shardIDX] = new TopDocs(shardGroupDocs.totalHits, shardGroupDocs.scoreDocs, shardGroupDocs.maxScore);
            } else {
                shardTopDocs[shardIDX] = new TopFieldDocs(shardGroupDocs.totalHits, shardGroupDocs.scoreDocs, docSort.getSort(), shardGroupDocs.maxScore);
            }
            maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
            totalHits += shardGroupDocs.totalHits;
            scoreSum += shardGroupDocs.score;
        }
        // Merge enough documents to cover the requested offset plus page size.
        final TopDocs mergedTopDocs;
        if (sortByRelevance) {
            mergedTopDocs = TopDocs.merge(docOffset + docTopN, shardTopDocs);
        } else {
            mergedTopDocs = TopDocs.merge(docSort, docOffset + docTopN, (TopFieldDocs[]) shardTopDocs);
        }
        // Slice off the first docOffset merged documents.
        final ScoreDoc[] mergedScoreDocs;
        if (docOffset == 0) {
            mergedScoreDocs = mergedTopDocs.scoreDocs;
        } else if (docOffset >= mergedTopDocs.scoreDocs.length) {
            mergedScoreDocs = new ScoreDoc[0];
        } else {
            mergedScoreDocs = new ScoreDoc[mergedTopDocs.scoreDocs.length - docOffset];
            System.arraycopy(mergedTopDocs.scoreDocs, docOffset, mergedScoreDocs, 0, mergedTopDocs.scoreDocs.length - docOffset);
        }
        final float groupScore;
        switch(scoreMergeMode) {
            case None:
                groupScore = Float.NaN;
                break;
            case Avg:
                // Guard against division by zero when the group has no hits on any shard.
                if (totalHits > 0) {
                    groupScore = (float) (scoreSum / totalHits);
                } else {
                    groupScore = Float.NaN;
                }
                break;
            case Total:
                groupScore = (float) scoreSum;
                break;
            default:
                throw new IllegalArgumentException("can't handle ScoreMergeMode " + scoreMergeMode);
        }
        // Group sort values are taken from the first shard.
        mergedGroupDocs[groupIDX] = new GroupDocs<>(groupScore, maxScore, totalHits, mergedScoreDocs, groupValue, shardGroups[0].groups[groupIDX].groupSortValues);
        totalMaxScore = Math.max(totalMaxScore, maxScore);
    }
    final TopGroups<T> merged = new TopGroups<>(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, mergedGroupDocs, totalMaxScore);
    if (totalGroupCount == null) {
        return merged;
    }
    // At least one shard reported a group count: attach the summed (upper-bound) value.
    return new TopGroups<>(merged, totalGroupCount);
}
Also used : TopFieldDocs(org.apache.lucene.search.TopFieldDocs) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs)

Example 38 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class TestDemoExpressions method testTwoOfSameBinding.

/**
 * Checks that one binding ({@code _score}) can appear more than once in an expression: for
 * each of the first three hits the reported sort value must equal twice the hit's score.
 */
public void testTwoOfSameBinding() throws Exception {
    final SimpleBindings scoreBindings = new SimpleBindings();
    scoreBindings.add(new SortField("_score", SortField.Type.SCORE));
    final Expression doubledScore = JavascriptCompiler.compile("_score + _score");
    final Sort byDoubledScore = new Sort(doubledScore.getSortField(scoreBindings, true));
    final Query contentsQuery = new TermQuery(new Term("body", "contents"));
    final TopFieldDocs topHits = searcher.search(contentsQuery, 3, byDoubledScore, true, true);
    // Exactly three hits are inspected, by index, as the search asked for the top three.
    int rank = 0;
    while (rank < 3) {
        final FieldDoc hit = (FieldDoc) topHits.scoreDocs[rank];
        final float wanted = 2 * hit.score;
        final float got = ((Double) hit.fields[0]).floatValue();
        assertEquals(wanted, got, CheckHits.explainToleranceDelta(wanted, got));
        rank++;
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) Sort(org.apache.lucene.search.Sort) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term)

Example 39 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class TestDemoExpressions method testDynamicExtendedVariableExample.

/**
 * Demonstrates custom {@code Bindings} that parse a variable name into pieces
 * ({@code doc['field'].value}, {@code magicarray[n]}, {@code fourtytwo}) and resolve each
 * piece to a value source, then sorts all documents by the resulting expression and checks
 * the sort values of the first three hits.
 */
public void testDynamicExtendedVariableExample() throws Exception {
    Expression popularity = JavascriptCompiler.compile("doc['popularity'].value + magicarray[0] + fourtytwo");
    // The following is an example of how to write bindings which parse the variable name into pieces.
    // Note, however, that this requires a lot of error checking.  Each "error case" below should be
    // filled in with proper error messages for a real use case.
    Bindings bindings = new Bindings() {

        @Override
        public DoubleValuesSource getDoubleValuesSource(String name) {
            VariableContext[] var = VariableContext.parse(name);
            assert var[0].type == MEMBER;
            String base = var[0].text;
            if (base.equals("doc")) {
                if (var.length > 1 && var[1].type == STR_INDEX) {
                    String field = var[1].text;
                    if (var.length > 2 && var[2].type == MEMBER && var[2].text.equals("value")) {
                        return DoubleValuesSource.fromIntField(field);
                    } else {
                        // error case, non/missing "value" member access.
                        // The member may be absent altogether, so only read var[2] when it exists;
                        // otherwise the failure would surface as an ArrayIndexOutOfBoundsException.
                        fail("member: " + (var.length > 2 ? var[2].text : "<missing>"));
                    }
                } else {
                    // error case, doc should be a str indexed array
                    fail();
                }
            } else if (base.equals("magicarray")) {
                if (var.length > 1 && var[1].type == INT_INDEX) {
                    return DoubleValuesSource.constant(2048);
                } else {
                    // error case, magic array isn't an array
                    fail();
                }
            } else if (base.equals("fourtytwo")) {
                return DoubleValuesSource.constant(42);
            } else {
                // error case (variable doesn't exist)
                fail();
            }
            // Needed for definite-return analysis; every path reaching here has called fail().
            throw new IllegalArgumentException("Illegal reference '" + name + "'");
        }
    };
    Sort sort = new Sort(popularity.getSortField(bindings, false));
    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
    // Each expected value is popularity + 2048 (magicarray) + 42 (fourtytwo).
    FieldDoc d = (FieldDoc) td.scoreDocs[0];
    assertEquals(2092D, (Double) d.fields[0], 1E-4);
    d = (FieldDoc) td.scoreDocs[1];
    assertEquals(2095D, (Double) d.fields[0], 1E-4);
    d = (FieldDoc) td.scoreDocs[2];
    assertEquals(2110D, (Double) d.fields[0], 1E-4);
}
Also used : FieldDoc(org.apache.lucene.search.FieldDoc) Sort(org.apache.lucene.search.Sort) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) VariableContext(org.apache.lucene.expressions.js.VariableContext) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery)

Example 40 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class TestDemoExpressions method testSortValues.

/**
 * Checks the sort values returned for an expression sort: for each of the first three hits
 * the reported value must equal the square root of the hit's score.
 */
public void testSortValues() throws Exception {
    final SimpleBindings scoreBindings = new SimpleBindings();
    scoreBindings.add(new SortField("_score", SortField.Type.SCORE));
    final Expression rootOfScore = JavascriptCompiler.compile("sqrt(_score)");
    final Sort byRootOfScore = new Sort(rootOfScore.getSortField(scoreBindings, true));
    final Query contentsQuery = new TermQuery(new Term("body", "contents"));
    final TopFieldDocs topHits = searcher.search(contentsQuery, 3, byRootOfScore, true, true);
    // Exactly three hits are inspected, by index, as the search asked for the top three.
    int rank = 0;
    while (rank < 3) {
        final FieldDoc hit = (FieldDoc) topHits.scoreDocs[rank];
        final float wanted = (float) Math.sqrt(hit.score);
        final float got = ((Double) hit.fields[0]).floatValue();
        assertEquals(wanted, got, CheckHits.explainToleranceDelta(wanted, got));
        rank++;
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) Sort(org.apache.lucene.search.Sort) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term)

Aggregations

TopFieldDocs (org.apache.lucene.search.TopFieldDocs)41 Sort (org.apache.lucene.search.Sort)30 SortField (org.apache.lucene.search.SortField)24 FieldDoc (org.apache.lucene.search.FieldDoc)23 IndexSearcher (org.apache.lucene.search.IndexSearcher)19 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18 TermQuery (org.apache.lucene.search.TermQuery)15 Document (org.apache.lucene.document.Document)14 ScoreDoc (org.apache.lucene.search.ScoreDoc)12 ArrayList (java.util.ArrayList)11 Term (org.apache.lucene.index.Term)11 Query (org.apache.lucene.search.Query)11 BytesRef (org.apache.lucene.util.BytesRef)9 TopDocs (org.apache.lucene.search.TopDocs)8 Directory (org.apache.lucene.store.Directory)8 HashMap (java.util.HashMap)6 StringField (org.apache.lucene.document.StringField)6 CollapseTopFieldDocs (org.apache.lucene.search.grouping.CollapseTopFieldDocs)5 IOException (java.io.IOException)4 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4