Example 61 with ScoreDoc

Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

From the class FuzzyLikeThisQueryTest, method testMultiWord.

//Test that multiple input words have variants produced
public void testMultiWord() throws Throwable {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.addTerms("jonathin smoth", "name", 2, 1);
    Query q = flt.rewrite(searcher.getIndexReader());
    HashSet<Term> queryTerms = new HashSet<>();
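    // Build a Weight for the rewritten query (needsScores=true, boost=1f) and extract the terms it will match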
    searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
    assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
    assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
    TopDocs topDocs = searcher.search(flt, 1);
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertTrue("score docs must match 1 doc", (sd != null) && (sd.length > 0));
    Document doc = searcher.doc(sd[0].doc);
    assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
}
Also used: TopDocs (org.apache.lucene.search.TopDocs), Query (org.apache.lucene.search.Query), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), HashSet (java.util.HashSet), ScoreDoc (org.apache.lucene.search.ScoreDoc)

Example 62 with ScoreDoc

Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

From the class FuzzyLikeThisQueryTest, method testClosestEditDistanceMatchComesFirst.

//Tests that idf ranking does not favour rare misspellings over a strong edit-distance match
public void testClosestEditDistanceMatchComesFirst() throws Throwable {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.addTerms("smith", "name", 2, 1);
    Query q = flt.rewrite(searcher.getIndexReader());
    HashSet<Term> queryTerms = new HashSet<>();
    searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
    assertTrue("Should have variant smythe", queryTerms.contains(new Term("name", "smythe")));
    assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
    assertTrue("Should have variant smyth", queryTerms.contains(new Term("name", "smyth")));
    TopDocs topDocs = searcher.search(flt, 1);
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertTrue("score docs must match 1 doc", (sd != null) && (sd.length > 0));
    Document doc = searcher.doc(sd[0].doc);
    assertEquals("Should match most similar not most rare variant", "2", doc.get("id"));
}
Also used: TopDocs (org.apache.lucene.search.TopDocs), Query (org.apache.lucene.search.Query), Term (org.apache.lucene.index.Term), Document (org.apache.lucene.document.Document), HashSet (java.util.HashSet), ScoreDoc (org.apache.lucene.search.ScoreDoc)

Example 63 with ScoreDoc

Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

From the class BaseDocValuesFormatTestCase, method testDocValuesSimple.

/*
   * Simple test case to show how to use the API
   */
public void testDocValuesSimple() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    conf.setMergePolicy(newLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, conf);
    for (int i = 0; i < 5; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("docId", i));
        doc.add(new TextField("docId", "" + i, Field.Store.NO));
        writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1, true);
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(1, reader.leaves().size());
    IndexSearcher searcher = new IndexSearcher(reader);
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD);
    TopDocs search = searcher.search(query.build(), 10);
    assertEquals(5, search.totalHits);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    NumericDocValues docValues = getOnlyLeafReader(reader).getNumericDocValues("docId");
    for (int i = 0; i < scoreDocs.length; i++) {
        assertEquals(i, scoreDocs[i].doc);
        assertEquals(i, docValues.advance(i));
        assertEquals(i, docValues.longValue());
    }
    reader.close();
    dir.close();
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), BooleanQuery (org.apache.lucene.search.BooleanQuery), TermQuery (org.apache.lucene.search.TermQuery), BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), Analyzer (org.apache.lucene.analysis.Analyzer), Document (org.apache.lucene.document.Document), ScoreDoc (org.apache.lucene.search.ScoreDoc), TopDocs (org.apache.lucene.search.TopDocs), SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), TextField (org.apache.lucene.document.TextField), Directory (org.apache.lucene.store.Directory)

Example 64 with ScoreDoc

Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

From the class ExpandComponent, method process.

@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doExpand) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String field = params.get(ExpandParams.EXPAND_FIELD);
    String hint = null;
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                    hint = cp.hint;
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = SortSpecParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    LeafReader reader = searcher.getSlowAtomicReader();
    SchemaField schemaField = searcher.getSchema().getField(field);
    FieldType fieldType = schemaField.getType();
    SortedDocValues values = null;
    long nullValue = 0L;
    if (fieldType instanceof StrField) {
        //Get the top-level SortedDocValues
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap<>();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
    } else {
        //Get the nullValue for the numeric collapse field
        String defaultValue = searcher.getSchema().getField(field).getDefaultValue();
        final NumberType numType = fieldType.getNumberType();
        // We don't need to handle invalid 64-bit field types here.
        if (defaultValue != null) {
            if (numType == NumberType.INTEGER) {
                nullValue = Long.parseLong(defaultValue);
            } else if (numType == NumberType.FLOAT) {
                nullValue = Float.floatToIntBits(Float.parseFloat(defaultValue));
            }
        } else if (NumberType.FLOAT.equals(numType)) {
            // Integer case already handled by nullValue defaulting to 0
            nullValue = Float.floatToIntBits(0.0f);
        }
    }
    FixedBitSet groupBits = null;
    LongHashSet groupSet = null;
    DocList docList = rb.getResults().docList;
    IntHashSet collapsedSet = new IntHashSet(docList.size() * 2);
    //Gather the groups for the current page of documents
    DocIterator idit = docList.iterator();
    int[] globalDocs = new int[docList.size()];
    int docsIndex = -1;
    while (idit.hasNext()) {
        globalDocs[++docsIndex] = idit.nextDoc();
    }
    Arrays.sort(globalDocs);
    Query groupQuery = null;
    /*
    * This code gathers the group information for the current page.
    */
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    if (contexts.size() == 0) {
        //When no context is available we can skip expanding
        return;
    }
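    // Walk the leaf (segment) contexts in doc id order, tracking the doc id range each segment covers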
    int currentContext = 0;
    int currentDocBase = contexts.get(currentContext).docBase;
    int nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
    IntObjectHashMap<BytesRef> ordBytes = null;
    if (values != null) {
        groupBits = new FixedBitSet(values.getValueCount());
        MultiDocValues.OrdinalMap ordinalMap = null;
        SortedDocValues[] sortedDocValues = null;
        LongValues segmentOrdinalMap = null;
        SortedDocValues currentValues = null;
        if (values instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) values).mapping;
            sortedDocValues = ((MultiDocValues.MultiSortedDocValues) values).values;
            currentValues = sortedDocValues[currentContext];
            segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
        }
        int count = 0;
        ordBytes = new IntObjectHashMap<>();
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = (currentContext + 1) < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                if (ordinalMap != null) {
                    currentValues = sortedDocValues[currentContext];
                    segmentOrdinalMap = ordinalMap.getGlobalOrds(currentContext);
                }
            }
            int contextDoc = globalDoc - currentDocBase;
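            // With a multi-segment reader, map segment-local ordinals to global ordinals via the OrdinalMap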
            if (ordinalMap != null) {
                if (contextDoc > currentValues.docID()) {
                    currentValues.advance(contextDoc);
                }
                if (contextDoc == currentValues.docID()) {
                    int ord = currentValues.ordValue();
                    ++count;
                    BytesRef ref = currentValues.lookupOrd(ord);
                    ord = (int) segmentOrdinalMap.get(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            } else {
                if (globalDoc > values.docID()) {
                    values.advance(globalDoc);
                }
                if (globalDoc == values.docID()) {
                    int ord = values.ordValue();
                    ++count;
                    BytesRef ref = values.lookupOrd(ord);
                    ordBytes.put(ord, BytesRef.deepCopyOf(ref));
                    groupBits.set(ord);
                    collapsedSet.add(globalDoc);
                }
            }
        }
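        // Build a query matching the collected groups, but only when the page produced fewer than 200 of them (the cap presumably keeps the rewritten query small)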
        if (count > 0 && count < 200) {
            try {
                groupQuery = getGroupQuery(field, count, ordBytes);
            } catch (Exception e) {
                throw new IOException(e);
            }
        }
    } else {
        groupSet = new LongHashSet(docList.size());
        NumericDocValues collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
        int count = 0;
        for (int i = 0; i < globalDocs.length; i++) {
            int globalDoc = globalDocs[i];
            while (globalDoc >= nextDocBase) {
                currentContext++;
                currentDocBase = contexts.get(currentContext).docBase;
                nextDocBase = currentContext + 1 < contexts.size() ? contexts.get(currentContext + 1).docBase : Integer.MAX_VALUE;
                collapseValues = contexts.get(currentContext).reader().getNumericDocValues(field);
            }
            int contextDoc = globalDoc - currentDocBase;
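            // NumericDocValues is an iterator in this API: advance it to the target doc if it is behind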
            int valueDocID = collapseValues.docID();
            if (valueDocID < contextDoc) {
                valueDocID = collapseValues.advance(contextDoc);
            }
            long value;
            if (valueDocID == contextDoc) {
                value = collapseValues.longValue();
            } else {
                value = 0;
            }
            if (value != nullValue) {
                ++count;
                groupSet.add(value);
                collapsedSet.add(globalDoc);
            }
        }
        if (count > 0 && count < 200) {
            if (fieldType.isPointField()) {
                groupQuery = getPointGroupQuery(schemaField, count, groupSet);
            } else {
                groupQuery = getGroupQuery(field, fieldType, count, groupSet);
            }
        }
    }
    Collector collector;
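    // Rewrite the sort so that any rewriteable SortFields are resolved against this searcher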
    if (sort != null)
        sort = sort.rewrite(searcher);
    Collector groupExpandCollector = null;
    if (values != null) {
        //Get the top-level SortedDocValues again so we can re-iterate:
        if (CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
            Map<String, UninvertingReader.Type> mapping = new HashMap<>();
            mapping.put(field, UninvertingReader.Type.SORTED);
            UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
            values = uninvertingReader.getSortedDocValues(field);
        } else {
            values = DocValues.getSorted(reader, field);
        }
        groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
    } else {
        groupExpandCollector = new NumericGroupExpandCollector(field, nullValue, groupSet, collapsedSet, limit, sort);
    }
    if (groupQuery != null) {
        //Limits the results to documents that are in the same group as the documents in the page.
        newFilters.add(groupQuery);
    }
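    // getProcessedFilter folds the filter queries together; a post-filter, if present, wraps the expand collector as its last delegate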
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    if (pfilter.filter == null) {
        searcher.search(query, collector);
    } else {
        Query q = new BooleanQuery.Builder().add(query, Occur.MUST).add(pfilter.filter, Occur.FILTER).build();
        searcher.search(q, collector);
    }
    LongObjectMap<Collector> groups = ((GroupCollector) groupExpandCollector).getGroups();
    NamedList outMap = new SimpleOrderedMap();
    CharsRefBuilder charsRef = new CharsRefBuilder();
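    // Emit one DocSlice per group, keyed by the human-readable group value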
    for (LongObjectCursor<Collector> cursor : groups) {
        long groupValue = cursor.key;
        TopDocsCollector<?> topDocsCollector = TopDocsCollector.class.cast(cursor.value);
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore());
            if (fieldType instanceof StrField) {
                final BytesRef bytesRef = ordBytes.get((int) groupValue);
                fieldType.indexedToReadable(bytesRef, charsRef);
                String group = charsRef.toString();
                outMap.add(group, slice);
            } else {
                outMap.add(numericToString(fieldType, groupValue), slice);
            }
        }
    }
    rb.rsp.add("expanded", outMap);
}
Also used: StrField (org.apache.solr.schema.StrField), BooleanQuery (org.apache.lucene.search.BooleanQuery), Query (org.apache.lucene.search.Query), TermInSetQuery (org.apache.lucene.search.TermInSetQuery), SolrConstantScoreQuery (org.apache.solr.search.SolrConstantScoreQuery), HashMap (java.util.HashMap), LongObjectHashMap (com.carrotsearch.hppc.LongObjectHashMap), IntObjectHashMap (com.carrotsearch.hppc.IntObjectHashMap), ArrayList (java.util.ArrayList), IntHashSet (com.carrotsearch.hppc.IntHashSet), MultiDocValues (org.apache.lucene.index.MultiDocValues), DocSlice (org.apache.solr.search.DocSlice), ScoreDoc (org.apache.lucene.search.ScoreDoc), FixedBitSet (org.apache.lucene.util.FixedBitSet), Sort (org.apache.lucene.search.Sort), LeafReaderContext (org.apache.lucene.index.LeafReaderContext), CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder), SortedDocValues (org.apache.lucene.index.SortedDocValues), LongHashSet (com.carrotsearch.hppc.LongHashSet), SolrQueryRequest (org.apache.solr.request.SolrQueryRequest), NumberType (org.apache.solr.schema.NumberType), QParser (org.apache.solr.search.QParser), SolrParams (org.apache.solr.common.params.SolrParams), NumericDocValues (org.apache.lucene.index.NumericDocValues), DocIterator (org.apache.solr.search.DocIterator), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap), UninvertingReader (org.apache.solr.uninverting.UninvertingReader), TopDocs (org.apache.lucene.search.TopDocs), TopFieldCollector (org.apache.lucene.search.TopFieldCollector), LeafCollector (org.apache.lucene.search.LeafCollector), Collector (org.apache.lucene.search.Collector), TopScoreDocCollector (org.apache.lucene.search.TopScoreDocCollector), TopDocsCollector (org.apache.lucene.search.TopDocsCollector), BytesRef (org.apache.lucene.util.BytesRef), LeafReader (org.apache.lucene.index.LeafReader), FilterLeafReader (org.apache.lucene.index.FilterLeafReader), NamedList (org.apache.solr.common.util.NamedList), IOException (java.io.IOException), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), FieldType (org.apache.solr.schema.FieldType), CollapsingQParserPlugin (org.apache.solr.search.CollapsingQParserPlugin), SchemaField (org.apache.solr.schema.SchemaField), DocValuesType (org.apache.lucene.index.DocValuesType), LongValues (org.apache.lucene.util.LongValues), DocList (org.apache.solr.search.DocList)

Example 65 with ScoreDoc

Use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.

From the class AllGroupHeadsCollectorTest, method testRandom.

public void testRandom() throws Exception {
    int numberOfRuns = TestUtil.nextInt(random(), 3, 6);
    for (int iter = 0; iter < numberOfRuns; iter++) {
        if (VERBOSE) {
            System.out.println(String.format(Locale.ROOT, "TEST: iter=%d total=%d", iter, numberOfRuns));
        }
        final int numDocs = TestUtil.nextInt(random(), 100, 1000) * RANDOM_MULTIPLIER;
        final int numGroups = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
        }
        final List<BytesRef> groups = new ArrayList<>();
        for (int i = 0; i < numGroups; i++) {
            String randomValue;
            do {
                // Because of the DV-based impl we can't see the difference between an empty string and a null value.
                // For that reason we don't generate empty string groups.
                randomValue = TestUtil.randomRealisticUnicodeString(random());
            //randomValue = TestUtil.randomSimpleString(random());
            } while ("".equals(randomValue));
            groups.add(new BytesRef(randomValue));
        }
        final String[] contentStrings = new String[TestUtil.nextInt(random(), 2, 20)];
        if (VERBOSE) {
            System.out.println("TEST: create fake content");
        }
        for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            sb.append("real").append(random().nextInt(3)).append(' ');
            final int fakeCount = random().nextInt(10);
            for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) {
                sb.append("fake ");
            }
            contentStrings[contentIDX] = sb.toString();
            if (VERBOSE) {
                System.out.println("  content=" + sb.toString());
            }
        }
        Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
        Document doc = new Document();
        Document docNoGroup = new Document();
        Field valuesField = new SortedDocValuesField("group", new BytesRef());
        doc.add(valuesField);
        Field sort1 = new SortedDocValuesField("sort1", new BytesRef());
        doc.add(sort1);
        docNoGroup.add(sort1);
        Field sort2 = new SortedDocValuesField("sort2", new BytesRef());
        doc.add(sort2);
        docNoGroup.add(sort2);
        Field sort3 = new SortedDocValuesField("sort3", new BytesRef());
        doc.add(sort3);
        docNoGroup.add(sort3);
        Field content = newTextField("content", "", Field.Store.NO);
        doc.add(content);
        docNoGroup.add(content);
        NumericDocValuesField idDV = new NumericDocValuesField("id", 0);
        doc.add(idDV);
        docNoGroup.add(idDV);
        final GroupDoc[] groupDocs = new GroupDoc[numDocs];
        for (int i = 0; i < numDocs; i++) {
            final BytesRef groupValue;
            if (random().nextInt(24) == 17) {
                // So we test the "doc doesn't have the grouped field" case:
                groupValue = null;
            } else {
                groupValue = groups.get(random().nextInt(groups.size()));
            }
            final GroupDoc groupDoc = new GroupDoc(i, groupValue, groups.get(random().nextInt(groups.size())), groups.get(random().nextInt(groups.size())), new BytesRef(String.format(Locale.ROOT, "%05d", i)), contentStrings[random().nextInt(contentStrings.length)]);
            if (VERBOSE) {
                System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString() + " sort3=" + groupDoc.sort3.utf8ToString());
            }
            groupDocs[i] = groupDoc;
            if (groupDoc.group != null) {
                valuesField.setBytesValue(new BytesRef(groupDoc.group.utf8ToString()));
            }
            sort1.setBytesValue(groupDoc.sort1);
            sort2.setBytesValue(groupDoc.sort2);
            sort3.setBytesValue(groupDoc.sort3);
            content.setStringValue(groupDoc.content);
            idDV.setLongValue(groupDoc.id);
            if (groupDoc.group == null) {
                w.addDocument(docNoGroup);
            } else {
                w.addDocument(doc);
            }
        }
        final DirectoryReader r = w.getReader();
        w.close();
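        // Map each Lucene doc id to the stable id stored in the "id" doc values field (and back)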
        NumericDocValues values = MultiDocValues.getNumericValues(r, "id");
        final int[] docIDToFieldId = new int[numDocs];
        final int[] fieldIdToDocID = new int[numDocs];
        for (int i = 0; i < numDocs; i++) {
            assertEquals(i, values.nextDoc());
            int fieldId = (int) values.longValue();
            docIDToFieldId[i] = fieldId;
            fieldIdToDocID[fieldId] = i;
        }
        final IndexSearcher s = newSearcher(r);
        Set<Integer> seenIDs = new HashSet<>();
        for (int contentID = 0; contentID < 3; contentID++) {
            final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
            for (ScoreDoc hit : hits) {
                int idValue = docIDToFieldId[hit.doc];
                final GroupDoc gd = groupDocs[idValue];
                assertEquals(gd.id, idValue);
                seenIDs.add(idValue);
                assertTrue(gd.score == 0.0);
                gd.score = hit.score;
            }
        }
        // make sure all groups were seen across the hits
        assertEquals(groupDocs.length, seenIDs.size());
        // make sure scores are sane
        for (GroupDoc gd : groupDocs) {
            assertTrue(Float.isFinite(gd.score));
            assertTrue(gd.score >= 0.0);
        }
        for (int searchIter = 0; searchIter < 100; searchIter++) {
            if (VERBOSE) {
                System.out.println("TEST: searchIter=" + searchIter);
            }
            final String searchTerm = "real" + random().nextInt(3);
            boolean sortByScoreOnly = random().nextBoolean();
            Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
            AllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
            s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
            int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
            int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
            // The actual group heads contains Lucene ids. Need to change them into our id value.
            for (int i = 0; i < actualGroupHeads.length; i++) {
                actualGroupHeads[i] = docIDToFieldId[actualGroupHeads[i]];
            }
            // Allows us the easily iterate and assert the actual and expected results.
            Arrays.sort(expectedGroupHeads);
            Arrays.sort(actualGroupHeads);
            if (VERBOSE) {
                System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName());
                System.out.println("Sort within group: " + sortWithinGroup);
                System.out.println("Num group: " + numGroups);
                System.out.println("Num doc: " + numDocs);
                System.out.println("\n=== Expected: \n");
                for (int expectedDocId : expectedGroupHeads) {
                    GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                    String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.utf8ToString();
                    System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.utf8ToString(), expectedGroupDoc.sort2.utf8ToString(), expectedGroupDoc.sort3.utf8ToString(), expectedDocId));
                }
                System.out.println("\n=== Actual: \n");
                for (int actualDocId : actualGroupHeads) {
                    GroupDoc actualGroupDoc = groupDocs[actualDocId];
                    String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.utf8ToString();
                    System.out.println(String.format(Locale.ROOT, "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.utf8ToString(), actualGroupDoc.sort2.utf8ToString(), actualGroupDoc.sort3.utf8ToString(), actualDocId));
                }
                System.out.println("\n===================================================================================");
            }
            assertArrayEquals(expectedGroupHeads, actualGroupHeads);
        }
        r.close();
        dir.close();
    }
}
Also used: IndexSearcher (org.apache.lucene.search.IndexSearcher), NumericDocValues (org.apache.lucene.index.NumericDocValues), ArrayList (java.util.ArrayList), Document (org.apache.lucene.document.Document), ScoreDoc (org.apache.lucene.search.ScoreDoc), SortField (org.apache.lucene.search.SortField), NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField), BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField), SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField), Field (org.apache.lucene.document.Field), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), DocValuesType (org.apache.lucene.index.DocValuesType), Sort (org.apache.lucene.search.Sort), BytesRef (org.apache.lucene.util.BytesRef), Directory (org.apache.lucene.store.Directory), HashSet (java.util.HashSet), TermQuery (org.apache.lucene.search.TermQuery), DirectoryReader (org.apache.lucene.index.DirectoryReader), Term (org.apache.lucene.index.Term), RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)

Aggregations

ScoreDoc (org.apache.lucene.search.ScoreDoc): 222 usages
TopDocs (org.apache.lucene.search.TopDocs): 124 usages
IndexSearcher (org.apache.lucene.search.IndexSearcher): 98 usages
Document (org.apache.lucene.document.Document): 95 usages
Query (org.apache.lucene.search.Query): 71 usages
TermQuery (org.apache.lucene.search.TermQuery): 52 usages
IOException (java.io.IOException): 48 usages
ArrayList (java.util.ArrayList): 46 usages
IndexReader (org.apache.lucene.index.IndexReader): 45 usages
Term (org.apache.lucene.index.Term): 39 usages
Directory (org.apache.lucene.store.Directory): 37 usages
BooleanQuery (org.apache.lucene.search.BooleanQuery): 27 usages
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 26 usages
Test (org.junit.Test): 23 usages
Sort (org.apache.lucene.search.Sort): 22 usages
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 21 usages
HashMap (java.util.HashMap): 20 usages
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 20 usages
FieldDoc (org.apache.lucene.search.FieldDoc): 20 usages
QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 18 usages
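
For orientation, the pattern every example above shares can be reduced to a few lines: run a search, walk the returned ScoreDoc array (each entry pairs an internal doc id with its score), and resolve ids to stored documents. The sketch below is a minimal, self-contained illustration of that pattern, not code from the lucene-solr project; the "title" field, RAMDirectory, and StandardAnalyzer are assumptions chosen to match the Lucene 6/7-era API used in these snippets.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class ScoreDocSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative in-memory index with a single stored "title" field (assumed setup)
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        Document doc = new Document();
        doc.add(new TextField("title", "hello lucene", Field.Store.YES));
        writer.addDocument(doc);
        writer.close();
        DirectoryReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(new TermQuery(new Term("title", "lucene")), 10);
        for (ScoreDoc sd : topDocs.scoreDocs) {
            // sd.doc is the internal doc id; sd.score is the relevance score
            Document hit = searcher.doc(sd.doc);
            System.out.println(hit.get("title") + " score=" + sd.score);
        }
        reader.close();
        dir.close();
    }
}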