Search in sources :

Example 16 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testVeryLargeButLegalSortedBytes.

public void testVeryLargeButLegalSortedBytes() throws IOException {
    Analyzer analyzer = new MockAnalyzer(random());
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    byte[] bytes = new byte[32766];
    BytesRef b = new BytesRef(bytes);
    random().nextBytes(bytes);
    doc.add(new SortedDocValuesField("dv", b));
    iwriter.addDocument(doc);
    iwriter.close();
    // Now search the index:
    // read-only=true
    IndexReader ireader = DirectoryReader.open(directory);
    assert ireader.leaves().size() == 1;
    BinaryDocValues dv = DocValues.getBinary(ireader.leaves().get(0).reader(), "dv");
    assertEquals(0, dv.nextDoc());
    assertEquals(new BytesRef(bytes), dv.binaryValue());
    ireader.close();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 17 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestDrillSideways method testRandom.

public void testRandom() throws Exception {
    while (aChance == 0.0) {
        aChance = random().nextDouble();
    }
    while (bChance == 0.0) {
        bChance = random().nextDouble();
    }
    while (cChance == 0.0) {
        cChance = random().nextDouble();
    }
    //aChance = .01;
    //bChance = 0.5;
    //cChance = 1.0;
    double sum = aChance + bChance + cChance;
    aChance /= sum;
    bChance /= sum;
    cChance /= sum;
    int numDims = TestUtil.nextInt(random(), 2, 5);
    //int numDims = 3;
    int numDocs = atLeast(3000);
    //int numDocs = 20;
    if (VERBOSE) {
        System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
    }
    String[][] dimValues = new String[numDims][];
    int valueCount = 2;
    for (int dim = 0; dim < numDims; dim++) {
        Set<String> values = new HashSet<>();
        while (values.size() < valueCount) {
            String s = TestUtil.randomRealisticUnicodeString(random());
            //String s = _TestUtil.randomString(random());
            if (s.length() > 0) {
                values.add(s);
            }
        }
        dimValues[dim] = values.toArray(new String[values.size()]);
        valueCount *= 2;
    }
    List<Doc> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
        Doc doc = new Doc();
        doc.id = "" + i;
        doc.contentToken = randomContentToken(false);
        doc.dims = new int[numDims];
        doc.dims2 = new int[numDims];
        for (int dim = 0; dim < numDims; dim++) {
            if (random().nextInt(5) == 3) {
                // This doc is missing this dim:
                doc.dims[dim] = -1;
            } else if (dimValues[dim].length <= 4) {
                int dimUpto = 0;
                doc.dims[dim] = dimValues[dim].length - 1;
                while (dimUpto < dimValues[dim].length) {
                    if (random().nextBoolean()) {
                        doc.dims[dim] = dimUpto;
                        break;
                    }
                    dimUpto++;
                }
            } else {
                doc.dims[dim] = random().nextInt(dimValues[dim].length);
            }
            if (random().nextInt(5) == 3) {
                // 2nd value:
                doc.dims2[dim] = random().nextInt(dimValues[dim].length);
            } else {
                doc.dims2[dim] = -1;
            }
        }
        docs.add(doc);
    }
    Directory d = newDirectory();
    Directory td = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setInfoStream(InfoStream.NO_OUTPUT);
    RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
    FacetsConfig config = new FacetsConfig();
    for (int i = 0; i < numDims; i++) {
        config.setMultiValued("dim" + i, true);
    }
    boolean doUseDV = random().nextBoolean();
    for (Doc rawDoc : docs) {
        Document doc = new Document();
        doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
        doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
        doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
        if (VERBOSE) {
            System.out.println("  doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
        }
        for (int dim = 0; dim < numDims; dim++) {
            int dimValue = rawDoc.dims[dim];
            if (dimValue != -1) {
                if (doUseDV) {
                    doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
                } else {
                    doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
                }
                doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
                if (VERBOSE) {
                    System.out.println("    dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
                }
            }
            int dimValue2 = rawDoc.dims2[dim];
            if (dimValue2 != -1) {
                if (doUseDV) {
                    doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
                } else {
                    doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
                }
                doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
                if (VERBOSE) {
                    System.out.println("      dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
                }
            }
        }
        w.addDocument(config.build(tw, doc));
    }
    if (random().nextBoolean()) {
        // Randomly delete a few docs:
        int numDel = TestUtil.nextInt(random(), 1, (int) (numDocs * 0.05));
        if (VERBOSE) {
            System.out.println("delete " + numDel);
        }
        int delCount = 0;
        while (delCount < numDel) {
            Doc doc = docs.get(random().nextInt(docs.size()));
            if (!doc.deleted) {
                if (VERBOSE) {
                    System.out.println("  delete id=" + doc.id);
                }
                doc.deleted = true;
                w.deleteDocuments(new Term("id", doc.id));
                delCount++;
            }
        }
    }
    if (random().nextBoolean()) {
        if (VERBOSE) {
            System.out.println("TEST: forceMerge(1)...");
        }
        w.forceMerge(1);
    }
    IndexReader r = w.getReader();
    final SortedSetDocValuesReaderState sortedSetDVState;
    IndexSearcher s = newSearcher(r);
    if (doUseDV) {
        sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
    } else {
        sortedSetDVState = null;
    }
    if (VERBOSE) {
        System.out.println("r.numDocs() = " + r.numDocs());
    }
    // NRT open
    TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
    int numIters = atLeast(10);
    for (int iter = 0; iter < numIters; iter++) {
        String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
        int numDrillDown = TestUtil.nextInt(random(), 1, Math.min(4, numDims));
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
        }
        String[][] drillDowns = new String[numDims][];
        int count = 0;
        boolean anyMultiValuedDrillDowns = false;
        while (count < numDrillDown) {
            int dim = random().nextInt(numDims);
            if (drillDowns[dim] == null) {
                if (random().nextBoolean()) {
                    // Drill down on one value:
                    drillDowns[dim] = new String[] { dimValues[dim][random().nextInt(dimValues[dim].length)] };
                } else {
                    int orCount = TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
                    drillDowns[dim] = new String[orCount];
                    anyMultiValuedDrillDowns |= orCount > 1;
                    for (int i = 0; i < orCount; i++) {
                        while (true) {
                            String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
                            for (int j = 0; j < i; j++) {
                                if (value.equals(drillDowns[dim][j])) {
                                    value = null;
                                    break;
                                }
                            }
                            if (value != null) {
                                drillDowns[dim][i] = value;
                                break;
                            }
                        }
                    }
                }
                if (VERBOSE) {
                    BytesRef[] values = new BytesRef[drillDowns[dim].length];
                    for (int i = 0; i < values.length; i++) {
                        values[i] = new BytesRef(drillDowns[dim][i]);
                    }
                    System.out.println("  dim" + dim + "=" + Arrays.toString(values));
                }
                count++;
            }
        }
        Query baseQuery;
        if (contentToken == null) {
            baseQuery = new MatchAllDocsQuery();
        } else {
            baseQuery = new TermQuery(new Term("content", contentToken));
        }
        DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
        for (int dim = 0; dim < numDims; dim++) {
            if (drillDowns[dim] != null) {
                for (String value : drillDowns[dim]) {
                    ddq.add("dim" + dim, value);
                }
            }
        }
        Query filter;
        if (random().nextInt(7) == 6) {
            if (VERBOSE) {
                System.out.println("  only-even filter");
            }
            filter = new Query() {

                @Override
                public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
                    return new ConstantScoreWeight(this, boost) {

                        @Override
                        public Scorer scorer(LeafReaderContext context) throws IOException {
                            DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
                            return new ConstantScoreScorer(this, score(), new TwoPhaseIterator(approximation) {

                                @Override
                                public boolean matches() throws IOException {
                                    int docID = approximation.docID();
                                    return (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0;
                                }

                                @Override
                                public float matchCost() {
                                    return 1000f;
                                }
                            });
                        }
                    };
                }

                @Override
                public String toString(String field) {
                    return "drillSidewaysTestFilter";
                }

                @Override
                public boolean equals(Object o) {
                    return o == this;
                }

                @Override
                public int hashCode() {
                    return System.identityHashCode(this);
                }
            };
        } else {
            filter = null;
        }
        // Verify docs are always collected in order.  If we
        // had an AssertingScorer it could catch it when
        // Weight.scoresDocsOutOfOrder lies!:
        getNewDrillSideways(s, config, tr).search(ddq, new SimpleCollector() {

            int lastDocID;

            @Override
            public void collect(int doc) {
                assert doc > lastDocID;
                lastDocID = doc;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                lastDocID = -1;
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
        // subScorers are on the same docID:
        if (!anyMultiValuedDrillDowns) {
            // Can only do this test when there are no OR'd
            // drill-down values, because in that case it's
            // easily possible for one of the DD terms to be on
            // a future docID:
            getNewDrillSidewaysScoreSubdocsAtOnce(s, config, tr).search(ddq, new AssertingSubDocsAtOnceCollector());
        }
        TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
        Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
        DrillSideways ds;
        if (doUseDV) {
            ds = getNewDrillSideways(s, config, sortedSetDVState);
        } else {
            ds = getNewDrillSidewaysBuildFacetsResult(s, config, tr);
        }
        // Retrieve all facets:
        DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true);
        TopDocs hits = s.search(baseQuery, numDocs);
        Map<String, Float> scores = new HashMap<>();
        for (ScoreDoc sd : hits.scoreDocs) {
            scores.put(s.doc(sd.doc).get("id"), sd.score);
        }
        if (VERBOSE) {
            System.out.println("  verify all facets");
        }
        verifyEquals(dimValues, s, expected, actual, scores, doUseDV);
        // Make sure drill down doesn't change score:
        Query q = ddq;
        if (filter != null) {
            q = new BooleanQuery.Builder().add(q, Occur.MUST).add(filter, Occur.FILTER).build();
        }
        TopDocs ddqHits = s.search(q, numDocs);
        assertEquals(expected.hits.size(), ddqHits.totalHits);
        for (int i = 0; i < expected.hits.size(); i++) {
            // Score should be IDENTICAL:
            assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
        }
    }
    w.close();
    IOUtils.close(r, tr, tw, d, td);
}
Also used : Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) Scorer(org.apache.lucene.search.Scorer) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) SimpleCollector(org.apache.lucene.search.SimpleCollector) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) ScoreDoc(org.apache.lucene.search.ScoreDoc) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Sort(org.apache.lucene.search.Sort) HashSet(java.util.HashSet) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) SortedSetDocValuesReaderState(org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState) DefaultSortedSetDocValuesReaderState(org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState) TwoPhaseIterator(org.apache.lucene.search.TwoPhaseIterator) Term(org.apache.lucene.index.Term) Weight(org.apache.lucene.search.Weight) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) DrillSidewaysResult(org.apache.lucene.facet.DrillSideways.DrillSidewaysResult) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) TopDocs(org.apache.lucene.search.TopDocs) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight) DefaultSortedSetDocValuesReaderState(org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState) IndexReader(org.apache.lucene.index.IndexReader) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 18 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestValueSources method beforeClass.

@BeforeClass
public static void beforeClass() throws Exception {
    dir = newDirectory();
    analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwConfig = newIndexWriterConfig(analyzer);
    iwConfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
    for (String[] doc : documents) {
        Document document = new Document();
        document.add(new StringField("id", doc[0], Field.Store.NO));
        document.add(new SortedDocValuesField("id", new BytesRef(doc[0])));
        document.add(new NumericDocValuesField("double", Double.doubleToRawLongBits(Double.parseDouble(doc[1]))));
        document.add(new NumericDocValuesField("float", Float.floatToRawIntBits(Float.parseFloat(doc[2]))));
        document.add(new NumericDocValuesField("int", Integer.parseInt(doc[3])));
        document.add(new NumericDocValuesField("long", Long.parseLong(doc[4])));
        document.add(new StringField("string", doc[5], Field.Store.NO));
        document.add(new SortedDocValuesField("string", new BytesRef(doc[5])));
        document.add(new TextField("text", doc[6], Field.Store.NO));
        document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[7]))));
        document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[8]))));
        document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[9]))));
        document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[7]))));
        document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[8]))));
        document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[9]))));
        document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[10])));
        document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[11])));
        document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[12])));
        document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[10])));
        document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[11])));
        document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[12])));
        iw.addDocument(document);
    }
    reader = iw.getReader();
    searcher = newSearcher(reader);
    iw.close();
}
Also used : SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) BeforeClass(org.junit.BeforeClass)

Example 19 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class BaseExplanationTestCase method createDoc.

public static Document createDoc(int index) {
    Document doc = new Document();
    doc.add(newStringField(KEY, "" + index, Field.Store.NO));
    doc.add(new SortedDocValuesField(KEY, new BytesRef("" + index)));
    Field f = newTextField(FIELD, docFields[index], Field.Store.NO);
    doc.add(f);
    doc.add(newTextField(ALTFIELD, docFields[index], Field.Store.NO));
    return doc;
}
Also used : SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef)

Example 20 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestTopDocsMerge method testSort.

void testSort(boolean useFrom) throws Exception {
    IndexReader reader = null;
    Directory dir = null;
    final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
    final String[] tokens = new String[] { "a", "b", "c", "d", "e" };
    if (VERBOSE) {
        System.out.println("TEST: make index");
    }
    {
        dir = newDirectory();
        final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
        // w.setDoRandomForceMerge(false);
        // w.w.getConfig().setMaxBufferedDocs(atLeast(100));
        final String[] content = new String[atLeast(20)];
        for (int contentIDX = 0; contentIDX < content.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            final int numTokens = TestUtil.nextInt(random(), 1, 10);
            for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) {
                sb.append(tokens[random().nextInt(tokens.length)]).append(' ');
            }
            content[contentIDX] = sb.toString();
        }
        for (int docIDX = 0; docIDX < numDocs; docIDX++) {
            final Document doc = new Document();
            doc.add(new SortedDocValuesField("string", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
            doc.add(newTextField("text", content[random().nextInt(content.length)], Field.Store.NO));
            doc.add(new FloatDocValuesField("float", random().nextFloat()));
            final int intValue;
            if (random().nextInt(100) == 17) {
                intValue = Integer.MIN_VALUE;
            } else if (random().nextInt(100) == 17) {
                intValue = Integer.MAX_VALUE;
            } else {
                intValue = random().nextInt();
            }
            doc.add(new NumericDocValuesField("int", intValue));
            if (VERBOSE) {
                System.out.println("  doc=" + doc);
            }
            w.addDocument(doc);
        }
        reader = w.getReader();
        w.close();
    }
    // NOTE: sometimes reader has just one segment, which is
    // important to test
    final IndexSearcher searcher = newSearcher(reader);
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final ShardSearcher[] subSearchers;
    final int[] docStarts;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new ShardSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new ShardSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final List<SortField> sortFields = new ArrayList<>();
    sortFields.add(new SortField("string", SortField.Type.STRING, true));
    sortFields.add(new SortField("string", SortField.Type.STRING, false));
    sortFields.add(new SortField("int", SortField.Type.INT, true));
    sortFields.add(new SortField("int", SortField.Type.INT, false));
    sortFields.add(new SortField("float", SortField.Type.FLOAT, true));
    sortFields.add(new SortField("float", SortField.Type.FLOAT, false));
    sortFields.add(new SortField(null, SortField.Type.SCORE, true));
    sortFields.add(new SortField(null, SortField.Type.SCORE, false));
    sortFields.add(new SortField(null, SortField.Type.DOC, true));
    sortFields.add(new SortField(null, SortField.Type.DOC, false));
    int numIters = atLeast(300);
    for (int iter = 0; iter < numIters; iter++) {
        // TODO: custom FieldComp...
        final Query query = new TermQuery(new Term("text", tokens[random().nextInt(tokens.length)]));
        final Sort sort;
        if (random().nextInt(10) == 4) {
            // Sort by score
            sort = null;
        } else {
            final SortField[] randomSortFields = new SortField[TestUtil.nextInt(random(), 1, 3)];
            for (int sortIDX = 0; sortIDX < randomSortFields.length; sortIDX++) {
                randomSortFields[sortIDX] = sortFields.get(random().nextInt(sortFields.size()));
            }
            sort = new Sort(randomSortFields);
        }
        final int numHits = TestUtil.nextInt(random(), 1, numDocs + 5);
        if (VERBOSE) {
            System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
        }
        int from = -1;
        int size = -1;
        // First search on whole index:
        final TopDocs topHits;
        if (sort == null) {
            if (useFrom) {
                TopScoreDocCollector c = TopScoreDocCollector.create(numHits);
                searcher.search(query, c);
                from = TestUtil.nextInt(random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.topDocs();
                if (from < tempTopHits.scoreDocs.length) {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
                    System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
                    tempTopHits.scoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                } else {
                    topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
                }
            } else {
                topHits = searcher.search(query, numHits);
            }
        } else {
            final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
            searcher.search(query, c);
            if (useFrom) {
                from = TestUtil.nextInt(random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.topDocs();
                if (from < tempTopHits.scoreDocs.length) {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
                    System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
                    tempTopHits.scoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                } else {
                    topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
                }
            } else {
                topHits = c.topDocs(0, numHits);
            }
        }
        if (VERBOSE) {
            if (useFrom) {
                System.out.println("from=" + from + " size=" + size);
            }
            System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length + " maxScore=" + topHits.getMaxScore()));
            if (topHits.scoreDocs != null) {
                for (int hitIDX = 0; hitIDX < topHits.scoreDocs.length; hitIDX++) {
                    final ScoreDoc sd = topHits.scoreDocs[hitIDX];
                    System.out.println("    doc=" + sd.doc + " score=" + sd.score);
                }
            }
        }
        // ... then all shards:
        final Weight w = searcher.createNormalizedWeight(query, true);
        final TopDocs[] shardHits;
        if (sort == null) {
            shardHits = new TopDocs[subSearchers.length];
        } else {
            shardHits = new TopFieldDocs[subSearchers.length];
        }
        for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
            final TopDocs subHits;
            final ShardSearcher subSearcher = subSearchers[shardIDX];
            if (sort == null) {
                subHits = subSearcher.search(w, numHits);
            } else {
                final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
                subSearcher.search(w, c);
                subHits = c.topDocs(0, numHits);
            }
            shardHits[shardIDX] = subHits;
            if (VERBOSE) {
                System.out.println("  shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
                if (subHits.scoreDocs != null) {
                    for (ScoreDoc sd : subHits.scoreDocs) {
                        System.out.println("    doc=" + sd.doc + " score=" + sd.score);
                    }
                }
            }
        }
        // Merge:
        final TopDocs mergedHits;
        if (useFrom) {
            if (sort == null) {
                mergedHits = TopDocs.merge(from, size, shardHits, true);
            } else {
                mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
            }
        } else {
            if (sort == null) {
                mergedHits = TopDocs.merge(numHits, shardHits);
            } else {
                mergedHits = TopDocs.merge(sort, numHits, (TopFieldDocs[]) shardHits);
            }
        }
        if (mergedHits.scoreDocs != null) {
            // Make sure the returned shards are correct:
            for (int hitIDX = 0; hitIDX < mergedHits.scoreDocs.length; hitIDX++) {
                final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
                assertEquals("doc=" + sd.doc + " wrong shard", ReaderUtil.subIndex(sd.doc, docStarts), sd.shardIndex);
            }
        }
        TestUtil.assertEquals(topHits, mergedHits);
    }
    reader.close();
    dir.close();
}
Also used : ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) Term(org.apache.lucene.index.Term) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) CompositeReaderContext(org.apache.lucene.index.CompositeReaderContext) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)153 BytesRef (org.apache.lucene.util.BytesRef)152 Document (org.apache.lucene.document.Document)137 Directory (org.apache.lucene.store.Directory)109 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)87 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)66 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)53 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)35 StringField (org.apache.lucene.document.StringField)33 TextField (org.apache.lucene.document.TextField)31 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)30 Field (org.apache.lucene.document.Field)30 IndexReader (org.apache.lucene.index.IndexReader)30 Term (org.apache.lucene.index.Term)28 ArrayList (java.util.ArrayList)27 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)25 IndexSearcher (org.apache.lucene.search.IndexSearcher)25 TermQuery (org.apache.lucene.search.TermQuery)21 IntPoint (org.apache.lucene.document.IntPoint)20 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18