Search in sources :

Example 1 with MockRandomMergePolicy

Use of org.apache.lucene.index.MockRandomMergePolicy in the lucene-solr project by Apache.

From the class TestLucene60PointsFormat, method testEstimatePointCount.

/**
 * Checks {@code PointValues.estimatePointCount} on a single-segment index of 3-byte binary
 * points stored in field {@code "f"}, where exactly one document carries a value that no
 * other document shares.
 *
 * <p>Three visitors are exercised:
 * <ul>
 *   <li>every cell inside the query: the estimate must be
 *       {@code numLeaves * maxPointsInLeafNode};</li>
 *   <li>every cell outside the query: the estimate must be {@code 0};</li>
 *   <li>only cells whose bounds contain the unique value cross the query: the estimate must
 *       be {@code (maxPointsInLeafNode + 1) / 2}, or twice that.</li>
 * </ul>
 *
 * <p>The directory, writer and reader are held in try-with-resources blocks so they are
 * released even when an assertion or an indexing call fails. On the success path they are
 * still closed in the order writer, reader, directory.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testEstimatePointCount() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        // number of points per leaf hard to predict
        while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
            iwc.setMergePolicy(newMergePolicy());
        }
        final byte[] uniquePointValue = new byte[3];
        // Both are assigned exactly once inside the writer block and read after it.
        final int numDocs;
        final IndexReader openedReader;
        try (IndexWriter w = new IndexWriter(dir, iwc)) {
            byte[] pointValue = new byte[3];
            random().nextBytes(uniquePointValue);
            // make sure we have several leaves
            numDocs = atLeast(10000);
            for (int i = 0; i < numDocs; ++i) {
                Document doc = new Document();
                if (i == numDocs / 2) {
                    doc.add(new BinaryPoint("f", uniquePointValue));
                } else {
                    // Redraw until the value differs, so the unique value stays unique.
                    do {
                        random().nextBytes(pointValue);
                    } while (Arrays.equals(pointValue, uniquePointValue));
                    doc.add(new BinaryPoint("f", pointValue));
                }
                w.addDocument(doc);
            }
            w.forceMerge(1);
            // Open the reader from the writer before the writer is closed, as the
            // original sequence did.
            openedReader = DirectoryReader.open(w);
        }
        try (IndexReader r = openedReader) {
            final LeafReader lr = getOnlyLeafReader(r);
            PointValues points = lr.getPointValues("f");
            // If all points match, then the point count is numLeaves * maxPointsInLeafNode
            final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
            assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return Relation.CELL_INSIDE_QUERY;
                }
            }));
            // Return 0 if no points match
            assertEquals(0, points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return Relation.CELL_OUTSIDE_QUERY;
                }
            }));
            // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
            // in general, or maybe 2x that if the point is a split value
            final long pointCount = points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    // A cell is outside the query when the unique value falls outside
                    // its [min, max] bounds.
                    if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 || StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    return Relation.CELL_CROSSES_QUERY;
                }
            });
            assertTrue("" + pointCount, // common case
            pointCount == (maxPointsInLeafNode + 1) / 2 || // if the point is a split value
            pointCount == 2 * ((maxPointsInLeafNode + 1) / 2));
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with MockRandomMergePolicy

Use of org.apache.lucene.index.MockRandomMergePolicy in the lucene-solr project by Apache.

From the class TestEarlyTerminatingSortingCollector, method createRandomIndex.

/**
 * Populates the test fixture fields ({@code dir}, {@code numDocs}, {@code terms}, {@code iw},
 * {@code reader}) with a randomly generated index sorted by {@code sort}.
 *
 * <p>Documents come from {@code randomDocument()}. Commits and deletes by term on field
 * {@code "s"} are interleaved at random while indexing.
 *
 * @param singleSortedSegment if {@code true}, the index is force-merged down to one segment;
 *        otherwise it is randomly either left alone or force-merged to
 *        {@code FORCE_MERGE_MAX_SEGMENT_COUNT} segments
 * @throws IOException if any index operation fails
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
    dir = newDirectory();
    numDocs = atLeast(150);

    // Build the pool of distinct terms.
    final int termCount = TestUtil.nextInt(random(), 1, numDocs / 5);
    Set<String> uniqueTerms = new HashSet<>();
    while (uniqueTerms.size() < termCount) {
        String candidate = TestUtil.randomSimpleString(random());
        uniqueTerms.add(candidate);
    }
    terms = new ArrayList<>(uniqueTerms);

    // The analyzer and the writer are each given their own Random built from the same seed.
    final long sharedSeed = random().nextLong();
    final MockAnalyzer analyzer = new MockAnalyzer(new Random(sharedSeed));
    final IndexWriterConfig config = newIndexWriterConfig(analyzer);
    if (config.getMergePolicy() instanceof MockRandomMergePolicy) {
        // MockRandomMP randomly wraps the leaf readers, which breaks merging here
        config.setMergePolicy(newTieredMergePolicy());
    }
    // serial merges keep the test reproducible
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setIndexSort(sort);
    iw = new RandomIndexWriter(new Random(sharedSeed), dir, config);
    // turn off the writer's random force merges (they may happen anyway with MockRandomMP)
    iw.setDoRandomForceMerge(false);

    for (int docIndex = 0; docIndex < numDocs; ++docIndex) {
        iw.addDocument(randomDocument());

        // Always commit at the midpoint. Elsewhere, except after the last document,
        // commit with probability 1/8.
        boolean commitNow = docIndex == numDocs / 2;
        if (!commitNow && docIndex != numDocs - 1) {
            commitNow = random().nextInt(8) == 0;
        }
        if (commitNow) {
            iw.commit();
        }

        // With probability 1/15, delete every document containing a random term.
        if (random().nextInt(15) != 0) {
            continue;
        }
        iw.deleteDocuments(new Term("s", RandomPicks.randomFrom(random(), terms)));
    }

    if (!singleSortedSegment) {
        if (random().nextBoolean()) {
            iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
        }
    } else {
        // Because of deletions, there might still be a single flush segment in the
        // index, although we want a sorted segment, so it needs to be merged.
        // Refresh first:
        iw.getReader().close();
        iw.addDocument(new Document());
        iw.commit();
        iw.addDocument(new Document());
        iw.forceMerge(1);
    }
    reader = iw.getReader();
}
Also used : MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Random(java.util.Random) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) HashSet(java.util.HashSet) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

Document (org.apache.lucene.document.Document)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 MockRandomMergePolicy (org.apache.lucene.index.MockRandomMergePolicy)2 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 Random (java.util.Random)1 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1 BinaryPoint (org.apache.lucene.document.BinaryPoint)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 LeafReader (org.apache.lucene.index.LeafReader)1 PointValues (org.apache.lucene.index.PointValues)1 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)1 Relation (org.apache.lucene.index.PointValues.Relation)1 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)1 SerialMergeScheduler (org.apache.lucene.index.SerialMergeScheduler)1 Term (org.apache.lucene.index.Term)1 Directory (org.apache.lucene.store.Directory)1