Search in sources :

Example 21 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

In class TestBKD, method test2DLongOrdsOffline.

/**
 * Writes 100000 random 2D points (Integer.BYTES bytes per dimension) through a BKDWriter created
 * with longOrds=true and a 0.01f heap budget (presumably small enough to force the offline sort
 * path -- confirm against BKDWriter), then checks that intersecting the written tree visits
 * every point.
 *
 * The output and input streams are opened in try-with-resources so they are released even if
 * writing, reading or the final assertion fails.
 */
public void test2DLongOrdsOffline() throws Exception {
    try (Directory dir = newDirectory()) {
        int numDocs = 100000;
        boolean singleValuePerDoc = false;
        boolean longOrds = true;
        int offlineSorterMaxTempFiles = TestUtil.nextInt(random(), 2, 20);
        BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", 2, Integer.BYTES, 2, 0.01f, numDocs, singleValuePerDoc, longOrds, 1, offlineSorterMaxTempFiles);
        byte[] buffer = new byte[2 * Integer.BYTES];
        for (int i = 0; i < numDocs; i++) {
            // The same buffer is refilled with fresh random bytes for every document.
            random().nextBytes(buffer);
            w.add(buffer, i);
        }
        // File pointer returned by finish(); the reader is positioned there before opening.
        final long fp;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            fp = w.finish(out);
        }
        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(fp);
            BKDReader r = new BKDReader(in);
            // Single-element array so the anonymous visitor can mutate the counter.
            int[] count = new int[1];
            r.intersect(new IntersectVisitor() {

                @Override
                public void visit(int docID) {
                    count[0]++;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    visit(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Never reports CELL_OUTSIDE_QUERY, so every point is expected to be counted;
                    // the random choice exercises both visit() overloads.
                    if (random().nextInt(7) == 1) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            });
            assertEquals(numDocs, count[0]);
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 22 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

In class TestBKD, method testWastedLeadingBytes.

// Claims the maximum number of bytes per dim (PointValues.MAX_NUM_BYTES), but only uses the
// bottom 1-3 bytes of each dim; this would happen e.g. if a user indexes what are actually
// just short values as a LongPoint:
/**
 * Indexes 100000 points whose per-dimension values leave all but the trailing 1-3 bytes zero,
 * then checks that intersecting the written tree visits every point.
 *
 * The directory and both streams are opened in try-with-resources so they are released even if
 * writing, reading or the final assertion fails.
 */
public void testWastedLeadingBytes() throws Exception {
    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
    int bytesPerDim = PointValues.MAX_NUM_BYTES;
    int bytesUsed = TestUtil.nextInt(random(), 1, 3);
    try (Directory dir = newFSDirectory(createTempDir())) {
        int numDocs = 100000;
        BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", numDims, bytesPerDim, 32, 1f, numDocs, true);
        byte[] tmp = new byte[bytesUsed];
        byte[] buffer = new byte[numDims * bytesPerDim];
        for (int i = 0; i < numDocs; i++) {
            for (int dim = 0; dim < numDims; dim++) {
                random().nextBytes(tmp);
                // Copy the random bytes into the tail of this dimension's slot; the leading
                // (bytesPerDim - bytesUsed) bytes are never written and stay zero.
                System.arraycopy(tmp, 0, buffer, dim * bytesPerDim + (bytesPerDim - bytesUsed), tmp.length);
            }
            w.add(buffer, i);
        }
        // File pointer returned by finish(); the reader is positioned there before opening.
        final long fp;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            fp = w.finish(out);
        }
        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(fp);
            BKDReader r = new BKDReader(in);
            // Single-element array so the anonymous visitor can mutate the counter.
            int[] count = new int[1];
            r.intersect(new IntersectVisitor() {

                @Override
                public void visit(int docID) {
                    count[0]++;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    visit(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Never reports CELL_OUTSIDE_QUERY, so every point is expected to be counted.
                    if (random().nextInt(7) == 1) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            });
            assertEquals(numDocs, count[0]);
        }
    }
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IndexInput(org.apache.lucene.store.IndexInput) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 23 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

In class TestBKD, method testEstimatePointCount.

/**
 * Checks BKDReader.estimatePointCount against three visitors: one that matches every cell, one
 * that matches none, and one that only crosses cells whose [min, max] range contains a single
 * known value.
 *
 * The directory and the input stream are opened in try-with-resources so they are released even
 * if an assertion fails part-way through.
 *
 * @throws IOException if writing or reading the index fails
 */
public void testEstimatePointCount() throws IOException {
    try (Directory dir = newDirectory()) {
        // make sure to have multiple leaves
        final int numValues = atLeast(10000);
        final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
        final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
        final byte[] pointValue = new byte[numBytesPerDim];
        final byte[] uniquePointValue = new byte[numBytesPerDim];
        random().nextBytes(uniquePointValue);
        BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, numBytesPerDim, maxPointsInLeafNode, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues, true);
        for (int i = 0; i < numValues; ++i) {
            if (i == numValues / 2) {
                // Exactly one document carries the unique value.
                w.add(uniquePointValue, i);
            } else {
                // Re-draw until the random value differs from the unique one.
                do {
                    random().nextBytes(pointValue);
                } while (Arrays.equals(pointValue, uniquePointValue));
                w.add(pointValue, i);
            }
        }
        final long indexFP;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            indexFP = w.finish(out);
            w.close();
        }
        try (IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT)) {
            pointsIn.seek(indexFP);
            BKDReader points = new BKDReader(pointsIn);
            // Halve (rounding up) until the value fits within the configured leaf size.
            int actualMaxPointsInLeafNode = numValues;
            while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
                actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
            }
            // If all points match, then the point count is numLeaves * actualMaxPointsInLeafNode
            final int numLeaves = Integer.highestOneBit((numValues - 1) / actualMaxPointsInLeafNode) << 1;
            assertEquals(numLeaves * actualMaxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return Relation.CELL_INSIDE_QUERY;
                }
            }));
            // Return 0 if no points match
            assertEquals(0, points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return Relation.CELL_OUTSIDE_QUERY;
                }
            }));
            // If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
            // in general, or maybe 2x that if the point is a split value
            final long pointCount = points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    // A cell is outside when the unique value lies above its max or below its min.
                    if (StringHelper.compare(numBytesPerDim, uniquePointValue, 0, maxPackedValue, 0) > 0 || StringHelper.compare(numBytesPerDim, uniquePointValue, 0, minPackedValue, 0) < 0) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    return Relation.CELL_CROSSES_QUERY;
                }
            });
            // Common case: half a leaf (rounded up).
            final boolean commonCase = pointCount == (actualMaxPointsInLeafNode + 1) / 2;
            // If the point is a split value: twice that.
            final boolean splitValueCase = pointCount == 2 * ((actualMaxPointsInLeafNode + 1) / 2);
            assertTrue("" + pointCount, commonCase || splitValueCase);
        }
    }
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IndexInput(org.apache.lucene.store.IndexInput) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) IOException(java.io.IOException) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Aggregations

IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)23 Relation (org.apache.lucene.index.PointValues.Relation)23 Directory (org.apache.lucene.store.Directory)15 IOException (java.io.IOException)9 BitSet (java.util.BitSet)9 IndexInput (org.apache.lucene.store.IndexInput)9 IndexOutput (org.apache.lucene.store.IndexOutput)9 BinaryPoint (org.apache.lucene.document.BinaryPoint)8 Document (org.apache.lucene.document.Document)8 CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput)8 PointValues (org.apache.lucene.index.PointValues)7 FilterDirectory (org.apache.lucene.store.FilterDirectory)7 IntPoint (org.apache.lucene.document.IntPoint)6 LeafReader (org.apache.lucene.index.LeafReader)4 BigInteger (java.math.BigInteger)2 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)2 FieldInfo (org.apache.lucene.index.FieldInfo)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexWriter (org.apache.lucene.index.IndexWriter)2