Search in sources :

Example 21 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

From the class TestBKD, method testBasicInts1D.

/**
 * Writes 100 one-dimensional int points (each document's value equals its doc ID), then runs an
 * inclusive range query [42, 87] through an {@code IntersectVisitor} and checks that exactly the
 * documents inside that range were collected.
 */
public void testBasicInts1D() throws Exception {
    try (Directory directory = getDirectory(100)) {
        BKDWriter writer = new BKDWriter(100, directory, "tmp", 1, 4, 2, 1.0f, 100, true);
        byte[] encoded = new byte[4];
        int nextDoc = 0;
        while (nextDoc < 100) {
            // The indexed value is the doc ID itself, in sortable-bytes form.
            NumericUtils.intToSortableBytes(nextDoc, encoded, 0);
            writer.add(encoded, nextDoc);
            nextDoc++;
        }
        final long treeStart;
        try (IndexOutput output = directory.createOutput("bkd", IOContext.DEFAULT)) {
            treeStart = writer.finish(output);
        }
        try (IndexInput input = directory.openInput("bkd", IOContext.DEFAULT)) {
            input.seek(treeStart);
            BKDReader reader = new BKDReader(input);
            // Simple 1D range query, both bounds inclusive:
            final int lower = 42;
            final int upper = 87;
            final BitSet matched = new BitSet();
            reader.intersect(new IntersectVisitor() {

                @Override
                public void visit(int doc) {
                    // Called without a value: the doc is recorded unconditionally.
                    matched.set(doc);
                    if (VERBOSE) {
                        System.out.println("visit docID=" + doc);
                    }
                }

                @Override
                public void visit(int doc, byte[] packed) {
                    int value = NumericUtils.sortableBytesToInt(packed, 0);
                    if (VERBOSE) {
                        System.out.println("visit docID=" + doc + " x=" + value);
                    }
                    if (value < lower || value > upper) {
                        return;
                    }
                    matched.set(doc);
                }

                @Override
                public Relation compare(byte[] cellMinPacked, byte[] cellMaxPacked) {
                    int cellMin = NumericUtils.sortableBytesToInt(cellMinPacked, 0);
                    int cellMax = NumericUtils.sortableBytesToInt(cellMaxPacked, 0);
                    assert cellMax >= cellMin;
                    if (VERBOSE) {
                        System.out.println("compare: min=" + cellMin + " max=" + cellMax + " vs queryMin=" + lower + " queryMax=" + upper);
                    }
                    boolean disjoint = cellMax < lower || cellMin > upper;
                    if (disjoint) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    boolean contained = cellMin >= lower && cellMax <= upper;
                    return contained ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
                }
            });
            // Every doc ID in [0, 100) must be a hit if and only if it lies inside the query range.
            for (int doc = 0; doc < 100; doc++) {
                boolean shouldMatch = !(doc < lower || doc > upper);
                boolean didMatch = matched.get(doc);
                assertEquals("docID=" + doc, shouldMatch, didMatch);
            }
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BitSet(java.util.BitSet) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 22 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

From the class TestBKD, method test2DLongOrdsOffline.

/**
 * Writes 100000 random two-dimensional points with {@code longOrds} enabled, then intersects the
 * resulting tree with a visitor whose {@code compare} randomly answers "crosses" or "inside", and
 * asserts that the visitor was invoked once per indexed point.
 *
 * <p>NOTE(review): the {@code 0.01f} argument is presumably a very small in-heap sort budget meant
 * to force the offline sort path - confirm against the BKDWriter constructor.
 */
public void test2DLongOrdsOffline() throws Exception {
    try (Directory dir = newDirectory()) {
        int numDocs = 100000;
        boolean singleValuePerDoc = false;
        boolean longOrds = true;
        int offlineSorterMaxTempFiles = TestUtil.nextInt(random(), 2, 20);
        BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", 2, Integer.BYTES, 2, 0.01f, numDocs, singleValuePerDoc, longOrds, 1, offlineSorterMaxTempFiles);
        byte[] buffer = new byte[2 * Integer.BYTES];
        for (int i = 0; i < numDocs; i++) {
            random().nextBytes(buffer);
            w.add(buffer, i);
        }
        // Output and input are scoped with try-with-resources so they are released even when
        // finish(), intersect() or the assertion throws; otherwise the still-open file would be
        // left behind when the enclosing directory is closed.
        long fp;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            fp = w.finish(out);
        }
        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(fp);
            BKDReader r = new BKDReader(in);
            // Single-element array so the anonymous visitor can mutate the counter.
            int[] count = new int[1];
            r.intersect(new IntersectVisitor() {

                @Override
                public void visit(int docID) {
                    count[0]++;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // Values are not filtered: every visited point counts as a hit.
                    visit(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Never reports "outside"; randomly chooses between the two matching relations.
                    if (random().nextInt(7) == 1) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            });
            assertEquals(numDocs, count[0]);
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 23 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

From the class TestBKD, method testWastedLeadingBytes.

// Declares the maximum number of bytes per dimension (PointValues.MAX_NUM_BYTES) but only fills the
// trailing 1-3 bytes of each dimension, leaving the leading bytes zero; this would happen e.g. if a
// user indexes what are actually just short values as a LongPoint.
//
// After writing 100000 such points the tree is intersected with a visitor that never reports
// "outside", and the test asserts that the visitor was invoked once per indexed point.
public void testWastedLeadingBytes() throws Exception {
    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
    int bytesPerDim = PointValues.MAX_NUM_BYTES;
    int bytesUsed = TestUtil.nextInt(random(), 1, 3);
    // The directory, output and input are all scoped with try-with-resources so that a failure in
    // add(), finish(), intersect() or the assertion cannot leave them open.
    try (Directory dir = newFSDirectory(createTempDir())) {
        int numDocs = 100000;
        BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", numDims, bytesPerDim, 32, 1f, numDocs, true);
        byte[] tmp = new byte[bytesUsed];
        byte[] buffer = new byte[numDims * bytesPerDim];
        for (int i = 0; i < numDocs; i++) {
            for (int dim = 0; dim < numDims; dim++) {
                random().nextBytes(tmp);
                // Copy the random bytes into the last bytesUsed bytes of this dimension's slot.
                System.arraycopy(tmp, 0, buffer, dim * bytesPerDim + (bytesPerDim - bytesUsed), tmp.length);
            }
            w.add(buffer, i);
        }
        long fp;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            fp = w.finish(out);
        }
        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(fp);
            BKDReader r = new BKDReader(in);
            // Single-element array so the anonymous visitor can mutate the counter.
            int[] count = new int[1];
            r.intersect(new IntersectVisitor() {

                @Override
                public void visit(int docID) {
                    count[0]++;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // Values are not filtered: every visited point counts as a hit.
                    visit(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Never reports "outside"; randomly chooses between the two matching relations.
                    if (random().nextInt(7) == 1) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            });
            assertEquals(numDocs, count[0]);
        }
    }
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IndexInput(org.apache.lucene.store.IndexInput) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 24 with IntersectVisitor

Use of org.apache.lucene.index.PointValues.IntersectVisitor in the lucene-solr project by Apache.

From the class TestBKD, method testEstimatePointCount.

/**
 * Checks {@code BKDReader.estimatePointCount} against three visitors: one that reports every cell
 * as inside the query, one that reports every cell as outside, and one that only accepts cells
 * whose [min, max] range contains a single value that was indexed exactly once.
 *
 * @throws IOException if writing or reading the tree fails
 */
public void testEstimatePointCount() throws IOException {
    // The directory and the input are scoped with try-with-resources so that a failing assertion
    // or an I/O error cannot leave them open.
    try (Directory dir = newDirectory()) {
        // make sure to have multiple leaves
        final int numValues = atLeast(10000);
        final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
        final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
        final byte[] pointValue = new byte[numBytesPerDim];
        final byte[] uniquePointValue = new byte[numBytesPerDim];
        random().nextBytes(uniquePointValue);
        BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, numBytesPerDim, maxPointsInLeafNode, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues, true);
        for (int i = 0; i < numValues; ++i) {
            if (i == numValues / 2) {
                // The unique value is added exactly once, for the middle document.
                w.add(uniquePointValue, i);
            } else {
                // Re-draw until the random value differs from the unique one.
                do {
                    random().nextBytes(pointValue);
                } while (Arrays.equals(pointValue, uniquePointValue));
                w.add(pointValue, i);
            }
        }
        final long indexFP;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            indexFP = w.finish(out);
            w.close();
        }
        try (IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT)) {
            pointsIn.seek(indexFP);
            BKDReader points = new BKDReader(pointsIn);
            // Halve (rounding up) until the value fits within the configured leaf size.
            int actualMaxPointsInLeafNode = numValues;
            while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
                actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
            }
            // If all points match, then the point count is numLeaves * actualMaxPointsInLeafNode
            final int numLeaves = Integer.highestOneBit((numValues - 1) / actualMaxPointsInLeafNode) << 1;
            assertEquals(numLeaves * actualMaxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return Relation.CELL_INSIDE_QUERY;
                }
            }));
            // Return 0 if no points match
            assertEquals(0, points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    return Relation.CELL_OUTSIDE_QUERY;
                }
            }));
            // If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
            // in general, or maybe 2x that if the point is a split value
            final long pointCount = points.estimatePointCount(new IntersectVisitor() {

                @Override
                public void visit(int docID, byte[] packedValue) throws IOException {
                }

                @Override
                public void visit(int docID) throws IOException {
                }

                @Override
                public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                    // Cells whose [min, max] range cannot contain the unique value are outside.
                    if (StringHelper.compare(numBytesPerDim, uniquePointValue, 0, maxPackedValue, 0) > 0 || StringHelper.compare(numBytesPerDim, uniquePointValue, 0, minPackedValue, 0) < 0) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }
                    return Relation.CELL_CROSSES_QUERY;
                }
            });
            // Common case: half a leaf (rounded up). If the point is a split value: twice that.
            assertTrue("" + pointCount,
                pointCount == (actualMaxPointsInLeafNode + 1) / 2
                    || pointCount == 2 * ((actualMaxPointsInLeafNode + 1) / 2));
        }
    }
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IndexInput(org.apache.lucene.store.IndexInput) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) IOException(java.io.IOException) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Aggregations

IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)24 Relation (org.apache.lucene.index.PointValues.Relation)22 Directory (org.apache.lucene.store.Directory)14 IOException (java.io.IOException)9 PointValues (org.apache.lucene.index.PointValues)9 IndexInput (org.apache.lucene.store.IndexInput)9 IndexOutput (org.apache.lucene.store.IndexOutput)9 BitSet (java.util.BitSet)8 CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput)8 BinaryPoint (org.apache.lucene.document.BinaryPoint)7 Document (org.apache.lucene.document.Document)7 FilterDirectory (org.apache.lucene.store.FilterDirectory)7 IntPoint (org.apache.lucene.document.IntPoint)6 LeafReader (org.apache.lucene.index.LeafReader)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)4 DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder)4 FieldInfo (org.apache.lucene.index.FieldInfo)3 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)3 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)3 BigInteger (java.math.BigInteger)2