use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class TestBKD method testBasicInts1D.
public void testBasicInts1D() throws Exception {
try (Directory dir = getDirectory(100)) {
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 4, 2, 1.0f, 100, true);
byte[] scratch = new byte[4];
for (int docID = 0; docID < 100; docID++) {
NumericUtils.intToSortableBytes(docID, scratch, 0);
w.add(scratch, docID);
}
long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
indexFP = w.finish(out);
}
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in);
// Simple 1D range query:
final int queryMin = 42;
final int queryMax = 87;
final BitSet hits = new BitSet();
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
hits.set(docID);
if (VERBOSE) {
System.out.println("visit docID=" + docID);
}
}
@Override
public void visit(int docID, byte[] packedValue) {
int x = NumericUtils.sortableBytesToInt(packedValue, 0);
if (VERBOSE) {
System.out.println("visit docID=" + docID + " x=" + x);
}
if (x >= queryMin && x <= queryMax) {
hits.set(docID);
}
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
int min = NumericUtils.sortableBytesToInt(minPacked, 0);
int max = NumericUtils.sortableBytesToInt(maxPacked, 0);
assert max >= min;
if (VERBOSE) {
System.out.println("compare: min=" + min + " max=" + max + " vs queryMin=" + queryMin + " queryMax=" + queryMax);
}
if (max < queryMin || min > queryMax) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (min >= queryMin && max <= queryMax) {
return Relation.CELL_INSIDE_QUERY;
} else {
return Relation.CELL_CROSSES_QUERY;
}
}
});
for (int docID = 0; docID < 100; docID++) {
boolean expected = docID >= queryMin && docID <= queryMax;
boolean actual = hits.get(docID);
assertEquals("docID=" + docID, expected, actual);
}
}
}
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class TestBKD method test2DLongOrdsOffline.
public void test2DLongOrdsOffline() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 100000;
boolean singleValuePerDoc = false;
boolean longOrds = true;
int offlineSorterMaxTempFiles = TestUtil.nextInt(random(), 2, 20);
BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", 2, Integer.BYTES, 2, 0.01f, numDocs, singleValuePerDoc, longOrds, 1, offlineSorterMaxTempFiles);
byte[] buffer = new byte[2 * Integer.BYTES];
for (int i = 0; i < numDocs; i++) {
random().nextBytes(buffer);
w.add(buffer, i);
}
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
long fp = w.finish(out);
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in);
int[] count = new int[1];
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
count[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
visit(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
if (random().nextInt(7) == 1) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
assertEquals(numDocs, count[0]);
in.close();
}
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class TestBKD method testWastedLeadingBytes.
// Claims 16 bytes per dim, but only use the bottom N 1-3 bytes; this would happen e.g. if a user indexes what are actually just short
// values as a LongPoint:
public void testWastedLeadingBytes() throws Exception {
int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
int bytesPerDim = PointValues.MAX_NUM_BYTES;
int bytesUsed = TestUtil.nextInt(random(), 1, 3);
Directory dir = newFSDirectory(createTempDir());
int numDocs = 100000;
BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", numDims, bytesPerDim, 32, 1f, numDocs, true);
byte[] tmp = new byte[bytesUsed];
byte[] buffer = new byte[numDims * bytesPerDim];
for (int i = 0; i < numDocs; i++) {
for (int dim = 0; dim < numDims; dim++) {
random().nextBytes(tmp);
System.arraycopy(tmp, 0, buffer, dim * bytesPerDim + (bytesPerDim - bytesUsed), tmp.length);
}
w.add(buffer, i);
}
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
long fp = w.finish(out);
out.close();
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in);
int[] count = new int[1];
r.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
count[0]++;
}
@Override
public void visit(int docID, byte[] packedValue) {
visit(docID);
}
@Override
public Relation compare(byte[] minPacked, byte[] maxPacked) {
if (random().nextInt(7) == 1) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
assertEquals(numDocs, count[0]);
in.close();
dir.close();
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class TestBKD method testEstimatePointCount.
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
// make sure to have multiple leaves
final int numValues = atLeast(10000);
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[] pointValue = new byte[numBytesPerDim];
final byte[] uniquePointValue = new byte[numBytesPerDim];
random().nextBytes(uniquePointValue);
BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, numBytesPerDim, maxPointsInLeafNode, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues, true);
for (int i = 0; i < numValues; ++i) {
if (i == numValues / 2) {
w.add(uniquePointValue, i);
} else {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
w.add(pointValue, i);
}
}
final long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
indexFP = w.finish(out);
w.close();
}
IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
pointsIn.seek(indexFP);
BKDReader points = new BKDReader(pointsIn);
int actualMaxPointsInLeafNode = numValues;
while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
}
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = Integer.highestOneBit((numValues - 1) / actualMaxPointsInLeafNode) << 1;
assertEquals(numLeaves * actualMaxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
// in general, or maybe 2x that if the point is a split value
final long pointCount = points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(numBytesPerDim, uniquePointValue, 0, maxPackedValue, 0) > 0 || StringHelper.compare(numBytesPerDim, uniquePointValue, 0, minPackedValue, 0) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
});
assertTrue("" + pointCount, // common case
pointCount == (actualMaxPointsInLeafNode + 1) / 2 || // if the point is a split value
pointCount == 2 * ((actualMaxPointsInLeafNode + 1) / 2));
pointsIn.close();
dir.close();
}
Aggregations