Use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
In the class SimpleTextBKDWriter, method writeField1Dim.
/* In the 1D case, we can simply sort points in ascending order and use the
 * same writing logic as we use at merge time. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
  MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size()));

  final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);

  reader.intersect(new IntersectVisitor() {
    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
      oneDimWriter.add(packedValue, docID);
    }

    @Override
    public void visit(int docID) throws IOException {
      throw new IllegalStateException();
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_CROSSES_QUERY;
    }
  });

  return oneDimWriter.finish();
}
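Because compare always returns Relation.CELL_CROSSES_QUERY, intersect can never take the cell-level shortcuts (CELL_INSIDE_QUERY or CELL_OUTSIDE_QUERY) and is forced to hand every point to visit(docID, packedValue), which is how the sorted values are streamed into the OneDimensionBKDWriter; the single-argument visit is unreachable and only guards against misuse. The same idiom can be factored into a small helper. The sketch below is illustrative only: visitAllPoints is a hypothetical name, not a Lucene API, and it assumes imports of java.util.function.ObjIntConsumer and org.apache.lucene.index.PointValues.

// Hypothetical helper, not part of Lucene: an IntersectVisitor whose compare()
// always reports CELL_CROSSES_QUERY, forcing intersect() to call the per-value
// visit(docID, packedValue) for every indexed point.
static PointValues.IntersectVisitor visitAllPoints(ObjIntConsumer<byte[]> consumer) {
  return new PointValues.IntersectVisitor() {
    @Override
    public void visit(int docID) {
      // Unreachable: no cell is ever reported as fully inside the query.
      throw new IllegalStateException();
    }

    @Override
    public void visit(int docID, byte[] packedValue) {
      consumer.accept(packedValue, docID);
    }

    @Override
    public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return PointValues.Relation.CELL_CROSSES_QUERY;
    }
  };
}

A caller that only needs the doc IDs could then write, for example, values.intersect(visitAllPoints((packedValue, docID) -> matched.set(docID))), with matched being a java.util.BitSet.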
Use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
In the class SimpleTextPointsWriter, method writeField.
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
  PointValues values = reader.getValues(fieldInfo.name);
  boolean singleValuePerDoc = values.size() == values.getDocCount();

  // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk:
  try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(),
                                                            writeState.directory,
                                                            writeState.segmentInfo.name,
                                                            fieldInfo.getPointDimensionCount(),
                                                            fieldInfo.getPointNumBytes(),
                                                            SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
                                                            SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
                                                            values.size(),
                                                            singleValuePerDoc)) {

    values.intersect(new IntersectVisitor() {
      @Override
      public void visit(int docID) {
        throw new IllegalStateException();
      }

      @Override
      public void visit(int docID, byte[] packedValue) throws IOException {
        writer.add(packedValue, docID);
      }

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
        return Relation.CELL_CROSSES_QUERY;
      }
    });

    // We could have 0 points on merge since all docs with points may be deleted:
    if (writer.getPointCount() > 0) {
      indexFPs.put(fieldInfo.name, writer.finish(dataOut));
    }
  }
}
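The singleValuePerDoc flag above follows from the PointValues contract: size() counts every indexed point for the field, while getDocCount() counts documents that have at least one point for it, so the two are equal exactly when no document contributes more than one point. A small illustration with hypothetical documents (not from the SimpleText code):

// Two documents, three points in total for "field": size() == 3 but getDocCount() == 2,
// so singleValuePerDoc would be false for this field.
Document doc1 = new Document();
doc1.add(new IntPoint("field", 1));   // doc1: one point

Document doc2 = new Document();
doc2.add(new IntPoint("field", 2));   // doc2: two points for the same field
doc2.add(new IntPoint("field", 3));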
Use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
In the class BKDWriter, method writeField1Dim.
/* In the 1D case, we can simply sort points in ascending order and use the
 * same writing logic as we use at merge time. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
  MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size()));

  final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);

  reader.intersect(new IntersectVisitor() {
    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
      oneDimWriter.add(packedValue, docID);
    }

    @Override
    public void visit(int docID) throws IOException {
      throw new IllegalStateException();
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_CROSSES_QUERY;
    }
  });

  return oneDimWriter.finish();
}
Use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
In the class BasePointsFormatTestCase, method testBigIntNDims.
// Tests on N-dimensional points where each dimension is a BigInteger
public void testBigIntNDims() throws Exception {
  int numDocs = atLeast(1000);
  try (Directory dir = getDirectory(numDocs)) {
    int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    // We rely on docIDs not changing:
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    BigInteger[][] docs = new BigInteger[numDocs][];

    for (int docID = 0; docID < numDocs; docID++) {
      BigInteger[] values = new BigInteger[numDims];
      if (VERBOSE) {
        System.out.println(" docID=" + docID);
      }
      byte[][] bytes = new byte[numDims][];
      for (int dim = 0; dim < numDims; dim++) {
        values[dim] = randomBigInt(numBytesPerDim);
        bytes[dim] = new byte[numBytesPerDim];
        NumericUtils.bigIntToSortableBytes(values[dim], numBytesPerDim, bytes[dim], 0);
        if (VERBOSE) {
          System.out.println(" " + dim + " -> " + values[dim]);
        }
      }
      docs[docID] = values;
      Document doc = new Document();
      doc.add(new BinaryPoint("field", bytes));
      w.addDocument(doc);
    }

    DirectoryReader r = w.getReader();
    w.close();

    int iters = atLeast(100);
    for (int iter = 0; iter < iters; iter++) {
      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter);
      }

      // Random N dims rect query:
      BigInteger[] queryMin = new BigInteger[numDims];
      BigInteger[] queryMax = new BigInteger[numDims];
      for (int dim = 0; dim < numDims; dim++) {
        queryMin[dim] = randomBigInt(numBytesPerDim);
        queryMax[dim] = randomBigInt(numBytesPerDim);
        if (queryMin[dim].compareTo(queryMax[dim]) > 0) {
          BigInteger x = queryMin[dim];
          queryMin[dim] = queryMax[dim];
          queryMax[dim] = x;
        }
        if (VERBOSE) {
          System.out.println(" " + dim + "\n min=" + queryMin[dim] + "\n max=" + queryMax[dim]);
        }
      }

      final BitSet hits = new BitSet();
      for (LeafReaderContext ctx : r.leaves()) {
        PointValues dimValues = ctx.reader().getPointValues("field");
        if (dimValues == null) {
          continue;
        }
        final int docBase = ctx.docBase;

        dimValues.intersect(new IntersectVisitor() {
          @Override
          public void visit(int docID) {
            hits.set(docBase + docID);
            //System.out.println("visit docID=" + docID);
          }

          @Override
          public void visit(int docID, byte[] packedValue) {
            //System.out.println("visit check docID=" + docID);
            for (int dim = 0; dim < numDims; dim++) {
              BigInteger x = NumericUtils.sortableBytesToBigInt(packedValue, dim * numBytesPerDim, numBytesPerDim);
              if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
                //System.out.println(" no");
                return;
              }
            }
            //System.out.println(" yes");
            hits.set(docBase + docID);
          }

          @Override
          public Relation compare(byte[] minPacked, byte[] maxPacked) {
            boolean crosses = false;
            for (int dim = 0; dim < numDims; dim++) {
              BigInteger min = NumericUtils.sortableBytesToBigInt(minPacked, dim * numBytesPerDim, numBytesPerDim);
              BigInteger max = NumericUtils.sortableBytesToBigInt(maxPacked, dim * numBytesPerDim, numBytesPerDim);
              assert max.compareTo(min) >= 0;
              if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
                return Relation.CELL_OUTSIDE_QUERY;
              } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
                crosses = true;
              }
            }
            if (crosses) {
              return Relation.CELL_CROSSES_QUERY;
            } else {
              return Relation.CELL_INSIDE_QUERY;
            }
          }
        });
      }

      for (int docID = 0; docID < numDocs; docID++) {
        BigInteger[] docValues = docs[docID];
        boolean expected = true;
        for (int dim = 0; dim < numDims; dim++) {
          BigInteger x = docValues[dim];
          if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
            expected = false;
            break;
          }
        }
        boolean actual = hits.get(docID);
        assertEquals("docID=" + docID, expected, actual);
      }
    }
    r.close();
  }
}
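The test round-trips every dimension through NumericUtils.bigIntToSortableBytes and sortableBytesToBigInt; the encoding is fixed-width and order-preserving, which is what allows BKD cells to be compared on their raw packed bytes. A minimal sketch of that round trip, assuming only the same NumericUtils methods used in the test above:

int numBytesPerDim = 8;                        // example width; must be large enough for the value
BigInteger value = BigInteger.valueOf(-42);

byte[] encoded = new byte[numBytesPerDim];
NumericUtils.bigIntToSortableBytes(value, numBytesPerDim, encoded, 0);

BigInteger decoded = NumericUtils.sortableBytesToBigInt(encoded, 0, numBytesPerDim);
// decoded.equals(value) holds, and comparing two encodings byte-by-byte as unsigned values
// orders them the same way as comparing the original BigIntegers.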
Use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
In the class BasePointsFormatTestCase, method testAllPointDocsDeletedInSegment.
public void testAllPointDocsDeletedInSegment() throws Exception {
  Directory dir = getDirectory(20);
  IndexWriterConfig iwc = newIndexWriterConfig();
  IndexWriter w = new IndexWriter(dir, iwc);
  byte[] point = new byte[4];
  for (int i = 0; i < 10; i++) {
    Document doc = new Document();
    NumericUtils.intToSortableBytes(i, point, 0);
    doc.add(new BinaryPoint("dim", point));
    doc.add(new NumericDocValuesField("id", i));
    doc.add(newStringField("x", "x", Field.Store.NO));
    w.addDocument(doc);
  }
  w.addDocument(new Document());
  w.deleteDocuments(new Term("x", "x"));
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  w.close();

  DirectoryReader r = DirectoryReader.open(dir);
  assertEquals(1, r.numDocs());
  Bits liveDocs = MultiFields.getLiveDocs(r);

  for (LeafReaderContext ctx : r.leaves()) {
    PointValues values = ctx.reader().getPointValues("dim");
    NumericDocValues idValues = ctx.reader().getNumericDocValues("id");
    if (idValues == null) {
      // The reader will drop the 100% deleted segments, and the "id" field never exists in the final single-doc segment:
      continue;
    }

    int[] docIDToID = new int[ctx.reader().maxDoc()];
    int docID;
    while ((docID = idValues.nextDoc()) != NO_MORE_DOCS) {
      docIDToID[docID] = (int) idValues.longValue();
    }

    if (values != null) {
      BitSet seen = new BitSet();
      values.intersect(new IntersectVisitor() {
        @Override
        public Relation compare(byte[] minPacked, byte[] maxPacked) {
          return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) {
          throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
          if (liveDocs.get(docID)) {
            seen.set(docID);
          }
          assertEquals(docIDToID[docID], NumericUtils.sortableBytesToInt(packedValue, 0));
        }
      });
      assertEquals(0, seen.cardinality());
    }
  }
  IOUtils.close(r, dir);
}
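As the assertions show, intersect() visits deleted documents as well, so a caller that only wants live documents must consult the segment's live-docs bitset itself, exactly as the visitor above does with liveDocs.get(docID). One way to reuse that check is a delegating visitor like the sketch below; liveOnly is a hypothetical helper, not a Lucene API, and it assumes imports of java.io.IOException, org.apache.lucene.index.PointValues and org.apache.lucene.util.Bits.

// Hypothetical wrapper: forwards only live documents to the delegate visitor.
// A null liveDocs bitset means the segment has no deletions, so every doc is live.
static PointValues.IntersectVisitor liveOnly(Bits liveDocs, PointValues.IntersectVisitor delegate) {
  return new PointValues.IntersectVisitor() {
    @Override
    public void visit(int docID) throws IOException {
      if (liveDocs == null || liveDocs.get(docID)) {
        delegate.visit(docID);
      }
    }

    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
      if (liveDocs == null || liveDocs.get(docID)) {
        delegate.visit(docID, packedValue);
      }
    }

    @Override
    public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return delegate.compare(minPackedValue, maxPackedValue);
    }
  };
}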