use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class PointRangeQuery method createWeight.
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
return new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
// Doc's value is too low, in this dimension
return;
}
if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
// Doc's value is too high, in this dimension
return;
}
}
// Doc is in-bounds
adder.add(docID);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
};
}
/**
* Create a visitor that clears documents that do NOT match the range.
*/
private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
return new IntersectVisitor() {
@Override
public void visit(int docID) {
result.clear(docID);
cost[0]--;
}
@Override
public void visit(int docID, byte[] packedValue) {
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
// Doc's value is too low, in this dimension
result.clear(docID);
cost[0]--;
return;
}
if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
// Doc's value is too high, in this dimension
result.clear(docID);
cost[0]--;
return;
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
boolean crosses = false;
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
// This dim is not in the range
return Relation.CELL_INSIDE_QUERY;
}
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 || StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_OUTSIDE_QUERY;
}
}
};
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
// No docs in this segment/field indexed any points
return null;
}
if (values.getNumDimensions() != numDims) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + values.getNumDimensions() + " but this query has numDims=" + numDims);
}
if (bytesPerDim != values.getBytesPerDimension()) {
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
}
boolean allDocsMatch;
if (values.getDocCount() == reader.maxDoc()) {
final byte[] fieldPackedLower = values.getMinPackedValue();
final byte[] fieldPackedUpper = values.getMaxPackedValue();
allDocsMatch = true;
for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim;
if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0 || StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) {
allDocsMatch = false;
break;
}
}
} else {
allDocsMatch = false;
}
final Weight weight = this;
if (allDocsMatch) {
// all docs have a value and all points are within bounds, so everything matches
return new ScorerSupplier() {
@Override
public Scorer get(boolean randomAccess) {
return new ConstantScoreScorer(weight, score(), DocIdSetIterator.all(reader.maxDoc()));
}
@Override
public long cost() {
return reader.maxDoc();
}
};
} else {
return new ScorerSupplier() {
final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result);
long cost = -1;
@Override
public Scorer get(boolean randomAccess) throws IOException {
if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() && cost() > reader.maxDoc() / 2) {
// If all docs have exactly one value and the cost is greater
// than half the leaf size then maybe we can make things faster
// by computing the set of documents that do NOT match the range
final FixedBitSet result = new FixedBitSet(reader.maxDoc());
result.set(0, reader.maxDoc());
int[] cost = new int[] { reader.maxDoc() };
values.intersect(getInverseIntersectVisitor(result, cost));
final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
return new ConstantScoreScorer(weight, score(), iterator);
}
values.intersect(visitor);
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(weight, score(), iterator);
}
@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
};
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class TestLucene60PointsFormat method testEstimatePointCount2Dims.
// The tree is always balanced in the N dims case, and leaves are
// not all full so things are a bit different
public void testEstimatePointCount2Dims() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
byte[][] pointValue = new byte[2][];
pointValue[0] = new byte[3];
pointValue[1] = new byte[3];
byte[][] uniquePointValue = new byte[2][];
uniquePointValue[0] = new byte[3];
uniquePointValue[1] = new byte[3];
random().nextBytes(uniquePointValue[0]);
random().nextBytes(uniquePointValue[1]);
// make sure we have several leaves
final int numDocs = atLeast(10000);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
do {
random().nextBytes(pointValue[0]);
random().nextBytes(pointValue[1]);
} while (Arrays.equals(pointValue[0], uniquePointValue[0]) || Arrays.equals(pointValue[1], uniquePointValue[1]));
doc.add(new BinaryPoint("f", pointValue));
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
// With >1 dims, the tree is balanced
int actualMaxPointsInLeafNode = numDocs;
while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
}
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = Integer.highestOneBit((numDocs - 1) / actualMaxPointsInLeafNode) << 1;
assertEquals(numLeaves * actualMaxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
// in general, or maybe 2x that if the point is a split value
final long pointCount = points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
for (int dim = 0; dim < 2; ++dim) {
if (StringHelper.compare(3, uniquePointValue[dim], 0, maxPackedValue, dim * 3) > 0 || StringHelper.compare(3, uniquePointValue[dim], 0, minPackedValue, dim * 3) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
}
return Relation.CELL_CROSSES_QUERY;
}
});
assertTrue("" + pointCount, // common case
pointCount == (actualMaxPointsInLeafNode + 1) / 2 || // if the point is a split value
pointCount == 2 * ((actualMaxPointsInLeafNode + 1) / 2));
r.close();
dir.close();
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class TestPointValues method testTieBreakByDocID.
public void testTieBreakByDocID() throws Exception {
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig iwc = newIndexWriterConfig();
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 17));
for (int i = 0; i < 300000; i++) {
w.addDocument(doc);
if (random().nextInt(1000) == 17) {
w.commit();
}
}
IndexReader r = DirectoryReader.open(w);
for (LeafReaderContext ctx : r.leaves()) {
PointValues points = ctx.reader().getPointValues("int");
points.intersect(new IntersectVisitor() {
int lastDocID = -1;
@Override
public void visit(int docID) {
if (docID < lastDocID) {
fail("docs out of order: docID=" + docID + " but lastDocID=" + lastDocID);
}
lastDocID = docID;
}
@Override
public void visit(int docID, byte[] packedValue) {
visit(docID);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (random().nextBoolean()) {
return Relation.CELL_CROSSES_QUERY;
} else {
return Relation.CELL_INSIDE_QUERY;
}
}
});
}
r.close();
w.close();
dir.close();
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class LatLonPointDistanceQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters);
// create bounding box(es) for the distance range
// these are pre-encoded with LatLonPoint's encoding
final byte[] minLat = new byte[Integer.BYTES];
final byte[] maxLat = new byte[Integer.BYTES];
final byte[] minLon = new byte[Integer.BYTES];
final byte[] maxLon = new byte[Integer.BYTES];
// second set of longitude ranges to check (for cross-dateline case)
final byte[] minLon2 = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
// crosses dateline: split
if (box.crossesDateline()) {
// box1
NumericUtils.intToSortableBytes(Integer.MIN_VALUE, minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
// box2
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon2, 0);
} else {
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
// disable box2
NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0);
}
// compute exact sort key: avoid any asin() computations
final double sortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
final double axisLat = Rectangle.axisLat(latitude, radiusMeters);
return new ConstantScoreWeight(this, boost) {
final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
// No docs in this segment had any points fields
return null;
}
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
// No docs in this segment indexed this field at all
return null;
}
LatLonPoint.checkCompatible(fieldInfo);
// matching docids
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
// bounding box check
if (StringHelper.compare(Integer.BYTES, packedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, packedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
return;
}
if ((StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon, 0) < 0) && StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
return;
}
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if (distancePredicate.test(docLatitude, docLongitude)) {
adder.add(docID);
}
}
// algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
// 1. check our bounding box(es) first. if the subtree is entirely outside of those, bail.
// 2. check if the subtree is disjoint. it may cross the bounding box but not intersect with circle
// 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 4.
// 4. recurse naively (subtrees crossing over circle edge)
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
if ((StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0) && StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double latMin = decodeLatitude(minPackedValue, 0);
double lonMin = decodeLongitude(minPackedValue, Integer.BYTES);
double latMax = decodeLatitude(maxPackedValue, 0);
double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES);
return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);
}
};
final Weight weight = this;
return new ScorerSupplier() {
long cost = -1;
@Override
public Scorer get(boolean randomAccess) throws IOException {
values.intersect(visitor);
return new ConstantScoreScorer(weight, score(), result.build().iterator());
}
@Override
public long cost() {
if (cost == -1) {
cost = values.estimatePointCount(visitor);
}
assert cost >= 0;
return cost;
}
};
}
};
}
use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.
the class LatLonPointInPolygonQuery method createWeight.
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
// bounding box over all polygons, this can speed up tree intersection/cheaply improve approximation for complex multi-polygons
// these are pre-encoded with LatLonPoint's encoding
final Rectangle box = Rectangle.fromPolygon(polygons);
final byte[] minLat = new byte[Integer.BYTES];
final byte[] maxLat = new byte[Integer.BYTES];
final byte[] minLon = new byte[Integer.BYTES];
final byte[] maxLon = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
final Polygon2D tree = Polygon2D.create(polygons);
final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
// No docs in this segment had any points fields
return null;
}
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
// No docs in this segment indexed this field at all
return null;
}
LatLonPoint.checkCompatible(fieldInfo);
// matching docids
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
values.intersect(new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0), NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
adder.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0 || StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0) {
// outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double cellMinLat = decodeLatitude(minPackedValue, 0);
double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES);
double cellMaxLat = decodeLatitude(maxPackedValue, 0);
double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
return tree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
}
});
return new ConstantScoreScorer(this, score(), result.build().iterator());
}
};
}
Aggregations