Use of org.apache.lucene.index.PointValues in project lucene-solr by Apache.
Class TestPointValues, method testSparsePoints.
/**
 * Indexes documents in which each {@code IntPoint} field is present only rarely, then checks
 * that the per-field doc count and value count reported by {@link PointValues}, summed over
 * all leaves, match what was indexed.
 *
 * <p>For every document and every field there is a 1-in-100 chance that a point is added; when
 * one is added there is a further 1-in-10 chance that a second value is added to the same field
 * of the same document (which increases the value count but not the doc count).
 *
 * @throws Exception if indexing or reading fails
 */
public void testSparsePoints() throws Exception {
  // Resources are opened in nested try-with-resources blocks so that they are released even
  // when an assertion fails. The nesting (rather than one combined header) keeps the order of
  // random-consuming calls, and the close order reader -> writer -> directory, as before.
  try (Directory dir = newDirectory()) {
    int numDocs = atLeast(1000);
    int numFields = TestUtil.nextInt(random(), 1, 10);
    try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
      // Expected statistics, tracked while indexing.
      int[] fieldDocCounts = new int[numFields];
      int[] fieldSizes = new int[numFields];
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        for (int field = 0; field < numFields; field++) {
          String fieldName = "int" + field;
          if (random().nextInt(100) == 17) {
            doc.add(new IntPoint(fieldName, random().nextInt()));
            fieldDocCounts[field]++;
            fieldSizes[field]++;
            if (random().nextInt(10) == 5) {
              // add same field again!
              doc.add(new IntPoint(fieldName, random().nextInt()));
              fieldSizes[field]++;
            }
          }
        }
        w.addDocument(doc);
      }

      try (IndexReader r = w.getReader()) {
        for (int field = 0; field < numFields; field++) {
          int docCount = 0;
          // PointValues.size() returns a long; accumulate in a long instead of relying on the
          // silent narrowing performed by "int += long".
          long size = 0;
          String fieldName = "int" + field;
          for (LeafReaderContext ctx : r.leaves()) {
            PointValues points = ctx.reader().getPointValues(fieldName);
            // A leaf in which the field never occurred yields no PointValues at all.
            if (points != null) {
              docCount += points.getDocCount();
              size += points.size();
            }
          }
          assertEquals(fieldDocCounts[field], docCount);
          assertEquals(fieldSizes[field], size);
        }
      }
    }
  }
}
Use of org.apache.lucene.index.PointValues in project lucene-solr by Apache.
Class LatLonPointDistanceQuery, method createWeight.
/**
 * Builds the constant-score {@link Weight} for this distance query.
 *
 * <p>The method first derives a bounding box around the circle defined by {@code latitude},
 * {@code longitude} and {@code radiusMeters}, encodes its edges as sortable bytes, and then
 * returns a weight whose per-segment scorer intersects the segment's point values with a
 * visitor that applies the bounding-box test followed by the distance predicate.
 *
 * @param searcher not read anywhere in this method body
 * @param needsScores not read anywhere in this method body
 * @param boost passed unchanged to the {@code ConstantScoreWeight} constructor
 * @return a weight producing scorers over the documents accepted by the visitor
 * @throws IOException declared by the overridden method and by the nested scorer methods
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters);
// create bounding box(es) for the distance range
// these are pre-encoded with LatLonPoint's encoding
final byte[] minLat = new byte[Integer.BYTES];
final byte[] maxLat = new byte[Integer.BYTES];
final byte[] minLon = new byte[Integer.BYTES];
final byte[] maxLon = new byte[Integer.BYTES];
// second set of longitude ranges to check (for cross-dateline case)
// only a lower bound is kept for the second range; there is no maxLon2
final byte[] minLon2 = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
// crosses dateline: split
if (box.crossesDateline()) {
// box1
// box1 spans from the smallest encodable longitude up to box.maxLon
NumericUtils.intToSortableBytes(Integer.MIN_VALUE, minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
// box2
// box2 is open-ended: everything at or above box.minLon
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon2, 0);
} else {
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
// disable box2
// NOTE(review): presumably Integer.MAX_VALUE makes the "below minLon2" test hold for
// (almost) every longitude, so only box1 decides - confirm against the encoding.
NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0);
}
// compute exact sort key: avoid any asin() computations
final double sortKey = GeoUtils.distanceQuerySortKey(radiusMeters);
final double axisLat = Rectangle.axisLat(latitude, radiusMeters);
return new ConstantScoreWeight(this, boost) {
// per-point test used by visit(int, byte[]); created once per weight
final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);
// Convenience wrapper: obtains the supplier for this leaf and asks it for a scorer,
// or returns null when the supplier itself is null.
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
// Returns null when the leaf has no point values or no FieldInfo for the field; otherwise
// returns a supplier that defers the actual intersection until get() is called.
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
// No docs in this segment had any points fields
return null;
}
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
// No docs in this segment indexed this field at all
return null;
}
// NOTE(review): presumably rejects fields not indexed as LatLonPoint - confirm
LatLonPoint.checkCompatible(fieldInfo);
// matching docids
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = new IntersectVisitor() {
// re-assigned by every grow() call; the visit methods add through it
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
// Adds the doc without inspecting any value.
// NOTE(review): presumably only invoked for cells compare() reported as fully inside - confirm
@Override
public void visit(int docID) {
adder.add(docID);
}
// Checks a single point: cheap bounding-box rejection first, then the distance predicate.
@Override
public void visit(int docID, byte[] packedValue) {
// bounding box check
// latitude is read at offset 0 of packedValue, longitude at offset Integer.BYTES
if (StringHelper.compare(Integer.BYTES, packedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, packedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
return;
}
// rejected only if outside [minLon, maxLon] AND below minLon2, i.e. in neither box
if ((StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon, 0) < 0) && StringHelper.compare(Integer.BYTES, packedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
return;
}
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if (distancePredicate.test(docLatitude, docLongitude)) {
adder.add(docID);
}
}
// algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
// 1. check our bounding box(es) first. if the subtree is entirely outside of those, bail.
// 2. check if the subtree is disjoint. it may cross the bounding box but not intersect with circle
// 3. see if the subtree is fully contained. if the subtree is enormous along the x axis, wrapping half way around the world, etc: then this can't work, just go to step 4.
// 4. recurse naively (subtrees crossing over circle edge)
// In this method step 1 is the two explicit checks below; the remaining steps are
// delegated to GeoUtils.relate.
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, maxLat, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
// same two-box test as in visit(), applied to the cell's longitude extent
if ((StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, maxLon, 0) > 0 || StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon, 0) < 0) && StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double latMin = decodeLatitude(minPackedValue, 0);
double lonMin = decodeLongitude(minPackedValue, Integer.BYTES);
double latMax = decodeLatitude(maxPackedValue, 0);
double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES);
return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);
}
};
// inside the anonymous ScorerSupplier below, "this" would be the supplier, so the
// enclosing weight is captured in a local first
final Weight weight = this;
return new ScorerSupplier() {
// -1 means "not computed yet"; filled in by the first cost() call
long cost = -1;
// Runs the intersection and wraps the collected doc ids in a constant-score scorer.
// The randomAccess argument is not read here.
// NOTE(review): every call re-runs the intersection into the same builder; looks like
// this is meant to be called at most once per supplier - confirm.
@Override
public Scorer get(boolean randomAccess) throws IOException {
values.intersect(visitor);
return new ConstantScoreScorer(weight, score(), result.build().iterator());
}
// Lazily asks the point values for an estimate using the same visitor and caches it.
@Override
public long cost() {
if (cost == -1) {
cost = values.estimatePointCount(visitor);
}
assert cost >= 0;
return cost;
}
};
}
};
}
Use of org.apache.lucene.index.PointValues in project lucene-solr by Apache.
Class LatLonPointInPolygonQuery, method createWeight.
/**
 * Builds the constant-score {@link Weight} for this polygon query.
 *
 * <p>A bounding box covering all polygons is encoded up front and used as a cheap rejection
 * test for whole cells; cells that survive are related to the polygon tree, and individual
 * points are tested with the polygon predicate.
 *
 * @param searcher not read in this method body
 * @param needsScores not read in this method body
 * @param boost handed to the {@code ConstantScoreWeight} constructor
 * @return a weight whose scorer iterates the documents accepted by the visitor
 * @throws IOException declared by the overridden method and the nested scorer
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  // RandomAccessWeight is deliberately not used: approximating with "match all docs" is no
  // good here, since this is an inverted structure that should drive the first pass.

  // Bounding box over all polygons, pre-encoded with LatLonPoint's encoding. It speeds up tree
  // intersection and cheaply improves the approximation for complex multi-polygons.
  final Rectangle bounds = Rectangle.fromPolygon(polygons);
  final byte[] boxMinLat = new byte[Integer.BYTES];
  final byte[] boxMaxLat = new byte[Integer.BYTES];
  final byte[] boxMinLon = new byte[Integer.BYTES];
  final byte[] boxMaxLon = new byte[Integer.BYTES];
  NumericUtils.intToSortableBytes(encodeLatitude(bounds.minLat), boxMinLat, 0);
  NumericUtils.intToSortableBytes(encodeLatitude(bounds.maxLat), boxMaxLat, 0);
  NumericUtils.intToSortableBytes(encodeLongitude(bounds.minLon), boxMinLon, 0);
  NumericUtils.intToSortableBytes(encodeLongitude(bounds.maxLon), boxMaxLon, 0);

  final Polygon2D polygonTree = Polygon2D.create(polygons);
  final GeoEncodingUtils.PolygonPredicate insidePolygon =
      GeoEncodingUtils.createPolygonPredicate(polygons, polygonTree);

  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final LeafReader leaf = context.reader();

      final PointValues pointValues = leaf.getPointValues(field);
      if (pointValues == null) {
        // this segment holds no point values for the field
        return null;
      }

      final FieldInfo info = leaf.getFieldInfos().fieldInfo(field);
      if (info == null) {
        // this segment never indexed the field
        return null;
      }
      LatLonPoint.checkCompatible(info);

      // collects the matching doc ids
      final DocIdSetBuilder matches = new DocIdSetBuilder(leaf.maxDoc(), pointValues, field);

      final IntersectVisitor visitor = new IntersectVisitor() {

        // replaced on every grow() call; both visit methods add through it
        DocIdSetBuilder.BulkAdder adder;

        @Override
        public void grow(int count) {
          adder = matches.grow(count);
        }

        @Override
        public void visit(int docID) {
          adder.add(docID);
        }

        @Override
        public void visit(int docID, byte[] packedValue) {
          // latitude sits at offset 0, longitude right after it
          final int encodedLat = NumericUtils.sortableBytesToInt(packedValue, 0);
          final int encodedLon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
          if (insidePolygon.test(encodedLat, encodedLon)) {
            adder.add(docID);
          }
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
          // cell entirely above or below the global bounding box
          if (StringHelper.compare(Integer.BYTES, minPackedValue, 0, boxMaxLat, 0) > 0) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
          if (StringHelper.compare(Integer.BYTES, maxPackedValue, 0, boxMinLat, 0) < 0) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
          // cell entirely to one side of the global bounding box
          if (StringHelper.compare(Integer.BYTES, minPackedValue, Integer.BYTES, boxMaxLon, 0) > 0) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
          if (StringHelper.compare(Integer.BYTES, maxPackedValue, Integer.BYTES, boxMinLon, 0) < 0) {
            return Relation.CELL_OUTSIDE_QUERY;
          }

          // overlaps the bounding box: let the polygon tree decide
          final double south = decodeLatitude(minPackedValue, 0);
          final double west = decodeLongitude(minPackedValue, Integer.BYTES);
          final double north = decodeLatitude(maxPackedValue, 0);
          final double east = decodeLongitude(maxPackedValue, Integer.BYTES);
          return polygonTree.relate(south, north, west, east);
        }
      };

      pointValues.intersect(visitor);
      return new ConstantScoreScorer(this, score(), matches.build().iterator());
    }
  };
}
Use of org.apache.lucene.index.PointValues in project lucene-solr by Apache.
Class TestDocIdSetBuilder, method testLeverageStats.
/**
 * Checks that {@code DocIdSetBuilder} derives {@code numValuesPerDoc} and {@code multivalued}
 * from the statistics exposed by the {@code PointValues} or {@code Terms} it is given, and
 * that those settings influence the cost reported by the built set.
 *
 * <p>The same four scenarios are run for points and then for terms: equal statistics,
 * a 63-to-42 ratio (expected 1.5 values per doc), and two cases where one statistic is -1.
 *
 * @throws IOException declared because the builder and iterator calls may throw it
 */
public void testLeverageStats() throws IOException {
  // ---------- points: both statistics equal ----------
  final PointValues uniformPoints = new DummyPointValues(42, 42);
  final DocIdSetBuilder uniformPointsBuilder = new DocIdSetBuilder(100, uniformPoints, "foo");
  assertEquals(1d, uniformPointsBuilder.numValuesPerDoc, 0d);
  assertFalse(uniformPointsBuilder.multivalued);
  final DocIdSetBuilder.BulkAdder uniformPointsAdder = uniformPointsBuilder.grow(2);
  uniformPointsAdder.add(5);
  uniformPointsAdder.add(7);
  final DocIdSet uniformPointsSet = uniformPointsBuilder.build();
  assertTrue(uniformPointsSet instanceof BitDocIdSet);
  assertEquals(2, uniformPointsSet.iterator().cost());

  // ---------- points: statistics in a 63:42 ratio ----------
  final PointValues repeatedPoints = new DummyPointValues(42, 63);
  final DocIdSetBuilder repeatedPointsBuilder = new DocIdSetBuilder(100, repeatedPoints, "foo");
  assertEquals(1.5, repeatedPointsBuilder.numValuesPerDoc, 0d);
  assertTrue(repeatedPointsBuilder.multivalued);
  final DocIdSetBuilder.BulkAdder repeatedPointsAdder = repeatedPointsBuilder.grow(2);
  repeatedPointsAdder.add(5);
  repeatedPointsAdder.add(7);
  final DocIdSet repeatedPointsSet = repeatedPointsBuilder.build();
  assertTrue(repeatedPointsSet instanceof BitDocIdSet);
  // the builder assumes one doc contributed both entries, hence a cost of 1
  assertEquals(1, repeatedPointsSet.iterator().cost());

  // ---------- points: one statistic missing (-1) ----------
  final PointValues pointsWithoutSecondStat = new DummyPointValues(42, -1);
  final DocIdSetBuilder pointsWithoutSecondStatBuilder =
      new DocIdSetBuilder(100, pointsWithoutSecondStat, "foo");
  assertEquals(1d, pointsWithoutSecondStatBuilder.numValuesPerDoc, 0d);
  assertTrue(pointsWithoutSecondStatBuilder.multivalued);

  final PointValues pointsWithoutFirstStat = new DummyPointValues(-1, 84);
  final DocIdSetBuilder pointsWithoutFirstStatBuilder =
      new DocIdSetBuilder(100, pointsWithoutFirstStat, "foo");
  assertEquals(1d, pointsWithoutFirstStatBuilder.numValuesPerDoc, 0d);
  assertTrue(pointsWithoutFirstStatBuilder.multivalued);

  // ---------- terms: both statistics equal ----------
  final Terms uniformTerms = new DummyTerms(42, 42);
  final DocIdSetBuilder uniformTermsBuilder = new DocIdSetBuilder(100, uniformTerms);
  assertEquals(1d, uniformTermsBuilder.numValuesPerDoc, 0d);
  assertFalse(uniformTermsBuilder.multivalued);
  final DocIdSetBuilder.BulkAdder uniformTermsAdder = uniformTermsBuilder.grow(2);
  uniformTermsAdder.add(5);
  uniformTermsAdder.add(7);
  final DocIdSet uniformTermsSet = uniformTermsBuilder.build();
  assertTrue(uniformTermsSet instanceof BitDocIdSet);
  assertEquals(2, uniformTermsSet.iterator().cost());

  // ---------- terms: statistics in a 63:42 ratio ----------
  final Terms repeatedTerms = new DummyTerms(42, 63);
  final DocIdSetBuilder repeatedTermsBuilder = new DocIdSetBuilder(100, repeatedTerms);
  assertEquals(1.5, repeatedTermsBuilder.numValuesPerDoc, 0d);
  assertTrue(repeatedTermsBuilder.multivalued);
  final DocIdSetBuilder.BulkAdder repeatedTermsAdder = repeatedTermsBuilder.grow(2);
  repeatedTermsAdder.add(5);
  repeatedTermsAdder.add(7);
  final DocIdSet repeatedTermsSet = repeatedTermsBuilder.build();
  assertTrue(repeatedTermsSet instanceof BitDocIdSet);
  // the builder assumes one doc contributed both entries, hence a cost of 1
  assertEquals(1, repeatedTermsSet.iterator().cost());

  // ---------- terms: one statistic missing (-1) ----------
  final Terms termsWithoutSecondStat = new DummyTerms(42, -1);
  final DocIdSetBuilder termsWithoutSecondStatBuilder =
      new DocIdSetBuilder(100, termsWithoutSecondStat);
  assertEquals(1d, termsWithoutSecondStatBuilder.numValuesPerDoc, 0d);
  assertTrue(termsWithoutSecondStatBuilder.multivalued);

  final Terms termsWithoutFirstStat = new DummyTerms(-1, 84);
  final DocIdSetBuilder termsWithoutFirstStatBuilder =
      new DocIdSetBuilder(100, termsWithoutFirstStat);
  assertEquals(1d, termsWithoutFirstStatBuilder.numValuesPerDoc, 0d);
  assertTrue(termsWithoutFirstStatBuilder.multivalued);
}
Aggregations