use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
the class RangeFieldQuery method createWeight.
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
final RangeFieldComparator target = new RangeFieldComparator();
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
values.intersect(new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public void visit(int docID) throws IOException {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] leaf) throws IOException {
if (target.matches(leaf)) {
adder.add(docID);
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return compareRange(minPackedValue, maxPackedValue);
}
});
return result.build();
}
private Relation compareRange(byte[] minPackedValue, byte[] maxPackedValue) {
byte[] node = getInternalRange(minPackedValue, maxPackedValue);
// compute range relation for BKD traversal
if (target.intersects(node) == false) {
return Relation.CELL_OUTSIDE_QUERY;
} else if (target.within(node)) {
// target within cell; continue traversing:
return Relation.CELL_CROSSES_QUERY;
} else if (target.contains(node)) {
// target contains cell; add iff queryType is not a CONTAINS or CROSSES query:
return (queryType == QueryType.CONTAINS || queryType == QueryType.CROSSES) ? Relation.CELL_OUTSIDE_QUERY : Relation.CELL_INSIDE_QUERY;
}
// target intersects cell; continue traversing:
return Relation.CELL_CROSSES_QUERY;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
// no docs in this segment indexed any ranges
return null;
}
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo == null) {
// no docs in this segment indexed this field
return null;
}
checkFieldInfo(fieldInfo);
boolean allDocsMatch = false;
if (values.getDocCount() == reader.maxDoc() && compareRange(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) {
allDocsMatch = true;
}
DocIdSetIterator iterator = allDocsMatch == true ? DocIdSetIterator.all(reader.maxDoc()) : buildMatchingDocIdSet(reader, values).iterator();
return new ConstantScoreScorer(this, score(), iterator);
}
/** get an encoded byte representation of the internal node; this is
* the lower half of the min array and the upper half of the max array */
private byte[] getInternalRange(byte[] min, byte[] max) {
byte[] range = new byte[min.length];
final int dimSize = numDims * bytesPerDim;
System.arraycopy(min, 0, range, 0, dimSize);
System.arraycopy(max, dimSize, range, dimSize, dimSize);
return range;
}
};
}
use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
the class TestLucene60PointsFormat method testEstimatePointCount.
public void testEstimatePointCount() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// number of points per leaf hard to predict
while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
iwc.setMergePolicy(newMergePolicy());
}
IndexWriter w = new IndexWriter(dir, iwc);
byte[] pointValue = new byte[3];
byte[] uniquePointValue = new byte[3];
random().nextBytes(uniquePointValue);
// make sure we have several leaves
final int numDocs = atLeast(10000);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (i == numDocs / 2) {
doc.add(new BinaryPoint("f", uniquePointValue));
} else {
do {
random().nextBytes(pointValue);
} while (Arrays.equals(pointValue, uniquePointValue));
doc.add(new BinaryPoint("f", pointValue));
}
w.addDocument(doc);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
final LeafReader lr = getOnlyLeafReader(r);
PointValues points = lr.getPointValues("f");
// If all points match, then the point count is numLeaves * maxPointsInLeafNode
final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_INSIDE_QUERY;
}
}));
// Return 0 if no points match
assertEquals(0, points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_OUTSIDE_QUERY;
}
}));
// If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
// in general, or maybe 2x that if the point is a split value
final long pointCount = points.estimatePointCount(new IntersectVisitor() {
@Override
public void visit(int docID, byte[] packedValue) throws IOException {
}
@Override
public void visit(int docID) throws IOException {
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 || StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
return Relation.CELL_CROSSES_QUERY;
}
});
assertTrue("" + pointCount, // common case
pointCount == (maxPointsInLeafNode + 1) / 2 || // if the point is a split value
pointCount == 2 * ((maxPointsInLeafNode + 1) / 2));
r.close();
dir.close();
}
use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
the class TestPointValues method doTestMergedStats.
private void doTestMergedStats() throws IOException {
final int numDims = TestUtil.nextInt(random(), 1, 8);
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
Directory dir = new RAMDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
final int numDocs = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final int numPoints = random().nextInt(3);
for (int j = 0; j < numPoints; ++j) {
doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
}
w.addDocument(doc);
if (random().nextBoolean()) {
DirectoryReader.open(w).close();
}
}
final IndexReader reader1 = DirectoryReader.open(w);
w.forceMerge(1);
final IndexReader reader2 = DirectoryReader.open(w);
final PointValues expected = getOnlyLeafReader(reader2).getPointValues("field");
if (expected == null) {
assertNull(PointValues.getMinPackedValue(reader1, "field"));
assertNull(PointValues.getMaxPackedValue(reader1, "field"));
assertEquals(0, PointValues.getDocCount(reader1, "field"));
assertEquals(0, PointValues.size(reader1, "field"));
} else {
assertArrayEquals(expected.getMinPackedValue(), PointValues.getMinPackedValue(reader1, "field"));
assertArrayEquals(expected.getMaxPackedValue(), PointValues.getMaxPackedValue(reader1, "field"));
assertEquals(expected.getDocCount(), PointValues.getDocCount(reader1, "field"));
assertEquals(expected.size(), PointValues.size(reader1, "field"));
}
IOUtils.close(w, reader1, reader2, dir);
}
use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
the class Lucene60PointsWriter method writeField.
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
PointValues values = reader.getValues(fieldInfo.name);
boolean singleValuePerDoc = values.size() == values.getDocCount();
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, values.size(), singleValuePerDoc)) {
if (values instanceof MutablePointValues) {
final long fp = writer.writeField(dataOut, fieldInfo.name, (MutablePointValues) values);
if (fp != -1) {
indexFPs.put(fieldInfo.name, fp);
}
return;
}
values.intersect(new IntersectVisitor() {
@Override
public void visit(int docID) {
throw new IllegalStateException();
}
public void visit(int docID, byte[] packedValue) throws IOException {
writer.add(packedValue, docID);
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
// We could have 0 points on merge since all docs with dimensional fields may be deleted:
if (writer.getPointCount() > 0) {
indexFPs.put(fieldInfo.name, writer.finish(dataOut));
}
}
}
use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
the class LatLonPoint method nearest.
/**
* Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance.
* <p>
* This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort},
* but is far more efficient since it takes advantage of properties the indexed BKD tree. Currently this
* only works with {@link Lucene60PointsFormat} (used by the default codec). Multi-valued fields are
* currently not de-duplicated, so if a document had multiple instances of the specified field that
* make it into the top n, that document will appear more than once.
* <p>
* Documents are ordered by ascending distance from the location. The value returned in {@link FieldDoc} for
* the hits contains a Double instance with the distance in meters.
*
* @param searcher IndexSearcher to find nearest points from.
* @param field field name. must not be null.
* @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
* @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
* @param n the number of nearest neighbors to retrieve.
* @return TopFieldDocs containing documents ordered by distance, where the field value for each {@link FieldDoc} is the distance in meters
* @throws IllegalArgumentException if the underlying PointValues is not a {@code Lucene60PointsReader} (this is a current limitation), or
* if {@code field} or {@code searcher} is null, or if {@code latitude}, {@code longitude} or {@code n} are out-of-bounds
* @throws IOException if an IOException occurs while finding the points.
*/
// TODO: what about multi-valued documents? what happens?
public static TopFieldDocs nearest(IndexSearcher searcher, String field, double latitude, double longitude, int n) throws IOException {
GeoUtils.checkLatitude(latitude);
GeoUtils.checkLongitude(longitude);
if (n < 1) {
throw new IllegalArgumentException("n must be at least 1; got " + n);
}
if (field == null) {
throw new IllegalArgumentException("field must not be null");
}
if (searcher == null) {
throw new IllegalArgumentException("searcher must not be null");
}
List<BKDReader> readers = new ArrayList<>();
List<Integer> docBases = new ArrayList<>();
List<Bits> liveDocs = new ArrayList<>();
int totalHits = 0;
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
PointValues points = leaf.reader().getPointValues(field);
if (points != null) {
if (points instanceof BKDReader == false) {
throw new IllegalArgumentException("can only run on Lucene60PointsReader points implementation, but got " + points);
}
totalHits += points.getDocCount();
BKDReader reader = (BKDReader) points;
if (reader != null) {
readers.add(reader);
docBases.add(leaf.docBase);
liveDocs.add(leaf.reader().getLiveDocs());
}
}
}
NearestNeighbor.NearestHit[] hits = NearestNeighbor.nearest(latitude, longitude, readers, liveDocs, docBases, n);
// Convert to TopFieldDocs:
ScoreDoc[] scoreDocs = new ScoreDoc[hits.length];
for (int i = 0; i < hits.length; i++) {
NearestNeighbor.NearestHit hit = hits[i];
scoreDocs[i] = new FieldDoc(hit.docID, 0.0f, new Object[] { Double.valueOf(hit.distanceMeters) });
}
return new TopFieldDocs(totalHits, scoreDocs, null, 0.0f);
}
Aggregations