Use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
From the class TestLucene60PointsFormat, method testEstimatePointCount:
public void testEstimatePointCount() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // number of points per leaf hard to predict
  while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
    iwc.setMergePolicy(newMergePolicy());
  }
  IndexWriter w = new IndexWriter(dir, iwc);
  byte[] pointValue = new byte[3];
  byte[] uniquePointValue = new byte[3];
  random().nextBytes(uniquePointValue);
  // make sure we have several leaves
  final int numDocs = atLeast(10000);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    if (i == numDocs / 2) {
      doc.add(new BinaryPoint("f", uniquePointValue));
    } else {
      do {
        random().nextBytes(pointValue);
      } while (Arrays.equals(pointValue, uniquePointValue));
      doc.add(new BinaryPoint("f", pointValue));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  final IndexReader r = DirectoryReader.open(w);
  w.close();
  final LeafReader lr = getOnlyLeafReader(r);
  PointValues points = lr.getPointValues("f");
  // If all points match, then the point count is numLeaves * maxPointsInLeafNode
  final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
  assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {
    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
    }

    @Override
    public void visit(int docID) throws IOException {
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_INSIDE_QUERY;
    }
  }));
  // Return 0 if no points match
  assertEquals(0, points.estimatePointCount(new IntersectVisitor() {
    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
    }

    @Override
    public void visit(int docID) throws IOException {
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_OUTSIDE_QUERY;
    }
  }));
  // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
  // in general, or maybe 2x that if the point is a split value
  final long pointCount = points.estimatePointCount(new IntersectVisitor() {
    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
    }

    @Override
    public void visit(int docID) throws IOException {
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 ||
          StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
        return Relation.CELL_OUTSIDE_QUERY;
      }
      return Relation.CELL_CROSSES_QUERY;
    }
  });
assertTrue("" + pointCount, // common case
pointCount == (maxPointsInLeafNode + 1) / 2 || // if the point is a split value
pointCount == 2 * ((maxPointsInLeafNode + 1) / 2));
  r.close();
  dir.close();
}
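Note that estimatePointCount never calls either visit method: only compare is consulted, so the estimate is computed from the BKD index nodes alone without touching leaf data. That is what makes it cheap enough for query planning; Lucene relies on such cost estimates, for example, when IndexOrDocValuesQuery chooses between the points index and doc values. A minimal sketch of calling it outside a test, assuming an already-open LeafReader and a points field named "f" (hypothetical setup):

PointValues points = leafReader.getPointValues("f");
if (points != null) {
  long upperBound = points.estimatePointCount(new IntersectVisitor() {
    @Override
    public void visit(int docID) {
      // never invoked by estimatePointCount
    }

    @Override
    public void visit(int docID, byte[] packedValue) {
      // never invoked by estimatePointCount
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      // Treat every cell as partially matching; the estimate then upper-bounds
      // how many points an intersect() using this relation would visit.
      return Relation.CELL_CROSSES_QUERY;
    }
  });
}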
Use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
From the class TestPointValues, method doTestMergedStats:
private void doTestMergedStats() throws IOException {
  final int numDims = TestUtil.nextInt(random(), 1, 8);
  final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
  Directory dir = new RAMDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
  final int numDocs = TestUtil.nextInt(random(), 10, 20);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    final int numPoints = random().nextInt(3);
    for (int j = 0; j < numPoints; ++j) {
      doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
    }
    w.addDocument(doc);
    if (random().nextBoolean()) {
      DirectoryReader.open(w).close();
    }
  }
  final IndexReader reader1 = DirectoryReader.open(w);
  w.forceMerge(1);
  final IndexReader reader2 = DirectoryReader.open(w);
  final PointValues expected = getOnlyLeafReader(reader2).getPointValues("field");
  if (expected == null) {
    assertNull(PointValues.getMinPackedValue(reader1, "field"));
    assertNull(PointValues.getMaxPackedValue(reader1, "field"));
    assertEquals(0, PointValues.getDocCount(reader1, "field"));
    assertEquals(0, PointValues.size(reader1, "field"));
  } else {
    assertArrayEquals(expected.getMinPackedValue(), PointValues.getMinPackedValue(reader1, "field"));
    assertArrayEquals(expected.getMaxPackedValue(), PointValues.getMaxPackedValue(reader1, "field"));
    assertEquals(expected.getDocCount(), PointValues.getDocCount(reader1, "field"));
    assertEquals(expected.size(), PointValues.size(reader1, "field"));
  }
  IOUtils.close(w, reader1, reader2, dir);
}
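The snippet depends on a randomBinaryValue helper defined elsewhere in the test class. A plausible sketch of it, assuming it simply fills numDims * numBytesPerDim random bytes (the packed layout BinaryPoint expects):

private byte[] randomBinaryValue(int numDims, int numBytesPerDim) {
  // one contiguous packed value: numDims dimensions of numBytesPerDim bytes each
  byte[] value = new byte[numDims * numBytesPerDim];
  random().nextBytes(value);
  return value;
}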
Use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
From the class PointsWriter, method mergeOneField:
/** Default naive merge implementation for one field: it just re-indexes all the values
 *  from the incoming segment. The default codec overrides this for 1D fields and uses
 *  a faster but more complex implementation. */
protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
  long maxPointCount = 0;
  int docCount = 0;
  for (int i = 0; i < mergeState.pointsReaders.length; i++) {
    PointsReader pointsReader = mergeState.pointsReaders[i];
    if (pointsReader != null) {
      FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
      if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
        PointValues values = pointsReader.getValues(fieldInfo.name);
        if (values != null) {
          maxPointCount += values.size();
          docCount += values.getDocCount();
        }
      }
    }
  }
  final long finalMaxPointCount = maxPointCount;
  final int finalDocCount = docCount;
  writeField(fieldInfo, new PointsReader() {
    @Override
    public long ramBytesUsed() {
      return 0;
    }

    @Override
    public void close() throws IOException {
    }

    @Override
    public PointValues getValues(String fieldName) {
      if (fieldName.equals(fieldInfo.name) == false) {
        throw new IllegalArgumentException("field name must match the field being merged");
      }
      return new PointValues() {
        @Override
        public void intersect(IntersectVisitor mergedVisitor) throws IOException {
          for (int i = 0; i < mergeState.pointsReaders.length; i++) {
            PointsReader pointsReader = mergeState.pointsReaders[i];
            if (pointsReader == null) {
              // This segment has no points
              continue;
            }
            FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldName);
            if (readerFieldInfo == null) {
              // This segment never saw this field
              continue;
            }
            if (readerFieldInfo.getPointDimensionCount() == 0) {
              // This segment saw this field, but the field did not index points in it:
              continue;
            }
            PointValues values = pointsReader.getValues(fieldName);
            if (values == null) {
              continue;
            }
            MergeState.DocMap docMap = mergeState.docMaps[i];
            values.intersect(new IntersectVisitor() {
              @Override
              public void visit(int docID) {
                // Should never be called because our compare method never returns Relation.CELL_INSIDE_QUERY
                throw new IllegalStateException();
              }

              @Override
              public void visit(int docID, byte[] packedValue) throws IOException {
                int newDocID = docMap.get(docID);
                if (newDocID != -1) {
                  // Not deleted:
                  mergedVisitor.visit(newDocID, packedValue);
                }
              }

              @Override
              public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                // Forces this segment's PointsReader to always visit all docs + values:
                return Relation.CELL_CROSSES_QUERY;
              }
            });
          }
        }

        @Override
        public long estimatePointCount(IntersectVisitor visitor) {
          throw new UnsupportedOperationException();
        }

        @Override
        public byte[] getMinPackedValue() {
          throw new UnsupportedOperationException();
        }

        @Override
        public byte[] getMaxPackedValue() {
          throw new UnsupportedOperationException();
        }

        @Override
        public int getNumDimensions() {
          throw new UnsupportedOperationException();
        }

        @Override
        public int getBytesPerDimension() {
          throw new UnsupportedOperationException();
        }

        @Override
        public long size() {
          return finalMaxPointCount;
        }

        @Override
        public int getDocCount() {
          return finalDocCount;
        }
      };
    }

    @Override
    public void checkIntegrity() throws IOException {
      throw new UnsupportedOperationException();
    }
  });
}
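For context, mergeOneField is driven by PointsWriter.merge, which walks every field of the merged segment and calls it for each field that indexes points, then finishes the writer. A sketch of that driver loop, close to the shape of the default implementation but not copied from it:

public void merge(MergeState mergeState) throws IOException {
  for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
    if (fieldInfo.getPointDimensionCount() != 0) {
      // re-index this field's points from all incoming segments
      mergeOneField(mergeState, fieldInfo);
    }
  }
  finish(); // hook for writing any trailing index structures
}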
Use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
From the class SimpleTextPointsWriter, method writeField:
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
  PointValues values = reader.getValues(fieldInfo.name);
  boolean singleValuePerDoc = values.size() == values.getDocCount();
  // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk:
  try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(),
                                                            writeState.directory,
                                                            writeState.segmentInfo.name,
                                                            fieldInfo.getPointDimensionCount(),
                                                            fieldInfo.getPointNumBytes(),
                                                            SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
                                                            SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
                                                            values.size(),
                                                            singleValuePerDoc)) {
    values.intersect(new IntersectVisitor() {
      @Override
      public void visit(int docID) {
        throw new IllegalStateException();
      }

      @Override
      public void visit(int docID, byte[] packedValue) throws IOException {
        writer.add(packedValue, docID);
      }

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
        return Relation.CELL_CROSSES_QUERY;
      }
    });
    // We could have 0 points on merge since all docs with points may be deleted:
    if (writer.getPointCount() > 0) {
      indexFPs.put(fieldInfo.name, writer.finish(dataOut));
    }
  }
}
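This writeField path runs whenever a segment whose codec is SimpleText flushes or merges points. A hedged usage sketch that would route point writes through this class; the directory, field name, and value here are placeholders, and SimpleTextCodec is meant for debugging rather than production:

Directory dir = new RAMDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(null);
iwc.setCodec(new SimpleTextCodec()); // org.apache.lucene.codecs.simpletext.SimpleTextCodec
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("year", 2017)); // a 1D point, written via SimpleTextPointsWriter on flush
w.addDocument(doc);
w.close();
dir.close();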
Use of org.apache.lucene.index.PointValues in project lucene-solr by apache.
From the class PointInSetQuery, method createWeight:
@Override
public final Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      LeafReader reader = context.reader();
      PointValues values = reader.getPointValues(field);
      if (values == null) {
        // No docs in this segment/field indexed any points
        return null;
      }
      if (values.getNumDimensions() != numDims) {
        throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + values.getNumDimensions() + " but this query has numDims=" + numDims);
      }
      if (values.getBytesPerDimension() != bytesPerDim) {
        throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
      }
      DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
      if (numDims == 1) {
        // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
        values.intersect(new MergePointVisitor(sortedPackedPoints, result));
      } else {
        // NOTE: this is a naive implementation: for each queried point we re-walk the KD tree to intersect. We could
        // instead do a similar optimization as the 1D case, but that would likely mean building a query-time KD tree
        // so we could efficiently intersect against the index, which is probably tricky!
        SinglePointVisitor visitor = new SinglePointVisitor(result);
        TermIterator iterator = sortedPackedPoints.iterator();
        for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
          visitor.setPoint(point);
          values.intersect(visitor);
        }
      }
      return new ConstantScoreScorer(this, score(), result.build().iterator());
    }
  };
}
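Applications rarely construct PointInSetQuery directly; the concrete point field classes expose factory methods that subclass it with the matching numDims and bytesPerDim, so the checks in scorer above pass. For example, using the IntPoint.newSetQuery factory (the field name and searcher are hypothetical):

// matches documents whose 1D int point field "status" equals 2, 5, or 7
Query q = IntPoint.newSetQuery("status", 2, 5, 7);
TopDocs hits = searcher.search(q, 10);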