Use of org.apache.lucene.codecs.PointsReader in project lucene-solr by apache.
Class BaseGeoPointTestCase, method doRandomDistanceTest:
private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  int pointsInLeaf = 2 + random().nextInt(4);
  iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {
    @Override
    public PointsFormat pointsFormat() {
      return new PointsFormat() {
        @Override
        public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
          return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
        }

        @Override
        public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
          return new Lucene60PointsReader(readState);
        }
      };
    }
  });
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  for (int i = 0; i < numDocs; i++) {
    double latRaw = nextLatitude();
    double lonRaw = nextLongitude();
    // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
    double lat = quantizeLat(latRaw);
    double lon = quantizeLon(lonRaw);
    Document doc = new Document();
    addPointToDoc("field", doc, lat, lon);
    doc.add(new StoredField("lat", lat));
    doc.add(new StoredField("lon", lon));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  for (int i = 0; i < numQueries; i++) {
    double lat = nextLatitude();
    double lon = nextLongitude();
    double radius = 50000000D * random().nextDouble();
    BitSet expected = new BitSet();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
      double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
      double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
      if (distance <= radius) {
        expected.set(doc);
      }
    }
    TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
    BitSet actual = new BitSet();
    for (ScoreDoc doc : topDocs.scoreDocs) {
      actual.set(doc.doc);
    }
    try {
      assertEquals(expected, actual);
    } catch (AssertionError e) {
      System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
      for (int doc = 0; doc < reader.maxDoc(); doc++) {
        double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
        double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
        double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
        System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
      }
      throw e;
    }
  }
  reader.close();
  writer.close();
  dir.close();
}
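The key pattern here is the FilterCodec wrapper: everything delegates to the default codec except the points format, which is rebuilt with a tiny maxPointsInLeafNode so random tests exercise deep BKD trees. A minimal standalone sketch of the same idea follows; the SmallLeafCodec class name is hypothetical, not part of the original test:

import java.io.IOException;

import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;

// Hypothetical codec: identical to the default codec except that points are
// written with a configurable (small) number of points per BKD leaf.
class SmallLeafCodec extends FilterCodec {
  private final int pointsInLeaf;

  SmallLeafCodec(int pointsInLeaf) {
    super("Lucene70", TestUtil.getDefaultCodec());
    this.pointsInLeaf = pointsInLeaf;
  }

  @Override
  public PointsFormat pointsFormat() {
    return new PointsFormat() {
      @Override
      public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
        // Small leaves force many inner BKD nodes even for small test indices:
        return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
      }

      @Override
      public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
        return new Lucene60PointsReader(readState);
      }
    };
  }
}

With such a class, the inline anonymous codec above would reduce to iwc.setCodec(new SmallLeafCodec(2 + random().nextInt(4))).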
Use of org.apache.lucene.codecs.PointsReader in project lucene-solr by apache.
Class PointValuesWriter, method flush:
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, PointsWriter writer) throws IOException {
  PointValues points = new MutablePointValues() {
    final int[] ords = new int[numPoints];

    {
      for (int i = 0; i < numPoints; ++i) {
        ords[i] = i;
      }
    }

    @Override
    public void intersect(IntersectVisitor visitor) throws IOException {
      final BytesRef scratch = new BytesRef();
      final byte[] packedValue = new byte[packedBytesLength];
      for (int i = 0; i < numPoints; i++) {
        getValue(i, scratch);
        assert scratch.length == packedValue.length;
        System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
        visitor.visit(getDocID(i), packedValue);
      }
    }

    @Override
    public long estimatePointCount(IntersectVisitor visitor) {
      throw new UnsupportedOperationException();
    }

    @Override
    public byte[] getMinPackedValue() {
      throw new UnsupportedOperationException();
    }

    @Override
    public byte[] getMaxPackedValue() {
      throw new UnsupportedOperationException();
    }

    @Override
    public int getNumDimensions() {
      throw new UnsupportedOperationException();
    }

    @Override
    public int getBytesPerDimension() {
      throw new UnsupportedOperationException();
    }

    @Override
    public long size() {
      return numPoints;
    }

    @Override
    public int getDocCount() {
      return numDocs;
    }

    @Override
    public void swap(int i, int j) {
      int tmp = ords[i];
      ords[i] = ords[j];
      ords[j] = tmp;
    }

    @Override
    public int getDocID(int i) {
      return docIDs[ords[i]];
    }

    @Override
    public void getValue(int i, BytesRef packedValue) {
      final long offset = (long) packedBytesLength * ords[i];
      packedValue.length = packedBytesLength;
      bytes.setRawBytesRef(packedValue, offset);
    }

    @Override
    public byte getByteAt(int i, int k) {
      final long offset = (long) packedBytesLength * ords[i] + k;
      return bytes.readByte(offset);
    }
  };
  final PointValues values;
  if (sortMap == null) {
    values = points;
  } else {
    values = new MutableSortingPointValues((MutablePointValues) points, sortMap);
  }
  PointsReader reader = new PointsReader() {
    @Override
    public PointValues getValues(String fieldName) {
      if (fieldName.equals(fieldInfo.name) == false) {
        throw new IllegalArgumentException("fieldName must be the same");
      }
      return values;
    }

    @Override
    public void checkIntegrity() {
      throw new UnsupportedOperationException();
    }

    @Override
    public long ramBytesUsed() {
      return 0L;
    }

    @Override
    public void close() {
    }
  };
  writer.writeField(fieldInfo, reader);
}
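Note that the PointsReader built at the end is a throwaway adapter: it exists only so the in-memory values can be passed through PointsWriter.writeField, which expects the codec-level reader API. The same adapter pattern, extracted as a named class (the class name is illustrative, not from Lucene), might look like this:

import java.io.IOException;

import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.index.PointValues;

// Illustrative adapter: expose one in-memory PointValues instance under the
// PointsReader API so it can be handed to PointsWriter.writeField.
final class OneFieldPointsReader extends PointsReader {
  private final String field;
  private final PointValues values;

  OneFieldPointsReader(String field, PointValues values) {
    this.field = field;
    this.values = values;
  }

  @Override
  public PointValues getValues(String fieldName) {
    if (field.equals(fieldName) == false) {
      throw new IllegalArgumentException("unexpected field: " + fieldName);
    }
    return values;
  }

  @Override
  public void checkIntegrity() throws IOException {
    // Values live on the heap, so there are no on-disk checksums to verify.
  }

  @Override
  public long ramBytesUsed() {
    return 0L; // memory is accounted for by the indexing buffer, not this wrapper
  }

  @Override
  public void close() {
  }
}

With it, the tail of flush would collapse to writer.writeField(fieldInfo, new OneFieldPointsReader(fieldInfo.name, values)).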
Use of org.apache.lucene.codecs.PointsReader in project lucene-solr by apache.
Class CheckIndex, method testPoints:
/**
 * Test the points index
 * @lucene.experimental
 */
public static Status.PointsStatus testPoints(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
  if (infoStream != null) {
    infoStream.print(" test: points..............");
  }
  long startNS = System.nanoTime();
  FieldInfos fieldInfos = reader.getFieldInfos();
  Status.PointsStatus status = new Status.PointsStatus();
  try {
    if (fieldInfos.hasPointValues()) {
      PointsReader pointsReader = reader.getPointsReader();
      if (pointsReader == null) {
        throw new RuntimeException("there are fields with points, but reader.getPointsReader() is null");
      }
      for (FieldInfo fieldInfo : fieldInfos) {
        if (fieldInfo.getPointDimensionCount() > 0) {
          PointValues values = pointsReader.getValues(fieldInfo.name);
          if (values == null) {
            continue;
          }
          status.totalValueFields++;
          long size = values.size();
          int docCount = values.getDocCount();
          final long crossCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_CROSSES_QUERY));
          if (crossCost < size / 2) {
            throw new RuntimeException("estimatePointCount should return >= size/2 when all cells match");
          }
          final long insideCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_INSIDE_QUERY));
          if (insideCost < size) {
            throw new RuntimeException("estimatePointCount should return >= size when all cells fully match");
          }
          final long outsideCost = values.estimatePointCount(new ConstantRelationIntersectVisitor(Relation.CELL_OUTSIDE_QUERY));
          if (outsideCost != 0) {
            throw new RuntimeException("estimatePointCount should return 0 when no cells match");
          }
          VerifyPointsVisitor visitor = new VerifyPointsVisitor(fieldInfo.name, reader.maxDoc(), values);
          values.intersect(visitor);
          if (visitor.getPointCountSeen() != size) {
            throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + visitor.getPointCountSeen());
          }
          if (visitor.getDocCountSeen() != docCount) {
            throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but in fact has " + visitor.getDocCountSeen());
          }
          status.totalValuePoints += visitor.getPointCountSeen();
        }
      }
    }
    msg(infoStream, String.format(Locale.ROOT, "OK [%d fields, %d points] [took %.3f sec]", status.totalValueFields, status.totalValuePoints, nsToSec(System.nanoTime() - startNS)));
  } catch (Throwable e) {
    if (failFast) {
      throw IOUtils.rethrowAlways(e);
    }
    msg(infoStream, "ERROR: " + e);
    status.error = e;
    if (infoStream != null) {
      e.printStackTrace(infoStream);
    }
  }
  return status;
}
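testPoints is normally reached through CheckIndex.checkIndex, which runs it per segment and hangs the result off each segment's status. A minimal driver sketch, assuming an existing index at an illustrative path:

import java.nio.file.Paths;

import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckPointsDriver {
  public static void main(String[] args) throws Exception {
    // "/path/to/index" is a placeholder; point it at a real index directory.
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
         CheckIndex checker = new CheckIndex(dir)) {
      checker.setInfoStream(System.out);
      CheckIndex.Status status = checker.checkIndex();
      for (CheckIndex.Status.SegmentInfoStatus segment : status.segmentInfos) {
        // pointsStatus is the Status.PointsStatus populated by testPoints above:
        if (segment.pointsStatus != null) {
          System.out.println(segment.name + ": " + segment.pointsStatus.totalValueFields
              + " point fields, " + segment.pointsStatus.totalValuePoints + " points");
        }
      }
    }
  }
}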
Use of org.apache.lucene.codecs.PointsReader in project lucene-solr by apache.
Class Lucene60PointsWriter, method merge:
@Override
public void merge(MergeState mergeState) throws IOException {
  /**
   * If indexSort is activated and some of the leaves are not sorted the next test will catch that and the non-optimized merge will run.
   * If the readers are all sorted then it's safe to perform a bulk merge of the points.
   **/
  for (PointsReader reader : mergeState.pointsReaders) {
    if (reader instanceof Lucene60PointsReader == false) {
      // We can only bulk merge when all to-be-merged segments use our format:
      super.merge(mergeState);
      return;
    }
  }
  for (PointsReader reader : mergeState.pointsReaders) {
    if (reader != null) {
      reader.checkIntegrity();
    }
  }
  for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
    if (fieldInfo.getPointDimensionCount() != 0) {
      if (fieldInfo.getPointDimensionCount() == 1) {
        boolean singleValuePerDoc = true;
        // Worst case total maximum size (if none of the points are deleted):
        long totMaxSize = 0;
        for (int i = 0; i < mergeState.pointsReaders.length; i++) {
          PointsReader reader = mergeState.pointsReaders[i];
          if (reader != null) {
            FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
            FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
            if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
              PointValues values = reader.getValues(fieldInfo.name);
              if (values != null) {
                totMaxSize += values.size();
                singleValuePerDoc &= values.size() == values.getDocCount();
              }
            }
          }
        }
        // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
        // already sorted incoming segments, instead of trying to sort all points again as if
        // we were simply reindexing them:
        try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), maxPointsInLeafNode, maxMBSortInHeap, totMaxSize, singleValuePerDoc)) {
          List<BKDReader> bkdReaders = new ArrayList<>();
          List<MergeState.DocMap> docMaps = new ArrayList<>();
          for (int i = 0; i < mergeState.pointsReaders.length; i++) {
            PointsReader reader = mergeState.pointsReaders[i];
            if (reader != null) {
              // we confirmed this up above
              assert reader instanceof Lucene60PointsReader;
              Lucene60PointsReader reader60 = (Lucene60PointsReader) reader;
              // NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to this
              // reader's FieldInfo as we do below) because field numbers can easily be different
              // when addIndexes(Directory...) copies over segments from another index:
              FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
              FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
              if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
                BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                if (bkdReader != null) {
                  bkdReaders.add(bkdReader);
                  docMaps.add(mergeState.docMaps[i]);
                }
              }
            }
          }
          long fp = writer.merge(dataOut, docMaps, bkdReaders);
          if (fp != -1) {
            indexFPs.put(fieldInfo.name, fp);
          }
        }
      } else {
        mergeOneField(mergeState, fieldInfo);
      }
    }
  }
  finish();
}
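The optimized path above only runs when every incoming segment's points were written by this same format, which is the case whenever all segments use the default codec of that era. A minimal sketch that drives a merge through this code; directory, field names, and the no-arg IndexWriterConfig are assumptions matching the Lucene 7.x API this snippet comes from:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class ForcePointsMerge {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory()) {
      try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
        // Two commits produce two segments, each holding a 1D points field:
        for (int segment = 0; segment < 2; segment++) {
          for (long value = 0; value < 100; value++) {
            Document doc = new Document();
            doc.add(new LongPoint("point", value));
            writer.addDocument(doc);
          }
          writer.commit();
        }
        // Both segments were written by the default points format, so merge()
        // takes the bulk path: BKDWriter.merge merge-sorts the already sorted
        // per-segment trees instead of reindexing every point.
        writer.forceMerge(1);
      }
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        System.out.println("segments after forceMerge: " + reader.leaves().size());
      }
    }
  }
}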