use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
the class BaseGeoPointTestCase method verifyRandomDistances.
protected void verifyRandomDistances(double[] lats, double[] lons) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
// Else we can get O(N^2) merging:
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < lats.length / 100) {
iwc.setMaxBufferedDocs(lats.length / 100);
}
Directory dir;
if (lats.length > 100000) {
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
} else {
dir = newDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
IndexWriter w = new IndexWriter(dir, iwc);
for (int id = 0; id < lats.length; id++) {
Document doc = new Document();
doc.add(newStringField("id", "" + id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (Double.isNaN(lats[id]) == false) {
addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", "" + idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
final IndexReader r = DirectoryReader.open(w);
w.close();
IndexSearcher s = newSearcher(r);
int iters = atLeast(25);
Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
int maxDoc = s.getIndexReader().maxDoc();
for (int iter = 0; iter < iters; iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " s=" + s);
}
// Distance
final double centerLat = nextLatitude();
final double centerLon = nextLongitude();
// So the query can cover at most 50% of the earth's surface:
final double radiusMeters = random().nextDouble() * GeoUtils.EARTH_MEAN_RADIUS_METERS * Math.PI / 2.0 + 1.0;
if (VERBOSE) {
final DecimalFormat df = new DecimalFormat("#,###.00", DecimalFormatSymbols.getInstance(Locale.ENGLISH));
System.out.println(" radiusMeters = " + df.format(radiusMeters));
}
Query query = newDistanceQuery(FIELD_NAME, centerLat, centerLon, radiusMeters);
if (VERBOSE) {
System.out.println(" query=" + query);
}
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
boolean fail = false;
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int docID = 0; docID < maxDoc; docID++) {
assertEquals(docID, docIDToID.nextDoc());
int id = (int) docIDToID.longValue();
boolean expected;
if (liveDocs != null && liveDocs.get(docID) == false) {
// document is deleted
expected = false;
} else if (Double.isNaN(lats[id])) {
expected = false;
} else {
expected = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]) <= radiusMeters;
}
if (hits.get(docID) != expected) {
StringBuilder b = new StringBuilder();
if (expected) {
b.append("FAIL: id=" + id + " should match but did not\n");
} else {
b.append("FAIL: id=" + id + " should not match but did\n");
}
b.append(" query=" + query + " docID=" + docID + "\n");
b.append(" lat=" + lats[id] + " lon=" + lons[id] + "\n");
b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
if (Double.isNaN(lats[id]) == false) {
double distanceMeters = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]);
b.append(" centerLat=" + centerLat + " centerLon=" + centerLon + " distanceMeters=" + distanceMeters + " vs radiusMeters=" + radiusMeters);
}
if (true) {
fail("wrong hit (first of possibly more):\n\n" + b);
} else {
System.out.println(b.toString());
fail = true;
}
}
}
if (fail) {
fail("some hits were wrong");
}
}
IOUtils.close(r, dir);
}
use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
the class BaseGeoPointTestCase method verifyRandomPolygons.
protected void verifyRandomPolygons(double[] lats, double[] lons) throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig();
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
// Else we can get O(N^2) merging:
int mbd = iwc.getMaxBufferedDocs();
if (mbd != -1 && mbd < lats.length / 100) {
iwc.setMaxBufferedDocs(lats.length / 100);
}
Directory dir;
if (lats.length > 100000) {
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
} else {
dir = newDirectory();
}
Set<Integer> deleted = new HashSet<>();
// RandomIndexWriter is too slow here:
IndexWriter w = new IndexWriter(dir, iwc);
for (int id = 0; id < lats.length; id++) {
Document doc = new Document();
doc.add(newStringField("id", "" + id, Field.Store.NO));
doc.add(new NumericDocValuesField("id", id));
if (Double.isNaN(lats[id]) == false) {
addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
}
w.addDocument(doc);
if (id > 0 && random().nextInt(100) == 42) {
int idToDelete = random().nextInt(id);
w.deleteDocuments(new Term("id", "" + idToDelete));
deleted.add(idToDelete);
if (VERBOSE) {
System.out.println(" delete id=" + idToDelete);
}
}
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
final IndexReader r = DirectoryReader.open(w);
w.close();
// We can't wrap with "exotic" readers because points needs to work:
IndexSearcher s = newSearcher(r);
final int iters = atLeast(75);
Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
int maxDoc = s.getIndexReader().maxDoc();
for (int iter = 0; iter < iters; iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " s=" + s);
}
// Polygon
Polygon polygon = nextPolygon();
Query query = newPolygonQuery(FIELD_NAME, polygon);
if (VERBOSE) {
System.out.println(" query=" + query);
}
final FixedBitSet hits = new FixedBitSet(maxDoc);
s.search(query, new SimpleCollector() {
private int docBase;
@Override
public boolean needsScores() {
return false;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase;
}
@Override
public void collect(int doc) {
hits.set(docBase + doc);
}
});
boolean fail = false;
NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
for (int docID = 0; docID < maxDoc; docID++) {
assertEquals(docID, docIDToID.nextDoc());
int id = (int) docIDToID.longValue();
boolean expected;
if (liveDocs != null && liveDocs.get(docID) == false) {
// document is deleted
expected = false;
} else if (Double.isNaN(lats[id])) {
expected = false;
} else {
expected = GeoTestUtil.containsSlowly(polygon, lats[id], lons[id]);
}
if (hits.get(docID) != expected) {
StringBuilder b = new StringBuilder();
if (expected) {
b.append("FAIL: id=" + id + " should match but did not\n");
} else {
b.append("FAIL: id=" + id + " should not match but did\n");
}
b.append(" query=" + query + " docID=" + docID + "\n");
b.append(" lat=" + lats[id] + " lon=" + lons[id] + "\n");
b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
b.append(" polygon=" + polygon);
if (true) {
fail("wrong hit (first of possibly more):\n\n" + b);
} else {
System.out.println(b.toString());
fail = true;
}
}
}
if (fail) {
fail("some hits were wrong");
}
}
IOUtils.close(r, dir);
}
use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
the class TestNearest method testNearestNeighborRandom.
public void testNearestNeighborRandom() throws Exception {
int numPoints = atLeast(5000);
Directory dir;
if (numPoints > 100000) {
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
} else {
dir = newDirectory();
}
double[] lats = new double[numPoints];
double[] lons = new double[numPoints];
IndexWriterConfig iwc = getIndexWriterConfig();
iwc.setMergePolicy(newLogMergePolicy());
iwc.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (int id = 0; id < numPoints; id++) {
lats[id] = quantizeLat(GeoTestUtil.nextLatitude());
lons[id] = quantizeLon(GeoTestUtil.nextLongitude());
Document doc = new Document();
doc.add(new LatLonPoint("point", lats[id], lons[id]));
doc.add(new LatLonDocValuesField("point", lats[id], lons[id]));
doc.add(new StoredField("id", id));
w.addDocument(doc);
}
if (random().nextBoolean()) {
w.forceMerge(1);
}
DirectoryReader r = w.getReader();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
}
// can't wrap because we require Lucene60PointsFormat directly but e.g. ParallelReader wraps with its own points impl:
IndexSearcher s = newSearcher(r, false);
int iters = atLeast(100);
for (int iter = 0; iter < iters; iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
}
double pointLat = GeoTestUtil.nextLatitude();
double pointLon = GeoTestUtil.nextLongitude();
// dumb brute force search to get the expected result:
NearestHit[] expectedHits = new NearestHit[lats.length];
for (int id = 0; id < lats.length; id++) {
NearestHit hit = new NearestHit();
hit.distanceMeters = SloppyMath.haversinMeters(pointLat, pointLon, lats[id], lons[id]);
hit.docID = id;
expectedHits[id] = hit;
}
Arrays.sort(expectedHits, new Comparator<NearestHit>() {
@Override
public int compare(NearestHit a, NearestHit b) {
int cmp = Double.compare(a.distanceMeters, b.distanceMeters);
if (cmp != 0) {
return cmp;
}
// tie break by smaller docID:
return a.docID - b.docID;
}
});
int topN = TestUtil.nextInt(random(), 1, lats.length);
if (VERBOSE) {
System.out.println("\nhits for pointLat=" + pointLat + " pointLon=" + pointLon);
}
// Also test with MatchAllDocsQuery, sorting by distance:
TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonDocValuesField.newDistanceSort("point", pointLat, pointLon)));
ScoreDoc[] hits = LatLonPoint.nearest(s, "point", pointLat, pointLon, topN).scoreDocs;
for (int i = 0; i < topN; i++) {
NearestHit expected = expectedHits[i];
FieldDoc expected2 = (FieldDoc) fieldDocs.scoreDocs[i];
FieldDoc actual = (FieldDoc) hits[i];
Document actualDoc = r.document(actual.doc);
if (VERBOSE) {
System.out.println("hit " + i);
System.out.println(" expected id=" + expected.docID + " lat=" + lats[expected.docID] + " lon=" + lons[expected.docID] + " distance=" + expected.distanceMeters + " meters");
System.out.println(" actual id=" + actualDoc.getField("id") + " distance=" + actual.fields[0] + " meters");
}
assertEquals(expected.docID, actual.doc);
assertEquals(expected.distanceMeters, ((Double) actual.fields[0]).doubleValue(), 0.0);
assertEquals(expected.docID, expected.docID);
assertEquals(((Double) expected2.fields[0]).doubleValue(), expected.distanceMeters, 0.0);
}
}
r.close();
w.close();
dir.close();
}
use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
the class TestLatLonPointDistanceSort method doRandomTest.
private void doRandomTest(int numDocs, int numQueries) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// else seeds may not to reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StoredField("id", i));
doc.add(new NumericDocValuesField("id", i));
if (random().nextInt(10) > 7) {
double latRaw = GeoTestUtil.nextLatitude();
double lonRaw = GeoTestUtil.nextLongitude();
// pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
double lat = decodeLatitude(encodeLatitude(latRaw));
double lon = decodeLongitude(encodeLongitude(lonRaw));
doc.add(new LatLonDocValuesField("field", lat, lon));
doc.add(new StoredField("lat", lat));
doc.add(new StoredField("lon", lon));
}
// otherwise "missing"
writer.addDocument(doc);
}
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numQueries; i++) {
double lat = GeoTestUtil.nextLatitude();
double lon = GeoTestUtil.nextLongitude();
double missingValue = Double.POSITIVE_INFINITY;
Result[] expected = new Result[reader.maxDoc()];
for (int doc = 0; doc < reader.maxDoc(); doc++) {
Document targetDoc = reader.document(doc);
final double distance;
if (targetDoc.getField("lat") == null) {
// missing
distance = missingValue;
} else {
double docLatitude = targetDoc.getField("lat").numericValue().doubleValue();
double docLongitude = targetDoc.getField("lon").numericValue().doubleValue();
distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
}
int id = targetDoc.getField("id").numericValue().intValue();
expected[doc] = new Result(id, distance);
}
Arrays.sort(expected);
// randomize the topN a bit
int topN = TestUtil.nextInt(random(), 1, reader.maxDoc());
// sort by distance, then ID
SortField distanceSort = LatLonDocValuesField.newDistanceSort("field", lat, lon);
distanceSort.setMissingValue(missingValue);
Sort sort = new Sort(distanceSort, new SortField("id", SortField.Type.INT));
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), topN, sort);
for (int resultNumber = 0; resultNumber < topN; resultNumber++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[resultNumber];
Result actual = new Result((Integer) fieldDoc.fields[1], (Double) fieldDoc.fields[0]);
assertEquals(expected[resultNumber], actual);
}
// get page2 with searchAfter()
if (topN < reader.maxDoc()) {
int page2 = TestUtil.nextInt(random(), 1, reader.maxDoc() - topN);
TopDocs topDocs2 = searcher.searchAfter(topDocs.scoreDocs[topN - 1], new MatchAllDocsQuery(), page2, sort);
for (int resultNumber = 0; resultNumber < page2; resultNumber++) {
FieldDoc fieldDoc = (FieldDoc) topDocs2.scoreDocs[resultNumber];
Result actual = new Result((Integer) fieldDoc.fields[1], (Double) fieldDoc.fields[0]);
assertEquals(expected[topN + resultNumber], actual);
}
}
}
reader.close();
writer.close();
dir.close();
}
use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
the class BaseGeoPointTestCase method testRectBoundariesAreInclusive.
public void testRectBoundariesAreInclusive() throws Exception {
Rectangle rect;
// TODO: why this dateline leniency???
while (true) {
rect = nextBox();
if (rect.crossesDateline() == false) {
break;
}
}
// this test works in quantized space: for testing inclusiveness of exact edges it must be aware of index-time quantization!
rect = new Rectangle(quantizeLat(rect.minLat), quantizeLat(rect.maxLat), quantizeLon(rect.minLon), quantizeLon(rect.maxLon));
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
for (int x = 0; x < 3; x++) {
double lat;
if (x == 0) {
lat = rect.minLat;
} else if (x == 1) {
lat = quantizeLat((rect.minLat + rect.maxLat) / 2.0);
} else {
lat = rect.maxLat;
}
for (int y = 0; y < 3; y++) {
double lon;
if (y == 0) {
lon = rect.minLon;
} else if (y == 1) {
if (x == 1) {
continue;
}
lon = quantizeLon((rect.minLon + rect.maxLon) / 2.0);
} else {
lon = rect.maxLon;
}
Document doc = new Document();
addPointToDoc(FIELD_NAME, doc, lat, lon);
w.addDocument(doc);
}
}
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r, false);
// exact edge cases
assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon)));
// expand 1 ulp in each direction if possible and test a slightly larger box!
if (rect.minLat != -90) {
assertEquals(8, s.count(newRectQuery(FIELD_NAME, Math.nextDown(rect.minLat), rect.maxLat, rect.minLon, rect.maxLon)));
}
if (rect.maxLat != 90) {
assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, Math.nextUp(rect.maxLat), rect.minLon, rect.maxLon)));
}
if (rect.minLon != -180) {
assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, Math.nextDown(rect.minLon), rect.maxLon)));
}
if (rect.maxLon != 180) {
assertEquals(8, s.count(newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, Math.nextUp(rect.maxLon))));
}
// we can't do this if rectangle is actually a line or we will create a cross-dateline query
if (rect.minLat != 90 && rect.maxLat != -90 && rect.minLon != 80 && rect.maxLon != -180 && rect.minLon != rect.maxLon) {
// note we put points on "sides" not just "corners" so we just shrink all 4 at once for now: it should exclude all points!
assertEquals(0, s.count(newRectQuery(FIELD_NAME, Math.nextUp(rect.minLat), Math.nextDown(rect.maxLat), Math.nextUp(rect.minLon), Math.nextDown(rect.maxLon))));
}
r.close();
w.close();
dir.close();
}
Aggregations