Use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
In the class TestLRUQueryCache, the method testRamBytesUsedAgreesWithRamUsageTester:
// This test makes sure that by making the same assumptions as LRUQueryCache, RAMUsageTester
// computes the same memory usage.
public void testRamBytesUsedAgreesWithRamUsageTester() throws IOException {
  assumeFalse("LUCENE-7595: RamUsageTester does not work exact in Java 9 (estimations for maps and lists)", Constants.JRE_IS_MINIMUM_JAVA9);
  final LRUQueryCache queryCache = new LRUQueryCache(1 + random().nextInt(5), 1 + random().nextInt(10000), context -> random().nextBoolean());
  // an accumulator that only sums up memory usage of referenced filters and doc id sets
  final RamUsageTester.Accumulator acc = new RamUsageTester.Accumulator() {
    @Override
    public long accumulateObject(Object o, long shallowSize, Map<Field, Object> fieldValues, Collection<Object> queue) {
      if (o instanceof DocIdSet) {
        return ((DocIdSet) o).ramBytesUsed();
      }
      if (o instanceof Query) {
        return queryCache.ramBytesUsed((Query) o);
      }
      if (o instanceof IndexReader || o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
        // do not take readers or core cache keys into account
        return 0;
      }
      if (o instanceof Map) {
        Map<?, ?> map = (Map<?, ?>) o;
        queue.addAll(map.keySet());
        queue.addAll(map.values());
        final long sizePerEntry = o instanceof LinkedHashMap ? LRUQueryCache.LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY : LRUQueryCache.HASHTABLE_RAM_BYTES_PER_ENTRY;
        return sizePerEntry * map.size();
      }
      // follow links to other objects, but ignore their memory usage
      super.accumulateObject(o, shallowSize, fieldValues, queue);
      return 0;
    }

    @Override
    public long accumulateArray(Object array, long shallowSize, List<Object> values, Collection<Object> queue) {
      // follow links to other objects, but ignore their memory usage
      super.accumulateArray(array, shallowSize, values, queue);
      return 0;
    }
  };
  Directory dir = newDirectory();
  // serial merges so that segments do not get closed while we are measuring ram usage
  // with RamUsageTester
  IndexWriterConfig iwc = newIndexWriterConfig().setMergeScheduler(new SerialMergeScheduler());
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  final List<String> colors = Arrays.asList("blue", "red", "green", "yellow");
  Document doc = new Document();
  StringField f = new StringField("color", "", Store.NO);
  doc.add(f);
  final int iters = atLeast(5);
  for (int iter = 0; iter < iters; ++iter) {
    final int numDocs = atLeast(10);
    for (int i = 0; i < numDocs; ++i) {
      f.setStringValue(RandomPicks.randomFrom(random(), colors));
      w.addDocument(doc);
    }
    try (final DirectoryReader reader = w.getReader()) {
      final IndexSearcher searcher = newSearcher(reader);
      searcher.setQueryCache(queryCache);
      searcher.setQueryCachingPolicy(MAYBE_CACHE_POLICY);
      for (int i = 0; i < 3; ++i) {
        final Query query = new TermQuery(new Term("color", RandomPicks.randomFrom(random(), colors)));
        searcher.search(new ConstantScoreQuery(query), 1);
      }
    }
    queryCache.assertConsistent();
    assertEquals(RamUsageTester.sizeOf(queryCache, acc), queryCache.ramBytesUsed());
  }
  w.close();
  dir.close();
}
Use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
In the class BaseGeoPointTestCase, the method testMultiValued:
public void testMultiValued() throws Exception {
  int numPoints = atLeast(10000);
  // Every doc has 2 points:
  double[] lats = new double[2 * numPoints];
  double[] lons = new double[2 * numPoints];
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // We rely on docID order:
  iwc.setMergePolicy(newLogMergePolicy());
  // and on seeds being able to reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  for (int id = 0; id < numPoints; id++) {
    Document doc = new Document();
    lats[2 * id] = quantizeLat(nextLatitude());
    lons[2 * id] = quantizeLon(nextLongitude());
    doc.add(newStringField("id", "" + id, Field.Store.YES));
    addPointToDoc(FIELD_NAME, doc, lats[2 * id], lons[2 * id]);
    lats[2 * id + 1] = quantizeLat(nextLatitude());
    lons[2 * id + 1] = quantizeLon(nextLongitude());
    addPointToDoc(FIELD_NAME, doc, lats[2 * id + 1], lons[2 * id + 1]);
    if (VERBOSE) {
      System.out.println("id=" + id);
      System.out.println(" lat=" + lats[2 * id] + " lon=" + lons[2 * id]);
      System.out.println(" lat=" + lats[2 * id + 1] + " lon=" + lons[2 * id + 1]);
    }
    w.addDocument(doc);
  }
  // TODO: share w/ verify; just need parallel array of the expected ids
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  IndexReader r = w.getReader();
  w.close();
  IndexSearcher s = newSearcher(r);
  int iters = atLeast(25);
  for (int iter = 0; iter < iters; iter++) {
    Rectangle rect = nextBox();
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
    }
    Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
    final FixedBitSet hits = new FixedBitSet(r.maxDoc());
    s.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    boolean fail = false;
    for (int docID = 0; docID < lats.length / 2; docID++) {
      double latDoc1 = lats[2 * docID];
      double lonDoc1 = lons[2 * docID];
      double latDoc2 = lats[2 * docID + 1];
      double lonDoc2 = lons[2 * docID + 1];
      boolean result1 = rectContainsPoint(rect, latDoc1, lonDoc1);
      boolean result2 = rectContainsPoint(rect, latDoc2, lonDoc2);
      boolean expected = result1 || result2;
      if (hits.get(docID) != expected) {
        String id = s.doc(docID).get("id");
        if (expected) {
          System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
        } else {
          System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
        }
        System.out.println(" rect=" + rect);
        System.out.println(" lat=" + latDoc1 + " lon=" + lonDoc1 + "\n lat=" + latDoc2 + " lon=" + lonDoc2);
        System.out.println(" result1=" + result1 + " result2=" + result2);
        fail = true;
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }
  r.close();
  dir.close();
}
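The anonymous SimpleCollector in this test is a pattern the geo tests reuse: collect every matching docID into a FixedBitSet, rebasing per-segment docIDs with the leaf's docBase. Below is a standalone sketch of that collector; the class name BitSetCollector is ours, and it assumes the same Lucene 6/7 collector API (with needsScores()) used above.

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.FixedBitSet;

final class BitSetCollector extends SimpleCollector {
  private final FixedBitSet hits;
  private int docBase;

  BitSetCollector(int maxDoc) {
    hits = new FixedBitSet(maxDoc);
  }

  @Override
  public boolean needsScores() {
    return false; // membership only; lets Lucene skip scoring work
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    docBase = context.docBase; // rebase per-segment docIDs to index-wide ones
  }

  @Override
  public void collect(int doc) {
    hits.set(docBase + doc);
  }

  FixedBitSet getHits() {
    return hits;
  }
}

With such a class, the search above would read s.search(query, new BitSetCollector(r.maxDoc())) followed by a call to getHits().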
Use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
In the class BaseGeoPointTestCase, the method searchSmallSet:
/** return topdocs over a small set of points in field "point" */
private TopDocs searchSmallSet(Query query, int size) throws Exception {
  // this is a simple systematic test, indexing these points
  // TODO: fragile: does not understand quantization in any way yet uses extremely high precision!
  double[][] pts = new double[][] {
    { 32.763420, -96.774 },
    { 32.7559529921407, -96.7759895324707 },
    { 32.77866942010977, -96.77701950073242 },
    { 32.7756745755423, -96.7706036567688 },
    { 27.703618681345585, -139.73458170890808 },
    { 32.94823588839368, -96.4538113027811 },
    { 33.06047141970814, -96.65084838867188 },
    { 32.778650, -96.7772 },
    { -88.56029371730983, -177.23537676036358 },
    { 33.541429799076354, -26.779373834241003 },
    { 26.774024500421728, -77.35379276106497 },
    { -90.0, -14.796283808944777 },
    { 32.94823588839368, -178.8538113027811 },
    { 32.94823588839368, 178.8538113027811 },
    { 40.720611, -73.998776 },
    { -44.5, -179.5 }
  };
  Directory directory = newDirectory();
  // TODO: must these simple tests really rely on docid order?
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000));
  iwc.setMergePolicy(newLogMergePolicy());
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
  for (double[] p : pts) {
    Document doc = new Document();
    addPointToDoc("point", doc, p[0], p[1]);
    writer.addDocument(doc);
  }
  // add explicit multi-valued docs
  for (int i = 0; i < pts.length; i += 2) {
    Document doc = new Document();
    addPointToDoc("point", doc, pts[i][0], pts[i][1]);
    addPointToDoc("point", doc, pts[i + 1][0], pts[i + 1][1]);
    writer.addDocument(doc);
  }
  // index random string documents
  for (int i = 0; i < random().nextInt(10); ++i) {
    Document doc = new Document();
    doc.add(new StringField("string", Integer.toString(i), Field.Store.NO));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);
  TopDocs topDocs = searcher.search(query, size);
  reader.close();
  directory.close();
  return topDocs;
}
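A hypothetical caller of this helper, to show the intent: most of the hard-coded points cluster around Dallas (roughly 32.75..32.78 lat, -96.78..-96.77 lon), so a box over that area should match several docs. The test name and bounds below are illustrative, not from the source; newRectQuery takes (field, minLat, maxLat, minLon, maxLon) as in the other tests above.

public void testSmallSetDallasBox() throws Exception {
  // box chosen to cover the Dallas cluster in the pts array above
  Query query = newRectQuery("point", 32.75, 32.79, -96.79, -96.77);
  TopDocs td = searchSmallSet(query, 20);
  assertTrue(td.totalHits >= 1);
}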
Use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
In the class BaseGeoPointTestCase, the method verifyRandomRectangles:
protected void verifyRandomRectangles(double[] lats, double[] lons) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Else we can get O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < lats.length / 100) {
    iwc.setMaxBufferedDocs(lats.length / 100);
  }
  Directory dir;
  if (lats.length > 100000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }
  Set<Integer> deleted = new HashSet<>();
  // RandomIndexWriter is too slow here:
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int id = 0; id < lats.length; id++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (Double.isNaN(lats[id]) == false) {
      addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println(" delete id=" + idToDelete);
      }
    }
  }
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  final IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher s = newSearcher(r);
  int iters = atLeast(25);
  Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
  int maxDoc = s.getIndexReader().maxDoc();
  for (int iter = 0; iter < iters; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " s=" + s);
    }
    Rectangle rect = nextBox();
    Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
    if (VERBOSE) {
      System.out.println(" query=" + query);
    }
    final FixedBitSet hits = new FixedBitSet(maxDoc);
    s.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    boolean fail = false;
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
    for (int docID = 0; docID < maxDoc; docID++) {
      assertEquals(docID, docIDToID.nextDoc());
      int id = (int) docIDToID.longValue();
      boolean expected;
      if (liveDocs != null && liveDocs.get(docID) == false) {
        // document is deleted
        expected = false;
      } else if (Double.isNaN(lats[id])) {
        expected = false;
      } else {
        expected = rectContainsPoint(rect, lats[id], lons[id]);
      }
      if (hits.get(docID) != expected) {
        StringBuilder b = new StringBuilder();
        b.append("docID=(" + docID + ")\n");
        if (expected) {
          b.append("FAIL: id=" + id + " should match but did not\n");
        } else {
          b.append("FAIL: id=" + id + " should not match but did\n");
        }
        b.append(" box=" + rect + "\n");
        b.append(" query=" + query + " docID=" + docID + "\n");
        b.append(" lat=" + lats[id] + " lon=" + lons[id] + "\n");
        b.append(" deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
        // the collect-and-continue branch below is disabled; the test fails fast on the first wrong hit
        if (true) {
          fail("wrong hit (first of possibly more):\n\n" + b);
        } else {
          System.out.println(b.toString());
          fail = true;
        }
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }
  IOUtils.close(r, dir);
}
Use of org.apache.lucene.index.SerialMergeScheduler in project lucene-solr by apache.
In the class BaseGeoPointTestCase, the method doRandomDistanceTest:
private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  int pointsInLeaf = 2 + random().nextInt(4);
  iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {
    @Override
    public PointsFormat pointsFormat() {
      return new PointsFormat() {
        @Override
        public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
          return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
        }

        @Override
        public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
          return new Lucene60PointsReader(readState);
        }
      };
    }
  });
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  for (int i = 0; i < numDocs; i++) {
    double latRaw = nextLatitude();
    double lonRaw = nextLongitude();
    // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
    double lat = quantizeLat(latRaw);
    double lon = quantizeLon(lonRaw);
    Document doc = new Document();
    addPointToDoc("field", doc, lat, lon);
    doc.add(new StoredField("lat", lat));
    doc.add(new StoredField("lon", lon));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  for (int i = 0; i < numQueries; i++) {
    double lat = nextLatitude();
    double lon = nextLongitude();
    double radius = 50000000D * random().nextDouble();
    BitSet expected = new BitSet();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
      double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
      double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
      if (distance <= radius) {
        expected.set(doc);
      }
    }
    TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
    BitSet actual = new BitSet();
    for (ScoreDoc doc : topDocs.scoreDocs) {
      actual.set(doc.doc);
    }
    try {
      assertEquals(expected, actual);
    } catch (AssertionError e) {
      System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
      for (int doc = 0; doc < reader.maxDoc(); doc++) {
        double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
        double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
        double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
        System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
      }
      throw e;
    }
  }
  reader.close();
  writer.close();
  dir.close();
}
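The expected set above is computed by brute force with SloppyMath.haversinMeters. A tiny isolated sketch of that check follows; haversinMeters is the real Lucene utility (org.apache.lucene.util.SloppyMath), while the coordinates are arbitrary example values:

// New York vs. Los Angeles, roughly 3.9 million meters apart
double centerLat = 40.7128, centerLon = -74.0060;
double docLat = 34.0522, docLon = -118.2437;
double meters = SloppyMath.haversinMeters(centerLat, centerLon, docLat, docLon);
boolean within50km = meters <= 50_000D; // false: LA is far outside a 50 km radius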