Use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.
From the class TestIDVersionPostingsFormat, method testRandom.
// TODO make a similar test for BT, w/ varied IDs:
public void testRandom() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
  int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random().nextInt(50);
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  //IndexWriter w = new IndexWriter(dir, iwc);
  int numDocs = atLeast(1000);
  Map<String, Long> idValues = new HashMap<String, Long>();
  int docUpto = 0;
  if (VERBOSE) {
    System.out.println("TEST: numDocs=" + numDocs);
  }
  IDSource ids = getRandomIDs();
  String idPrefix;
  if (random().nextBoolean()) {
    idPrefix = "";
  } else {
    idPrefix = TestUtil.randomSimpleString(random());
    if (VERBOSE) {
      System.out.println("TEST: use id prefix: " + idPrefix);
    }
  }
  boolean useMonotonicVersion = random().nextBoolean();
  if (VERBOSE) {
    System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
  }
  List<String> idsList = new ArrayList<>();
  long version = 0;
  while (docUpto < numDocs) {
    String idValue = idPrefix + ids.next();
    if (idValues.containsKey(idValue)) {
      continue;
    }
    if (useMonotonicVersion) {
      version += TestUtil.nextInt(random(), 1, 10);
    } else {
      // Mask keeps the version non-negative and within the 62-bit range the format supports:
      version = random().nextLong() & 0x3fffffffffffffffL;
    }
    idValues.put(idValue, version);
    if (VERBOSE) {
      System.out.println(" " + idValue + " -> " + version);
    }
    Document doc = new Document();
    doc.add(makeIDField(idValue, version));
    w.addDocument(doc);
    idsList.add(idValue);
    if (idsList.size() > 0 && random().nextInt(7) == 5) {
      // Randomly delete or update a previous ID
      idValue = idsList.get(random().nextInt(idsList.size()));
      if (random().nextBoolean()) {
        if (useMonotonicVersion) {
          version += TestUtil.nextInt(random(), 1, 10);
        } else {
          version = random().nextLong() & 0x3fffffffffffffffL;
        }
        doc = new Document();
        doc.add(makeIDField(idValue, version));
        if (VERBOSE) {
          System.out.println(" update " + idValue + " -> " + version);
        }
        w.updateDocument(new Term("id", idValue), doc);
        idValues.put(idValue, version);
      } else {
        if (VERBOSE) {
          System.out.println(" delete " + idValue);
        }
        w.deleteDocuments(new Term("id", idValue));
        idValues.remove(idValue);
      }
    }
    docUpto++;
  }
  IndexReader r = w.getReader();
  //IndexReader r = DirectoryReader.open(w);
  PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
  List<Map.Entry<String, Long>> idValuesList = new ArrayList<>(idValues.entrySet());
  int iters = numDocs * 5;
  for (int iter = 0; iter < iters; iter++) {
    String idValue;
    if (random().nextBoolean()) {
      // An id we indexed:
      idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
    } else if (random().nextBoolean()) {
      // A fresh id from the source, most likely absent:
      idValue = ids.next();
    } else {
      // A random id, most likely absent:
      idValue = idPrefix + TestUtil.randomSimpleString(random());
    }
    BytesRef idValueBytes = new BytesRef(idValue);
    Long expectedVersion = idValues.get(idValue);
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
    }
    if (expectedVersion == null) {
      assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
    } else {
      if (random().nextBoolean()) {
        if (VERBOSE) {
          System.out.println(" lookup exact version (should be found)");
        }
        assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
        assertEquals(expectedVersion.longValue(), lookup.getVersion());
      } else {
        if (VERBOSE) {
          System.out.println(" lookup version+1 (should not be found)");
        }
        assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue() + 1));
      }
    }
  }
  r.close();
  w.close();
  dir.close();
}
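This test leans on a makeIDField helper that the snippet does not show. A minimal sketch of what it plausibly does, assuming the sandbox module's StringAndPayloadField test field and the static IDVersionPostingsFormat.longToBytes encoder (treat the exact names and signatures as assumptions, not the verbatim helper):

// Plausible sketch, not the verbatim helper: attach the version as an
// 8-byte payload on the single "id" token, which is where
// IDVersionPostingsFormat reads versions from.
private static Field makeIDField(String id, long version) {
  BytesRef payload = new BytesRef(8);
  payload.length = 8;
  IDVersionPostingsFormat.longToBytes(version, payload);
  return new StringAndPayloadField("id", id, payload);
}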
Use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.
From the class TestIDVersionPostingsFormat, method testInvalidPayload.
public void testInvalidPayload() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  // The payload must be exactly 8 bytes (an encoded version); "foo" is only 3:
  doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit();
  });
  w.close();
  dir.close();
}
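For contrast with the rejected 3-byte "foo" payload, a hedged sketch of a payload the format should accept, reusing the assumed longToBytes encoder from the sketch above:

// Hypothetical valid counterpart: exactly 8 bytes holding an encoded version.
BytesRef payload = new BytesRef(8);
payload.length = 8;
IDVersionPostingsFormat.longToBytes(1L, payload);
Document ok = new Document();
ok.add(new StringAndPayloadField("id", "id", payload));
w.addDocument(ok); // should index without throwing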
Use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.
From the class TestIDVersionPostingsFormat, method testMoreThanOnceInSingleDoc.
public void testMoreThanOnceInSingleDoc() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();
  // The ID field may appear at most once per document:
  doc.add(makeIDField("id", 17));
  doc.add(makeIDField("id", 17));
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit();
  });
  w.close();
  dir.close();
}
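Since two ID fields in one document are rejected, the supported way to move a document to a new version is to replace the whole document, keyed by its unique term. A short sketch under that assumption:

// Hypothetical fix: one ID field per document; replace the document to bump
// the version rather than adding the field twice.
Document fixed = new Document();
fixed.add(makeIDField("id", 18));
w.updateDocument(new Term("id", "id"), fixed);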
Use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.
From the class BaseGeoPointTestCase, method testMultiValued.
public void testMultiValued() throws Exception {
  int numPoints = atLeast(10000);
  // Every doc has 2 points:
  double[] lats = new double[2 * numPoints];
  double[] lons = new double[2 * numPoints];
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // We rely on docID order:
  iwc.setMergePolicy(newLogMergePolicy());
  // and on seeds being able to reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  for (int id = 0; id < numPoints; id++) {
    Document doc = new Document();
    lats[2 * id] = quantizeLat(nextLatitude());
    lons[2 * id] = quantizeLon(nextLongitude());
    doc.add(newStringField("id", "" + id, Field.Store.YES));
    addPointToDoc(FIELD_NAME, doc, lats[2 * id], lons[2 * id]);
    lats[2 * id + 1] = quantizeLat(nextLatitude());
    lons[2 * id + 1] = quantizeLon(nextLongitude());
    addPointToDoc(FIELD_NAME, doc, lats[2 * id + 1], lons[2 * id + 1]);
    if (VERBOSE) {
      System.out.println("id=" + id);
      System.out.println(" lat=" + lats[2 * id] + " lon=" + lons[2 * id]);
      System.out.println(" lat=" + lats[2 * id + 1] + " lon=" + lons[2 * id + 1]);
    }
    w.addDocument(doc);
  }
  // TODO: share w/ verify; just need parallel array of the expected ids
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  IndexReader r = w.getReader();
  w.close();
  IndexSearcher s = newSearcher(r);
  int iters = atLeast(25);
  for (int iter = 0; iter < iters; iter++) {
    Rectangle rect = nextBox();
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
    }
    Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
    final FixedBitSet hits = new FixedBitSet(r.maxDoc());
    s.search(query, new SimpleCollector() {

      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    boolean fail = false;
    for (int docID = 0; docID < lats.length / 2; docID++) {
      double latDoc1 = lats[2 * docID];
      double lonDoc1 = lons[2 * docID];
      double latDoc2 = lats[2 * docID + 1];
      double lonDoc2 = lons[2 * docID + 1];
      boolean result1 = rectContainsPoint(rect, latDoc1, lonDoc1);
      boolean result2 = rectContainsPoint(rect, latDoc2, lonDoc2);
      boolean expected = result1 || result2;
      if (hits.get(docID) != expected) {
        String id = s.doc(docID).get("id");
        if (expected) {
          System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
        } else {
          System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
        }
        System.out.println(" rect=" + rect);
        System.out.println(" lat=" + latDoc1 + " lon=" + lonDoc1 + "\n lat=" + latDoc2 + " lon=" + lonDoc2);
        System.out.println(" result1=" + result1 + " result2=" + result2);
        fail = true;
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }
  r.close();
  dir.close();
}
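The verification loop depends on a rectContainsPoint helper defined elsewhere in BaseGeoPointTestCase. A plausible sketch of its logic (an assumption: boxes from nextBox() may cross the dateline, signalled by minLon > maxLon):

// Plausible sketch, not the verbatim helper:
private static boolean rectContainsPoint(Rectangle rect, double lat, double lon) {
  if (lat < rect.minLat || lat > rect.maxLat) {
    return false;
  }
  if (rect.minLon <= rect.maxLon) {
    return lon >= rect.minLon && lon <= rect.maxLon;
  }
  // minLon > maxLon means the rectangle crosses the dateline:
  return lon <= rect.maxLon || lon >= rect.minLon;
}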
Use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.
From the class BaseGeoPointTestCase, method searchSmallSet.
/** return topdocs over a small set of points in field "point" */
private TopDocs searchSmallSet(Query query, int size) throws Exception {
  // this is a simple systematic test, indexing these points
  // TODO: fragile: does not understand quantization in any way yet uses extremely high precision!
  double[][] pts = new double[][] {
      { 32.763420, -96.774 },
      { 32.7559529921407, -96.7759895324707 },
      { 32.77866942010977, -96.77701950073242 },
      { 32.7756745755423, -96.7706036567688 },
      { 27.703618681345585, -139.73458170890808 },
      { 32.94823588839368, -96.4538113027811 },
      { 33.06047141970814, -96.65084838867188 },
      { 32.778650, -96.7772 },
      { -88.56029371730983, -177.23537676036358 },
      { 33.541429799076354, -26.779373834241003 },
      { 26.774024500421728, -77.35379276106497 },
      { -90.0, -14.796283808944777 },
      { 32.94823588839368, -178.8538113027811 },
      { 32.94823588839368, 178.8538113027811 },
      { 40.720611, -73.998776 },
      { -44.5, -179.5 }
  };
  Directory directory = newDirectory();
  // TODO: must these simple tests really rely on docid order?
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000));
  iwc.setMergePolicy(newLogMergePolicy());
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
  for (double[] p : pts) {
    Document doc = new Document();
    addPointToDoc("point", doc, p[0], p[1]);
    writer.addDocument(doc);
  }
  // add explicit multi-valued docs
  for (int i = 0; i < pts.length; i += 2) {
    Document doc = new Document();
    addPointToDoc("point", doc, pts[i][0], pts[i][1]);
    addPointToDoc("point", doc, pts[i + 1][0], pts[i + 1][1]);
    writer.addDocument(doc);
  }
  // index random string documents
  for (int i = 0; i < random().nextInt(10); ++i) {
    Document doc = new Document();
    doc.add(new StringField("string", Integer.toString(i), Field.Store.NO));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);
  TopDocs topDocs = searcher.search(query, size);
  reader.close();
  directory.close();
  return topDocs;
}
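A hedged usage sketch for this helper: callers build a query with the harness's newRectQuery and assert on the returned hits. The bounds below are illustrative, chosen to cover the Dallas-area cluster in pts; they are not from the original tests.

// Hypothetical caller:
TopDocs td = searchSmallSet(newRectQuery("point", 32.7, 33.1, -96.8, -96.4), 20);
assertTrue(td.scoreDocs.length > 0);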