Use of org.apache.lucene.index.LogDocMergePolicy in project openolat by klemens.
In the class OlatFullIndexer, the method newLogMergePolicy:
public LogMergePolicy newLogMergePolicy() {
    LogMergePolicy logmp = new LogDocMergePolicy();
    // count segment sizes net of deleted documents when picking merges
    logmp.setCalibrateSizeByDeletes(true);
    logmp.setMergeFactor(INDEX_MERGE_FACTOR);
    return logmp;
}
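For context, a policy built this way is handed to the writer configuration. A minimal wiring sketch (illustrative, not taken from the OpenOLAT source; it assumes a Lucene-style IndexWriterConfig, a StandardAnalyzer, and that INDEX_MERGE_FACTOR is defined by the indexer):

// Hypothetical wiring: plug the policy into the writer configuration
// before opening the IndexWriter.
IndexWriter openWriter(Directory directory) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setMergePolicy(newLogMergePolicy()); // the factory method shown above
    return new IndexWriter(directory, iwc);
}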
Use of org.apache.lucene.index.LogDocMergePolicy in project crate by crate.
In the class InternalEngineTests, the method testPruneAwayDeletedButRetainedIds:
@Test
public void testPruneAwayDeletedButRetainedIds() throws Exception {
    IOUtils.close(engine, store);
    Settings settings = Settings.builder()
        .put(defaultSettings.getSettings())
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
        .build();
    IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(
        IndexMetadata.builder(defaultSettings.getIndexMetadata()).settings(settings).build());
    store = createStore(indexSettings, newDirectory());
    LogDocMergePolicy policy = new LogDocMergePolicy();
    policy.setMinMergeDocs(10000);
    try (InternalEngine engine = createEngine(indexSettings, store, createTempDir(), policy)) {
        int numDocs = between(1, 20);
        logger.info("numDocs={}", numDocs);
        for (int i = 0; i < numDocs; i++) {
            index(engine, i);
        }
        engine.forceMerge(true, 1, false, false, false, UUIDs.randomBase64UUID());
        engine.delete(new Engine.Delete("0", newUid("0"), primaryTerm.get()));
        engine.refresh("test");
        // now we have 2 segments: the tombstone we just added plus the old segment carrying the delete
        try (Searcher searcher = engine.acquireSearcher("test")) {
            IndexReader reader = searcher.getIndexReader();
            assertEquals(2, reader.leaves().size());
            LeafReaderContext leafReaderContext = reader.leaves().get(0);
            LeafReader leafReader = leafReaderContext.reader();
            assertEquals("the delete and the tombstone", 1, leafReader.numDeletedDocs());
            assertEquals(numDocs, leafReader.maxDoc());
            Terms id = leafReader.terms("_id");
            assertNotNull(id);
            assertEquals("deleted IDs are NOT YET pruned away", reader.numDocs() + 1, id.size());
            TermsEnum iterator = id.iterator();
            assertTrue(iterator.seekExact(Uid.encodeId("0")));
        }
        // let's force merge the tombstone and the original segment and make sure
        // the doc is still there but the ID term is gone
        engine.forceMerge(true, 1, false, false, false, UUIDs.randomBase64UUID());
        engine.refresh("test");
        try (Searcher searcher = engine.acquireSearcher("test")) {
            IndexReader reader = searcher.getIndexReader();
            assertEquals(1, reader.leaves().size());
            LeafReaderContext leafReaderContext = reader.leaves().get(0);
            LeafReader leafReader = leafReaderContext.reader();
            assertEquals("the delete and the tombstone", 2, leafReader.numDeletedDocs());
            assertEquals(numDocs + 1, leafReader.maxDoc());
            Terms id = leafReader.terms("_id");
            if (numDocs == 1) {
                // everything is pruned away
                assertNull(id);
                assertEquals(0, leafReader.numDocs());
            } else {
                assertNotNull(id);
                assertEquals("deleted IDs are pruned away", reader.numDocs(), id.size());
                TermsEnum iterator = id.iterator();
                assertFalse(iterator.seekExact(Uid.encodeId("0")));
            }
        }
    }
}
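A note on the setup: LogDocMergePolicy measures segment size in document counts rather than bytes, and setMinMergeDocs(10000) puts every segment smaller than 10,000 documents into the same, lowest merge level, keeping the segment layout of this tiny index predictable. The same policy can be exercised with plain Lucene outside the Crate test harness; a sketch (ByteBuffersDirectory and StandardAnalyzer are illustrative choices):

// Sketch: tiny per-commit segments under LogDocMergePolicy, then an explicit
// forceMerge, mirroring what the test drives through the engine.
try (Directory dir = new ByteBuffersDirectory()) {
    LogDocMergePolicy mp = new LogDocMergePolicy();
    mp.setMinMergeDocs(10000); // segments below this doc count share the lowest merge level
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(mp);
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
        for (int i = 0; i < 5; i++) {
            Document doc = new Document();
            doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));
            w.addDocument(doc);
            w.commit(); // one tiny segment per commit
        }
        w.forceMerge(1); // collapse everything into a single segment
    }
}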
Use of org.apache.lucene.index.LogDocMergePolicy in project elasticsearch by elastic.
In the class InternalEngineTests, the method testRenewSyncFlush:
public void testRenewSyncFlush() throws Exception {
    // run this a couple of times to get some coverage
    final int iters = randomIntBetween(2, 5);
    for (int i = 0; i < iters; i++) {
        try (Store store = createStore();
             InternalEngine engine = new InternalEngine(config(defaultSettings, store, createTempDir(),
                 new LogDocMergePolicy(), IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, null))) {
            final String syncId = randomUnicodeOfCodepointLengthBetween(10, 20);
            Engine.Index doc1 = indexForDoc(testParsedDocument("1", "test", null, testDocumentWithTextField(), B_1, null));
            engine.index(doc1);
            assertEquals(engine.getLastWriteNanos(), doc1.startTime());
            engine.flush();
            Engine.Index doc2 = indexForDoc(testParsedDocument("2", "test", null, testDocumentWithTextField(), B_1, null));
            engine.index(doc2);
            assertEquals(engine.getLastWriteNanos(), doc2.startTime());
            engine.flush();
            final boolean forceMergeFlushes = randomBoolean();
            final ParsedDocument parsedDoc3 = testParsedDocument("3", "test", null, testDocumentWithTextField(), B_1, null);
            if (forceMergeFlushes) {
                engine.index(new Engine.Index(newUid(parsedDoc3), parsedDoc3, SequenceNumbersService.UNASSIGNED_SEQ_NO, 0,
                    Versions.MATCH_ANY, VersionType.INTERNAL, Engine.Operation.Origin.PRIMARY,
                    System.nanoTime() - engine.engineConfig.getFlushMergesAfter().nanos(), -1, false));
            } else {
                engine.index(indexForDoc(parsedDoc3));
            }
            Engine.CommitId commitID = engine.flush();
            assertEquals("should succeed to flush commit with right id and no pending doc",
                engine.syncFlush(syncId, commitID), Engine.SyncedFlushResult.SUCCESS);
            assertEquals(3, engine.segments(false).size());
            engine.forceMerge(forceMergeFlushes, 1, false, false, false);
            if (forceMergeFlushes == false) {
                engine.refresh("make all segments visible");
                assertEquals(4, engine.segments(false).size());
                assertEquals(store.readLastCommittedSegmentsInfo().getUserData().get(Engine.SYNC_COMMIT_ID), syncId);
                assertEquals(engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID), syncId);
                assertTrue(engine.tryRenewSyncCommit());
                assertEquals(1, engine.segments(false).size());
            } else {
                assertBusy(() -> assertEquals(1, engine.segments(false).size()));
            }
            assertEquals(store.readLastCommittedSegmentsInfo().getUserData().get(Engine.SYNC_COMMIT_ID), syncId);
            assertEquals(engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID), syncId);
            if (randomBoolean()) {
                Engine.Index doc4 = indexForDoc(testParsedDocument("4", "test", null, testDocumentWithTextField(), B_1, null));
                engine.index(doc4);
                assertEquals(engine.getLastWriteNanos(), doc4.startTime());
            } else {
                Engine.Delete delete = new Engine.Delete(doc1.type(), doc1.id(), doc1.uid());
                engine.delete(delete);
                assertEquals(engine.getLastWriteNanos(), delete.startTime());
            }
            assertFalse(engine.tryRenewSyncCommit());
            // we might hit a concurrent flush from a finishing merge here - just wait if ongoing...
            engine.flush(false, true);
            assertNull(store.readLastCommittedSegmentsInfo().getUserData().get(Engine.SYNC_COMMIT_ID));
            assertNull(engine.getLastCommittedSegmentInfos().getUserData().get(Engine.SYNC_COMMIT_ID));
        }
    }
}
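The sync-id assertions read the user data of the last Lucene commit, where Elasticsearch stores the synced-flush marker. Outside the Engine abstraction the same map is reachable with plain Lucene; a sketch, assuming dir is the index Directory (Engine.SYNC_COMMIT_ID is the "sync_id" key):

// Read the latest commit's user data, where the sync id checked above lives.
SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
String syncId = infos.getUserData().get("sync_id"); // i.e. Engine.SYNC_COMMIT_ID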
Use of org.apache.lucene.index.LogDocMergePolicy in project lucene-solr by apache.
In the class TestPerFieldPostingsFormat2, the method newWriter:
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf) throws IOException {
    LogDocMergePolicy logDocMergePolicy = new LogDocMergePolicy();
    // make sure we use plain files (disable the compound file format for merges)
    logDocMergePolicy.setNoCFSRatio(0.0);
    conf.setMergePolicy(logDocMergePolicy);
    final IndexWriter writer = new IndexWriter(dir, conf);
    return writer;
}
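The noCFSRatio of 0.0 tells the merge policy never to write merged segments in the compound (.cfs) format, so each part of the segment stays a separate file. Illustrative usage of the helper (the directory and analyzer are stand-ins, not from the Lucene test):

Directory dir = new ByteBuffersDirectory();
IndexWriter writer = newWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));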
Use of org.apache.lucene.index.LogDocMergePolicy in project openolat by klemens.
In the class JmsIndexer, the method newLogMergePolicy (identical to the OlatFullIndexer configuration above):
public LogMergePolicy newLogMergePolicy() {
    LogMergePolicy logmp = new LogDocMergePolicy();
    logmp.setCalibrateSizeByDeletes(true);
    logmp.setMergeFactor(INDEX_MERGE_FACTOR);
    return logmp;
}