use of org.apache.lucene.index.SoftDeletesRetentionMergePolicy in project crate by crate.
the class InternalEngine method getIndexWriterConfig.
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    // we by default don't commit on close
    iwc.setCommitOnClose(false);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
        // ignored
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(
            SourceFieldMapper.RECOVERY_SOURCE_NAME,
            softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(
                Lucene.SOFT_DELETES_FIELD,
                softDeletesPolicy::getRetentionQuery,
                new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME)));
    }
    boolean shuffleForcedMerge = Booleans.parseBoolean(
        System.getProperty("es.shuffle_forced_merge", Boolean.TRUE.toString()));
    if (shuffleForcedMerge) {
        // We wrap the merge policy for all indices even though it is mostly useful for time-based indices
        // but there should be no overhead for other type of indices so it's simpler than adding a setting
        // to enable it.
        mergePolicy = new ShuffleForcedMergePolicy(mergePolicy);
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    // always use compound on flush - reduces # of file-handles on refresh
    iwc.setUseCompoundFile(true);
    return iwc;
}
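Stripped of the crate-specific wrappers above (RecoverySourcePruneMergePolicy, PrunePostingsMergePolicy, ElasticsearchMergePolicy), the core pattern is small: declare the soft-deletes field on the IndexWriterConfig and let SoftDeletesRetentionMergePolicy decorate a delegate merge policy with a retention query. The following is a minimal, hypothetical sketch of that pattern only; the class name, the "__soft_deletes" field name and the TieredMergePolicy delegate are illustrative assumptions, not taken from the crate source.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.MatchAllDocsQuery;

class SoftDeletesConfigSketch {

    static IndexWriterConfig newSoftDeletesConfig() {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        // The soft-deletes field must be declared on the config so readers and
        // merges know which doc-values field marks a document as soft-deleted.
        iwc.setSoftDeletesField("__soft_deletes");
        MergePolicy delegate = new TieredMergePolicy();
        // Retain every soft-deleted document across merges; a real engine would
        // plug in a narrower retention query (compare softDeletesPolicy::getRetentionQuery above).
        iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy(
            "__soft_deletes", MatchAllDocsQuery::new, delegate));
        return iwc;
    }
}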
use of org.apache.lucene.index.SoftDeletesRetentionMergePolicy in project crate by crate.
the class InternalEngineTests method testNoOps.
/*
 * This test tests that a no-op does not generate a new sequence number, that no-ops can advance the local checkpoint, and that no-ops
 * are correctly added to the translog.
 */
@Test
public void testNoOps() throws IOException {
    engine.close();
    InternalEngine noOpEngine = null;
    final int maxSeqNo = randomIntBetween(0, 128);
    final int localCheckpoint = randomIntBetween(0, maxSeqNo);
    try {
        final BiFunction<Long, Long, LocalCheckpointTracker> supplier =
            (ms, lcp) -> new LocalCheckpointTracker(maxSeqNo, localCheckpoint);
        EngineConfig noopEngineConfig = copy(
            engine.config(),
            new SoftDeletesRetentionMergePolicy(
                Lucene.SOFT_DELETES_FIELD,
                () -> new MatchAllDocsQuery(),
                engine.config().getMergePolicy()));
        noOpEngine = new InternalEngine(noopEngineConfig, supplier) {
            @Override
            protected long doGenerateSeqNoForOperation(Operation operation) {
                throw new UnsupportedOperationException();
            }
        };
        noOpEngine.recoverFromTranslog(translogHandler, Long.MAX_VALUE);
        final int gapsFilled = noOpEngine.fillSeqNoGaps(primaryTerm.get());
        final String reason = "filling gaps";
        noOpEngine.noOp(new Engine.NoOp(maxSeqNo + 1, primaryTerm.get(), LOCAL_TRANSLOG_RECOVERY, System.nanoTime(), reason));
        assertThat(noOpEngine.getProcessedLocalCheckpoint(), equalTo((long) (maxSeqNo + 1)));
        assertThat(noOpEngine.getTranslog().stats().getUncommittedOperations(), equalTo(gapsFilled));
        noOpEngine.noOp(new Engine.NoOp(maxSeqNo + 2, primaryTerm.get(), randomFrom(PRIMARY, REPLICA, PEER_RECOVERY), System.nanoTime(), reason));
        assertThat(noOpEngine.getProcessedLocalCheckpoint(), equalTo((long) (maxSeqNo + 2)));
        assertThat(noOpEngine.getTranslog().stats().getUncommittedOperations(), equalTo(gapsFilled + 1));
        // skip to the op that we added to the translog
        Translog.Operation op;
        Translog.Operation last = null;
        try (Translog.Snapshot snapshot = noOpEngine.getTranslog().newSnapshot()) {
            while ((op = snapshot.next()) != null) {
                last = op;
            }
        }
        assertNotNull(last);
        assertThat(last, instanceOf(Translog.NoOp.class));
        final Translog.NoOp noOp = (Translog.NoOp) last;
        assertThat(noOp.seqNo(), equalTo((long) (maxSeqNo + 2)));
        assertThat(noOp.primaryTerm(), equalTo(primaryTerm.get()));
        assertThat(noOp.reason(), equalTo(reason));
        if (engine.engineConfig.getIndexSettings().isSoftDeleteEnabled()) {
            MapperService mapperService = createMapperService("test");
            List<Translog.Operation> operationsFromLucene = readAllOperationsInLucene(noOpEngine, mapperService);
            // fills n gap and 2 manual noop.
            assertThat(operationsFromLucene, hasSize(maxSeqNo + 2 - localCheckpoint));
            for (int i = 0; i < operationsFromLucene.size(); i++) {
                assertThat(operationsFromLucene.get(i), equalTo(new Translog.NoOp(localCheckpoint + 1 + i, primaryTerm.get(), "filling gaps")));
            }
            assertConsistentHistoryBetweenTranslogAndLuceneIndex(noOpEngine, mapperService);
        }
    } finally {
        IOUtils.close(noOpEngine);
    }
}
use of org.apache.lucene.index.SoftDeletesRetentionMergePolicy in project crate by crate.
the class InternalEngineTests method assertOperationHistoryInLucene.
private void assertOperationHistoryInLucene(List<Engine.Operation> operations) throws IOException {
    final MergePolicy keepSoftDeleteDocsMP = new SoftDeletesRetentionMergePolicy(
        Lucene.SOFT_DELETES_FIELD, MatchAllDocsQuery::new, engine.config().getMergePolicy());
    Settings.Builder settings = Settings.builder()
        .put(defaultSettings.getSettings())
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
        .put(IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING.getKey(), randomLongBetween(0, 10));
    final IndexMetadata indexMetadata = IndexMetadata.builder(defaultSettings.getIndexMetadata()).settings(settings).build();
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(indexMetadata);
    Set<Long> expectedSeqNos = new HashSet<>();
    try (Store store = createStore();
         Engine engine = createEngine(config(indexSettings, store, createTempDir(), keepSoftDeleteDocsMP, null))) {
        for (Engine.Operation op : operations) {
            if (op instanceof Engine.Index) {
                Engine.IndexResult indexResult = engine.index((Engine.Index) op);
                assertThat(indexResult.getFailure(), nullValue());
                expectedSeqNos.add(indexResult.getSeqNo());
            } else {
                Engine.DeleteResult deleteResult = engine.delete((Engine.Delete) op);
                assertThat(deleteResult.getFailure(), nullValue());
                expectedSeqNos.add(deleteResult.getSeqNo());
            }
            if (rarely()) {
                engine.refresh("test");
            }
            if (rarely()) {
                engine.flush();
            }
            if (rarely()) {
                engine.forceMerge(true, 1, false, false, false, UUIDs.randomBase64UUID());
            }
        }
        MapperService mapperService = createMapperService("test");
        List<Translog.Operation> actualOps = readAllOperationsInLucene(engine, mapperService);
        assertThat(actualOps.stream().map(o -> o.seqNo()).collect(Collectors.toList()), containsInAnyOrder(expectedSeqNos.toArray()));
        assertConsistentHistoryBetweenTranslogAndLuceneIndex(engine, mapperService);
    }
}
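The keepSoftDeleteDocsMP above retains everything via MatchAllDocsQuery, while the INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING it configures feeds an engine-side retention policy that is expressed in terms of sequence numbers. A hedged sketch of what such a sequence-number based retention query can look like; the "_seq_no" field name and the helper class are assumptions for illustration, not the project's exact implementation:

import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.Query;

final class RetentionQuerySketch {
    // Retain every operation whose _seq_no is at or above the minimum retained
    // sequence number; anything below it becomes eligible for reclaiming during merges.
    static Query retentionQuery(long minRetainedSeqNo) {
        return LongPoint.newRangeQuery("_seq_no", minRetainedSeqNo, Long.MAX_VALUE);
    }
}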
use of org.apache.lucene.index.SoftDeletesRetentionMergePolicy in project crate by crate.
the class InternalEngineTests method testLookupVersionWithPrunedAwayIds.
/*
 * we are testing an edge case here where we have a fully deleted segment that is retained but has all its IDs pruned away.
 */
@Test
public void testLookupVersionWithPrunedAwayIds() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Lucene.STANDARD_ANALYZER);
        indexWriterConfig.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
        try (IndexWriter writer = new IndexWriter(dir,
                 indexWriterConfig.setMergePolicy(
                     new SoftDeletesRetentionMergePolicy(
                         Lucene.SOFT_DELETES_FIELD,
                         MatchAllDocsQuery::new,
                         new PrunePostingsMergePolicy(indexWriterConfig.getMergePolicy(), "_id"))))) {
            org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
            doc.add(new Field(IdFieldMapper.NAME, "1", IdFieldMapper.Defaults.FIELD_TYPE));
            doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, -1));
            doc.add(new NumericDocValuesField(SeqNoFieldMapper.NAME, 1));
            doc.add(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, 1));
            writer.addDocument(doc);
            writer.flush();
            writer.softUpdateDocument(new Term(IdFieldMapper.NAME, "1"), doc, new NumericDocValuesField(Lucene.SOFT_DELETES_FIELD, 1));
            writer.updateNumericDocValue(new Term(IdFieldMapper.NAME, "1"), Lucene.SOFT_DELETES_FIELD, 1);
            writer.forceMerge(1);
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                assertEquals(1, reader.leaves().size());
                assertNull(VersionsAndSeqNoResolver.loadDocIdAndVersion(reader, new Term(IdFieldMapper.NAME, "1"), false));
            }
        }
    }
}
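For contrast, here is a hedged sketch (not from the project) of the same sequence without PrunePostingsMergePolicy: SoftDeletesRetentionMergePolicy alone keeps the soft-deleted document and its postings, so the id term is still visible after the force merge. The field names, class name and ByteBuffersDirectory are illustrative assumptions.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class RetainedPostingsSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
            iwc.setSoftDeletesField("__soft_deletes");
            // Retention only, no postings pruning.
            iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy(
                "__soft_deletes", MatchAllDocsQuery::new, new TieredMergePolicy()));
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                Document doc = new Document();
                doc.add(new StringField("id", "1", Field.Store.NO));
                writer.addDocument(doc);
                writer.flush();
                // Soft-delete the old copy and then soft-delete the replacement as well,
                // leaving a fully soft-deleted but retained segment, as in the test above.
                writer.softUpdateDocument(new Term("id", "1"), doc,
                    new NumericDocValuesField("__soft_deletes", 1));
                writer.updateNumericDocValue(new Term("id", "1"), "__soft_deletes", 1);
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    TermsEnum ids = reader.leaves().get(0).reader().terms("id").iterator();
                    // Without PrunePostingsMergePolicy the id posting survives the merge.
                    System.out.println("id term still present: " + ids.seekExact(new BytesRef("1")));
                }
            }
        }
    }
}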
use of org.apache.lucene.index.SoftDeletesRetentionMergePolicy in project crate by crate.
the class PrunePostingsMergePolicyTests method testPrune.
@Test
public void testPrune() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        iwc.setSoftDeletesField("_soft_deletes");
        MergePolicy mp = new SoftDeletesRetentionMergePolicy(
            "_soft_deletes", MatchAllDocsQuery::new, new PrunePostingsMergePolicy(newLogMergePolicy(), "id"));
        iwc.setMergePolicy(new ShuffleForcedMergePolicy(mp));
        boolean sorted = randomBoolean();
        if (sorted) {
            iwc.setIndexSort(new Sort(new SortField("sort", SortField.Type.INT)));
        }
        int numUniqueDocs = randomIntBetween(1, 100);
        int numDocs = randomIntBetween(numUniqueDocs, numUniqueDocs * 5);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            for (int i = 0; i < numDocs; i++) {
                if (rarely()) {
                    writer.flush();
                }
                if (rarely()) {
                    writer.forceMerge(1, false);
                }
                int id = i % numUniqueDocs;
                Document doc = new Document();
                doc.add(new StringField("id", "" + id, Field.Store.NO));
                doc.add(newTextField("text", "the quick brown fox", Field.Store.YES));
                doc.add(new NumericDocValuesField("sort", i));
                writer.softUpdateDocument(new Term("id", "" + id), doc, new NumericDocValuesField("_soft_deletes", 1));
                if (i == 0) {
                    // make sure we have at least 2 segments to ensure we do an actual merge to kick out all postings for
                    // soft deletes
                    writer.flush();
                }
            }
            writer.forceMerge(1);
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                LeafReader leafReader = reader.leaves().get(0).reader();
                assertEquals(numDocs, leafReader.maxDoc());
                Terms id = leafReader.terms("id");
                TermsEnum iterator = id.iterator();
                for (int i = 0; i < numUniqueDocs; i++) {
                    assertTrue(iterator.seekExact(new BytesRef("" + i)));
                    assertEquals(1, iterator.docFreq());
                }
                iterator = leafReader.terms("text").iterator();
                assertTrue(iterator.seekExact(new BytesRef("quick")));
                assertEquals(leafReader.maxDoc(), iterator.docFreq());
                int numValues = 0;
                NumericDocValues sort = leafReader.getNumericDocValues("sort");
                while (sort.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    if (sorted) {
                        assertEquals(sort.docID(), sort.longValue());
                    } else {
                        assertTrue(sort.longValue() >= 0);
                        assertTrue(sort.longValue() < numDocs);
                    }
                    numValues++;
                }
                assertEquals(numValues, numDocs);
            }
            {
                // prune away a single ID
                Document doc = new Document();
                doc.add(new StringField("id", "test", Field.Store.NO));
                writer.deleteDocuments(new Term("id", "test"));
                writer.flush();
                writer.forceMerge(1);
                // delete it
                writer.updateNumericDocValue(new Term("id", "test"), "_soft_deletes", 1);
                writer.flush();
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    LeafReader leafReader = reader.leaves().get(0).reader();
                    assertEquals(numDocs, leafReader.maxDoc());
                    Terms id = leafReader.terms("id");
                    TermsEnum iterator = id.iterator();
                    assertEquals(numUniqueDocs, id.size());
                    for (int i = 0; i < numUniqueDocs; i++) {
                        assertTrue(iterator.seekExact(new BytesRef("" + i)));
                        assertEquals(1, iterator.docFreq());
                    }
                    assertFalse(iterator.seekExact(new BytesRef("test")));
                    iterator = leafReader.terms("text").iterator();
                    assertTrue(iterator.seekExact(new BytesRef("quick")));
                    assertEquals(leafReader.maxDoc(), iterator.docFreq());
                }
            }
            {
                // drop all ids
                // first add a doc such that we can force merge
                Document doc = new Document();
                doc.add(new StringField("id", "" + 0, Field.Store.NO));
                doc.add(newTextField("text", "the quick brown fox", Field.Store.YES));
                doc.add(new NumericDocValuesField("sort", 0));
                writer.softUpdateDocument(new Term("id", "" + 0), doc, new NumericDocValuesField("_soft_deletes", 1));
                for (int i = 0; i < numUniqueDocs; i++) {
                    writer.updateNumericDocValue(new Term("id", "" + i), "_soft_deletes", 1);
                }
                writer.flush();
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    LeafReader leafReader = reader.leaves().get(0).reader();
                    assertEquals(numDocs + 1, leafReader.maxDoc());
                    assertEquals(0, leafReader.numDocs());
                    assertNull(leafReader.terms("id"));
                    TermsEnum iterator = leafReader.terms("text").iterator();
                    assertTrue(iterator.seekExact(new BytesRef("quick")));
                    assertEquals(leafReader.maxDoc(), iterator.docFreq());
                }
            }
        }
    }
}
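The MatchAllDocsQuery retention supplier is what keeps all of those soft-deleted duplicates alive through forceMerge(1) in the test above. As a closing hedged sketch (again illustrative, not project code): if the retention supplier matches nothing, merges are free to reclaim soft-deleted documents, so maxDoc is expected to collapse toward numDocs. The class name, field name and the exact moment the reclaim happens are assumptions here.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class NoRetentionSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
            IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
            iwc.setSoftDeletesField("_soft_deletes");
            // Retain nothing: soft-deleted documents become eligible for reclaiming on merge.
            iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy(
                "_soft_deletes", MatchNoDocsQuery::new, new TieredMergePolicy()));
            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
                Document doc = new Document();
                doc.add(new StringField("id", "1", Field.Store.NO));
                writer.addDocument(doc);
                writer.flush();
                // Soft-update adds a new live copy and soft-deletes the old one.
                writer.softUpdateDocument(new Term("id", "1"), doc,
                    new NumericDocValuesField("_soft_deletes", 1));
                writer.flush();
                writer.forceMerge(1);
                try (DirectoryReader reader = DirectoryReader.open(writer)) {
                    // Expectation under these assumptions: the merge drops the soft-deleted
                    // original because nothing retains it, so maxDoc equals numDocs.
                    System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs());
                }
            }
        }
    }
}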