Use of org.opensearch.index.translog.Translog in project OpenSearch by opensearch-project.
From the class IndexShardIT, method testMaybeFlush.
public void testMaybeFlush() throws Exception {
    createIndex("test", Settings.builder()
        .put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), Translog.Durability.REQUEST)
        .build());
    ensureGreen();
    IndicesService indicesService = getInstanceFromNode(IndicesService.class);
    IndexService test = indicesService.indexService(resolveIndex("test"));
    IndexShard shard = test.getShardOrNull(0);
    assertFalse(shard.shouldPeriodicallyFlush());
    client().admin()
        .indices()
        .prepareUpdateSettings("test")
        .setSettings(Settings.builder()
            .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(),
                new ByteSizeValue(135, /* size of the operation + one generation header & footer */ ByteSizeUnit.BYTES))
            .build())
        .get();
    client().prepareIndex("test")
        .setId("0")
        .setSource("{}", XContentType.JSON)
        .setRefreshPolicy(randomBoolean() ? IMMEDIATE : NONE)
        .get();
    assertFalse(shard.shouldPeriodicallyFlush());
    shard.applyIndexOperationOnPrimary(
        Versions.MATCH_ANY,
        VersionType.INTERNAL,
        new SourceToParse("test", "_doc", "1", new BytesArray("{}"), XContentType.JSON),
        SequenceNumbers.UNASSIGNED_SEQ_NO,
        0,
        IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP,
        false);
    assertTrue(shard.shouldPeriodicallyFlush());
    final Translog translog = getTranslog(shard);
    assertEquals(2, translog.stats().getUncommittedOperations());
    assertThat(shard.flushStats().getTotal(), equalTo(0L));
    client().prepareIndex("test")
        .setId("2")
        .setSource("{}", XContentType.JSON)
        .setRefreshPolicy(randomBoolean() ? IMMEDIATE : NONE)
        .get();
    assertThat(shard.getLastKnownGlobalCheckpoint(), equalTo(2L));
    assertBusy(() -> {
        // this is async
        assertFalse(shard.shouldPeriodicallyFlush());
        assertThat(shard.flushStats().getPeriodic(), equalTo(1L));
        assertThat(shard.flushStats().getTotal(), equalTo(1L));
    });
    shard.sync();
    assertThat(shard.getLastSyncedGlobalCheckpoint(), equalTo(2L));
    assertThat("last commit [" + shard.commitStats().getUserData() + "]",
        translog.stats().getUncommittedOperations(), equalTo(0));
    long size = Math.max(translog.stats().getUncommittedSizeInBytes(), Translog.DEFAULT_HEADER_SIZE_IN_BYTES + 1);
    logger.info("--> current translog size: [{}] num_ops [{}] generation [{}]",
        translog.stats().getUncommittedSizeInBytes(),
        translog.stats().getUncommittedOperations(),
        translog.getGeneration());
    client().admin()
        .indices()
        .prepareUpdateSettings("test")
        .setSettings(Settings.builder()
            .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(size, ByteSizeUnit.BYTES))
            .build())
        .get();
    client().prepareDelete("test", "2").get();
    logger.info("--> translog size after delete: [{}] num_ops [{}] generation [{}]",
        translog.stats().getUncommittedSizeInBytes(),
        translog.stats().getUncommittedOperations(),
        translog.getGeneration());
    assertBusy(() -> {
        // this is async
        final TranslogStats translogStats = translog.stats();
        final CommitStats commitStats = shard.commitStats();
        final FlushStats flushStats = shard.flushStats();
        logger.info("--> translog stats [{}] gen [{}] commit_stats [{}] flush_stats [{}/{}]",
            Strings.toString(translogStats),
            translog.getGeneration().translogFileGeneration,
            commitStats.getUserData(),
            flushStats.getPeriodic(),
            flushStats.getTotal());
        assertFalse(shard.shouldPeriodicallyFlush());
    });
    shard.sync();
    assertEquals(0, translog.stats().getUncommittedOperations());
}
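The flush trigger this test exercises boils down to a size comparison: a periodic flush is due once the uncommitted translog size reaches the configured index.translog.flush_threshold_size. Below is a minimal standalone sketch of that decision; FlushPolicy and its members are illustrative names, not the OpenSearch API, and the real InternalEngine#shouldPeriodicallyFlush() additionally checks that a new commit would actually free up translog generations.

    // Illustrative sketch only: FlushPolicy is not an OpenSearch class.
    final class FlushPolicy {

        private final long flushThresholdBytes;

        FlushPolicy(long flushThresholdBytes) {
            this.flushThresholdBytes = flushThresholdBytes;
        }

        // Flush once the translog operations not yet committed to Lucene
        // outgrow the configured threshold.
        boolean shouldPeriodicallyFlush(long uncommittedSizeInBytes) {
            return uncommittedSizeInBytes >= flushThresholdBytes;
        }

        public static void main(String[] args) {
            FlushPolicy policy = new FlushPolicy(135); // threshold used by the test above
            System.out.println(policy.shouldPeriodicallyFlush(100)); // false: one op stays below it
            System.out.println(policy.shouldPeriodicallyFlush(270)); // true: two ops exceed it
        }
    }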
Use of org.opensearch.index.translog.Translog in project OpenSearch by opensearch-project.
From the class InternalEngine, method innerNoOp.
private NoOpResult innerNoOp(final NoOp noOp) throws IOException {
    assert readLock.isHeldByCurrentThread() || writeLock.isHeldByCurrentThread();
    assert noOp.seqNo() > SequenceNumbers.NO_OPS_PERFORMED;
    final long seqNo = noOp.seqNo();
    try (Releasable ignored = noOpKeyedLock.acquire(seqNo)) {
        final NoOpResult noOpResult;
        final Optional<Exception> preFlightError = preFlightCheckForNoOp(noOp);
        if (preFlightError.isPresent()) {
            noOpResult = new NoOpResult(SequenceNumbers.UNASSIGNED_PRIMARY_TERM, SequenceNumbers.UNASSIGNED_SEQ_NO, preFlightError.get());
        } else {
            markSeqNoAsSeen(noOp.seqNo());
            if (hasBeenProcessedBefore(noOp) == false) {
                try {
                    final ParsedDocument tombstone = engineConfig.getTombstoneDocSupplier().newNoopTombstoneDoc(noOp.reason());
                    tombstone.updateSeqID(noOp.seqNo(), noOp.primaryTerm());
                    // A no-op tombstone does not require a _version, but one is added so that doc values for the
                    // version field are fully dense. 1L is chosen to optimize compression, since it is likely the
                    // most common value in the version field.
                    tombstone.version().setLongValue(1L);
                    assert tombstone.docs().size() == 1 : "Tombstone should have a single doc [" + tombstone + "]";
                    final ParseContext.Document doc = tombstone.docs().get(0);
                    assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null
                        : "Noop tombstone document but _tombstone field is not set [" + doc + "]";
                    doc.add(softDeletesField);
                    indexWriter.addDocument(doc);
                } catch (final Exception ex) {
                    /*
                     * Document-level failures when adding a no-op are unexpected; we likely hit something fatal such
                     * as the Lucene index being corrupt, or the Lucene document limit. We have already issued a
                     * sequence number here, so this is fatal: fail the engine.
                     */
                    if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) {
                        failEngine("no-op origin[" + noOp.origin() + "] seq#[" + noOp.seqNo() + "] failed at document level", ex);
                    }
                    throw ex;
                }
            }
            noOpResult = new NoOpResult(noOp.primaryTerm(), noOp.seqNo());
            if (noOp.origin().isFromTranslog() == false && noOpResult.getResultType() == Result.Type.SUCCESS) {
                final Translog.Location location = translog.add(new Translog.NoOp(noOp.seqNo(), noOp.primaryTerm(), noOp.reason()));
                noOpResult.setTranslogLocation(location);
            }
        }
        localCheckpointTracker.markSeqNoAsProcessed(noOpResult.getSeqNo());
        if (noOpResult.getTranslogLocation() == null) {
            // the op came from the translog (and is hence persisted already) or it does not have a sequence number
            assert noOp.origin().isFromTranslog() || noOpResult.getSeqNo() == SequenceNumbers.UNASSIGNED_SEQ_NO;
            localCheckpointTracker.markSeqNoAsPersisted(noOpResult.getSeqNo());
        }
        noOpResult.setTook(System.nanoTime() - noOp.startTime());
        noOpResult.freeze();
        return noOpResult;
    }
}
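Worth noting in innerNoOp is the split between markSeqNoAsProcessed and markSeqNoAsPersisted: every operation is marked processed once applied, but only marked persisted once it is durable, either because its translog write completed or because it was replayed from the translog to begin with. The toy model below illustrates that distinction under a simplifying assumption: it keeps only the highest sequence number seen per state, whereas the real LocalCheckpointTracker tracks completed sequence numbers in bitsets and advances contiguous checkpoints.

    import java.util.concurrent.atomic.AtomicLong;

    // Toy model only; not the OpenSearch LocalCheckpointTracker.
    final class ToySeqNoTracker {

        private final AtomicLong maxProcessed = new AtomicLong(-1);
        private final AtomicLong maxPersisted = new AtomicLong(-1);

        // Called for every applied operation, durable or not.
        void markSeqNoAsProcessed(long seqNo) {
            maxProcessed.accumulateAndGet(seqNo, Math::max);
        }

        // Called only once the operation is durable (translog write fsynced,
        // or the op was replayed from the translog in the first place).
        void markSeqNoAsPersisted(long seqNo) {
            maxPersisted.accumulateAndGet(seqNo, Math::max);
        }

        public static void main(String[] args) {
            ToySeqNoTracker tracker = new ToySeqNoTracker();
            tracker.markSeqNoAsProcessed(0); // applied, translog write still pending
            tracker.markSeqNoAsProcessed(1);
            tracker.markSeqNoAsPersisted(1); // e.g. a no-op replayed from the translog
            System.out.println(tracker.maxProcessed.get()); // 1
            System.out.println(tracker.maxPersisted.get()); // 1
        }
    }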
Use of org.opensearch.index.translog.Translog in project OpenSearch by opensearch-project.
From the class InternalEngine, method index.
@Override
public IndexResult index(Index index) throws IOException {
    assert Objects.equals(index.uid().field(), IdFieldMapper.NAME) : index.uid().field();
    final boolean doThrottle = index.origin().isRecovery() == false;
    try (ReleasableLock releasableLock = readLock.acquire()) {
        ensureOpen();
        assert assertIncomingSequenceNumber(index.origin(), index.seqNo());
        int reservedDocs = 0;
        try (
            Releasable ignored = versionMap.acquireLock(index.uid().bytes());
            Releasable indexThrottle = doThrottle ? throttle.acquireThrottle() : () -> {}
        ) {
            lastWriteNanos = index.startTime();
            /* A NOTE ABOUT APPEND-ONLY OPTIMIZATIONS:
             * if an operation with an auto-generated ID comes into the engine, we can potentially optimize by using
             * addDocument instead of updateDocument, skipping the version and index lookup entirely.
             * Yet, we have to deal with multiple delivery of the same document; for this we use a property of the
             * document, added so we can detect whether it has potentially been delivered before. We use the
             * document's timestamp for this, since it:
             * - doesn't change per document
             * - is preserved in the transaction log
             * - is assigned before we start to index / replicate
             * NOTE: it's not important for this timestamp to be consistent across nodes etc.; it's just a number
             * that is increasing in the common case, and that can be used in the failure case, when we retry and
             * resend documents, to establish a happens-before relationship. For instance:
             * - doc A has autoGeneratedIdTimestamp = 10, isRetry = false
             * - doc B has autoGeneratedIdTimestamp = 9, isRetry = false
             *
             * while both docs are in flight, we disconnect on one node, reconnect, and send doc A again
             * - now doc A' has autoGeneratedIdTimestamp = 10, isRetry = true
             *
             * if A' arrives on the shard first, we update maxUnsafeAutoIdTimestamp to 10 and use updateDocument.
             * All subsequent documents that arrive (A and B) will also use updateDocument, since their timestamps
             * are less than or equal to maxUnsafeAutoIdTimestamp. While this is not strictly needed for doc B, it is
             * much simpler to implement, and in the worst case it merely de-optimizes some documents.
             *
             * if A arrives on the shard first, we use addDocument, since maxUnsafeAutoIdTimestamp is < 10. A' will
             * then either be skipped or go through updateDocument.
             */
            final IndexingStrategy plan = indexingStrategyForOperation(index);
            reservedDocs = plan.reservedDocs;
            final IndexResult indexResult;
            if (plan.earlyResultOnPreFlightError.isPresent()) {
                assert index.origin() == Operation.Origin.PRIMARY : index.origin();
                indexResult = plan.earlyResultOnPreFlightError.get();
                assert indexResult.getResultType() == Result.Type.FAILURE : indexResult.getResultType();
            } else {
                // generate or register sequence number
                if (index.origin() == Operation.Origin.PRIMARY) {
                    index = new Index(
                        index.uid(),
                        index.parsedDoc(),
                        generateSeqNoForOperationOnPrimary(index),
                        index.primaryTerm(),
                        index.version(),
                        index.versionType(),
                        index.origin(),
                        index.startTime(),
                        index.getAutoGeneratedIdTimestamp(),
                        index.isRetry(),
                        index.getIfSeqNo(),
                        index.getIfPrimaryTerm());
                    final boolean toAppend = plan.indexIntoLucene && plan.useLuceneUpdateDocument == false;
                    if (toAppend == false) {
                        advanceMaxSeqNoOfUpdatesOrDeletesOnPrimary(index.seqNo());
                    }
                } else {
                    markSeqNoAsSeen(index.seqNo());
                }
                assert index.seqNo() >= 0 : "ops should have an assigned seq no.; origin: " + index.origin();
                if (plan.indexIntoLucene || plan.addStaleOpToLucene) {
                    indexResult = indexIntoLucene(index, plan);
                } else {
                    indexResult = new IndexResult(plan.versionForIndexing, index.primaryTerm(), index.seqNo(), plan.currentNotFoundOrDeleted);
                }
            }
            if (index.origin().isFromTranslog() == false) {
                final Translog.Location location;
                if (indexResult.getResultType() == Result.Type.SUCCESS) {
                    location = translog.add(new Translog.Index(index, indexResult));
                } else if (indexResult.getSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) {
                    // on a document failure, record it as a no-op in the translog and Lucene under the generated seq_no
                    final NoOp noOp = new NoOp(indexResult.getSeqNo(), index.primaryTerm(), index.origin(), index.startTime(), indexResult.getFailure().toString());
                    location = innerNoOp(noOp).getTranslogLocation();
                } else {
                    location = null;
                }
                indexResult.setTranslogLocation(location);
            }
            if (plan.indexIntoLucene && indexResult.getResultType() == Result.Type.SUCCESS) {
                final Translog.Location translogLocation = trackTranslogLocation.get() ? indexResult.getTranslogLocation() : null;
                versionMap.maybePutIndexUnderLock(index.uid().bytes(), new IndexVersionValue(translogLocation, plan.versionForIndexing, index.seqNo(), index.primaryTerm()));
            }
            localCheckpointTracker.markSeqNoAsProcessed(indexResult.getSeqNo());
            if (indexResult.getTranslogLocation() == null) {
                // the op came from the translog (and is hence persisted already) or it does not have a sequence number
                assert index.origin().isFromTranslog() || indexResult.getSeqNo() == SequenceNumbers.UNASSIGNED_SEQ_NO;
                localCheckpointTracker.markSeqNoAsPersisted(indexResult.getSeqNo());
            }
            indexResult.setTook(System.nanoTime() - index.startTime());
            indexResult.freeze();
            return indexResult;
        } finally {
            releaseInFlightDocs(reservedDocs);
        }
    } catch (RuntimeException | IOException e) {
        try {
            if (e instanceof AlreadyClosedException == false && treatDocumentFailureAsTragicError(index)) {
                failEngine("index id[" + index.id() + "] origin[" + index.origin() + "] seq#[" + index.seqNo() + "]", e);
            } else {
                maybeFailEngine("index id[" + index.id() + "] origin[" + index.origin() + "] seq#[" + index.seqNo() + "]", e);
            }
        } catch (Exception inner) {
            e.addSuppressed(inner);
        }
        throw e;
    }
}
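The long comment above describes the append-only optimization, whose core is a single timestamp watermark: a retried auto-ID document raises maxUnsafeAutoIdTimestamp, and any later document whose timestamp is at or below the watermark must take the slower update path. A minimal sketch of that check follows; AppendOnlyWatermark is an illustrative class, not the actual InternalEngine logic, which folds this decision into its indexing strategy.

    import java.util.concurrent.atomic.AtomicLong;

    // Simplified sketch of the watermark described in the comment above.
    final class AppendOnlyWatermark {

        private final AtomicLong maxUnsafeAutoIdTimestamp = new AtomicLong(-1);

        // Returns true when addDocument is safe, false when the slower
        // updateDocument path must be taken.
        boolean canUseAddDocument(long autoGeneratedIdTimestamp, boolean isRetry) {
            if (isRetry) {
                // A retry may race an earlier in-flight copy of the same document:
                // raise the watermark and force the update path.
                maxUnsafeAutoIdTimestamp.accumulateAndGet(autoGeneratedIdTimestamp, Math::max);
                return false;
            }
            // A first-time delivery is only safe to append if no retry with an
            // equal-or-newer timestamp has been seen on this shard.
            return autoGeneratedIdTimestamp > maxUnsafeAutoIdTimestamp.get();
        }

        public static void main(String[] args) {
            AppendOnlyWatermark watermark = new AppendOnlyWatermark();
            System.out.println(watermark.canUseAddDocument(10, true));  // A' first: false, watermark -> 10
            System.out.println(watermark.canUseAddDocument(10, false)); // A: false, 10 is not > 10
            System.out.println(watermark.canUseAddDocument(9, false));  // B: false, de-optimized but correct
        }
    }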
Use of org.opensearch.index.translog.Translog in project OpenSearch by opensearch-project.
From the class ReadOnlyEngine, method translogStats.
private static TranslogStats translogStats(final EngineConfig config, final SegmentInfos infos) throws IOException {
    final String translogUuid = infos.getUserData().get(Translog.TRANSLOG_UUID_KEY);
    if (translogUuid == null) {
        throw new IllegalStateException("commit doesn't contain translog unique id");
    }
    final TranslogConfig translogConfig = config.getTranslogConfig();
    final TranslogDeletionPolicy translogDeletionPolicy = new DefaultTranslogDeletionPolicy(
        config.getIndexSettings().getTranslogRetentionSize().getBytes(),
        config.getIndexSettings().getTranslogRetentionAge().getMillis(),
        config.getIndexSettings().getTranslogRetentionTotalFiles());
    final long localCheckpoint = Long.parseLong(infos.getUserData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY));
    translogDeletionPolicy.setLocalCheckpointOfSafeCommit(localCheckpoint);
    try (
        Translog translog = new Translog(
            translogConfig,
            translogUuid,
            translogDeletionPolicy,
            config.getGlobalCheckpointSupplier(),
            config.getPrimaryTermSupplier(),
            seqNo -> {})
    ) {
        return translog.stats();
    }
}
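The method ties a Lucene commit to its translog through the commit's user-data map: the commit must carry the translog UUID (guarding against pairing a commit with the wrong translog) and the local checkpoint (seeding the deletion policy). Below is a small sketch of that validation against a plain map; the map stands in for SegmentInfos#getUserData(), and the key strings mirror what Translog.TRANSLOG_UUID_KEY and SequenceNumbers.LOCAL_CHECKPOINT_KEY resolve to, hard-coded here for illustration.

    import java.util.Map;

    // Sketch of the commit user-data validation performed above; CommitUserData
    // is an illustrative class, not part of OpenSearch.
    final class CommitUserData {

        static long readLocalCheckpoint(Map<String, String> userData) {
            if (userData.get("translog_uuid") == null) {
                throw new IllegalStateException("commit doesn't contain translog unique id");
            }
            // parsing fails loudly if the commit is missing the checkpoint entry
            return Long.parseLong(userData.get("local_checkpoint"));
        }

        public static void main(String[] args) {
            Map<String, String> userData = Map.of("translog_uuid", "someTranslogUuid", "local_checkpoint", "41");
            System.out.println(readLocalCheckpoint(userData)); // 41
        }
    }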
Use of org.opensearch.index.translog.Translog in project OpenSearch by opensearch-project.
From the class IndexShardIT, method testDurableFlagHasEffect.
public void testDurableFlagHasEffect() throws Exception {
    createIndex("test");
    ensureGreen();
    client().prepareIndex("test").setId("1").setSource("{}", XContentType.JSON).get();
    IndicesService indicesService = getInstanceFromNode(IndicesService.class);
    IndexService test = indicesService.indexService(resolveIndex("test"));
    IndexShard shard = test.getShardOrNull(0);
    Translog translog = getTranslog(shard);
    Predicate<Translog> needsSync = (tlog) -> {
        // we can't use tlog.needsSync() here since it also takes the global checkpoint into account.
        // we explicitly want to check whether our durability settings are honored, so we only check
        // whether we are synced up to the current write location
        Translog.Location lastWriteLocation = tlog.getLastWriteLocation();
        try {
            // the lastWriteLocation has an Integer.MAX_VALUE size, so we have to create a new one
            return tlog.ensureSynced(new Translog.Location(lastWriteLocation.generation, lastWriteLocation.translogLocation, 0));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    };
    setDurability(shard, Translog.Durability.REQUEST);
    assertFalse(needsSync.test(translog));
    setDurability(shard, Translog.Durability.ASYNC);
    client().prepareIndex("test").setId("2").setSource("{}", XContentType.JSON).get();
    assertTrue(needsSync.test(translog));
    setDurability(shard, Translog.Durability.REQUEST);
    client().prepareDelete("test", "1").get();
    assertFalse(needsSync.test(translog));
    setDurability(shard, Translog.Durability.ASYNC);
    client().prepareDelete("test", "2").get();
    assertTrue(translog.syncNeeded());
    setDurability(shard, Translog.Durability.REQUEST);
    assertNoFailures(
        client().prepareBulk()
            .add(client().prepareIndex("test").setId("3").setSource("{}", XContentType.JSON))
            .add(client().prepareDelete("test", "1"))
            .get());
    assertFalse(needsSync.test(translog));
    setDurability(shard, Translog.Durability.ASYNC);
    assertNoFailures(
        client().prepareBulk()
            .add(client().prepareIndex("test").setId("4").setSource("{}", XContentType.JSON))
            .add(client().prepareDelete("test", "3"))
            .get());
    setDurability(shard, Translog.Durability.REQUEST);
    assertTrue(needsSync.test(translog));
}
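What this test pins down is the contract of the two durability modes: with Translog.Durability.REQUEST the translog is fsynced before a request is acknowledged, so nothing is left to sync afterwards; with ASYNC the fsync is deferred to a background interval, so an acknowledged write can still be pending. A compressed sketch of that post-write decision follows; DurabilityGate and TranslogLike are illustrative names, not OpenSearch classes (Translog.Durability and Translog#sync() are the real counterparts).

    import java.io.IOException;

    // Illustrative sketch of the durability decision exercised above.
    final class DurabilityGate {

        enum Durability { REQUEST, ASYNC }

        interface TranslogLike {
            void sync() throws IOException; // fsync everything written so far
        }

        // Decision taken after a write has been added to the translog and
        // before the request is acknowledged.
        static void afterWrite(Durability durability, TranslogLike translog) throws IOException {
            if (durability == Durability.REQUEST) {
                translog.sync(); // make the operation durable before acknowledging
            }
            // ASYNC: acknowledge immediately; a scheduled background task syncs
            // later, which is why syncNeeded() stays true in the test until then.
        }
    }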