Use of org.elasticsearch.index.translog.Translog in project elasticsearch by elastic.
Class InternalEngineTests, method testMissingTranslog.
// Checks that creating the engine with primaryTranslogDir fails with
// EngineCreationFailureException, and that an engine can then be constructed from a config
// copied with the OPEN_INDEX_CREATE_TRANSLOG open mode.
// NOTE(review): this excerpt looks truncated — there are no closing braces and `id` is
// assigned but never used in the visible lines; confirm against the upstream test.
public void testMissingTranslog() throws IOException {
// Test that we can force-start the engine even if the translog is missing.
// Fake a new translog, causing the engine to point to a missing one.
Translog translog = createTranslog();
// Capture the current file generation of the translog that was just created.
long id = translog.currentFileGeneration();
try {
engine = createEngine(store, primaryTranslogDir);
// Reaching this line means the engine started, which this test treats as a failure.
fail("engine shouldn't start without a valid translog id");
} catch (EngineCreationFailureException ex) {
// Expected: engine creation is supposed to fail here.
// Now it should be OK: copy the config with OPEN_INDEX_CREATE_TRANSLOG and build the engine from it.
EngineConfig config = copy(config(defaultSettings, store, primaryTranslogDir, newMergePolicy(), IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP, null), EngineConfig.OpenMode.OPEN_INDEX_CREATE_TRANSLOG);
engine = new InternalEngine(config);
Use of org.elasticsearch.index.translog.Translog in project elasticsearch by elastic.
Class GlobalCheckpointSyncActionTests, method testTranslogSyncAfterGlobalCheckpointSync.
// Exercises GlobalCheckpointSyncAction with mocked services and a mocked shard, calling either
// the primary or the replica shard operation, picked at random.
// NOTE(review): the excerpt has no closing braces and no assertion, and the `indexService` and
// `translog` mocks are never referenced again in the visible lines — probably truncated;
// confirm against the upstream test.
public void testTranslogSyncAfterGlobalCheckpointSync() throws Exception {
// Mocked collaborators; no stubbing of them is visible in this excerpt.
final IndicesService indicesService = mock(IndicesService.class);
final Index index = new Index("index", "uuid");
final IndexService indexService = mock(IndexService.class);
// Random shard number (from randomIntBetween(0, 4)) used to build the ShardId below.
final int id = randomIntBetween(0, 4);
final IndexShard indexShard = mock(IndexShard.class);
final Translog translog = mock(Translog.class);
// The action under test, built with empty settings and an empty set of action filters.
final GlobalCheckpointSyncAction action = new GlobalCheckpointSyncAction(Settings.EMPTY, transportService, clusterService, indicesService, threadPool, shardStateAction, new ActionFilters(Collections.emptySet()), new IndexNameExpressionResolver(Settings.EMPTY));
final ShardId shardId = new ShardId(index, id);
final GlobalCheckpointSyncAction.PrimaryRequest primaryRequest = new GlobalCheckpointSyncAction.PrimaryRequest(shardId);
// Randomly exercise either the primary path or the replica path against the same mocked shard.
if (randomBoolean()) {
action.shardOperationOnPrimary(primaryRequest, indexShard);
} else {
// The replica request wraps the primary request together with a random non-negative long.
action.shardOperationOnReplica(new GlobalCheckpointSyncAction.ReplicaRequest(primaryRequest, randomNonNegativeLong()), indexShard);
Use of org.elasticsearch.index.translog.Translog in project crate by crate.
Class BlobRecoverySourceHandler, method recoverToTarget.
/** Performs the recovery from the local engine to the target. */
// Runs the recovery under a single translog view: takes an index snapshot, runs phase1 with it,
// then opens a translog snapshot for the second phase, and finally returns `response`.
// Failures are rethrown as RecoveryEngineException carrying the shard id and a stage number.
// NOTE(review): this excerpt looks truncated — closing braces are missing, and the `finally`
// block and the phase-2 try body contain no statements of their own in the visible lines;
// confirm against the upstream source before relying on the exact control flow.
public RecoveryResponse recoverToTarget() {
final Engine engine = shard.engine();
assert engine.getTranslog() != null : "translog must not be null";
// The translog view is opened in try-with-resources, so it is closed when that block exits.
try (Translog.View translogView = engine.getTranslog().newView()) {
logger.trace("captured translog id [{}] for recovery", translogView.minTranslogGeneration());
final SnapshotIndexCommit phase1Snapshot;
try {
phase1Snapshot = shard.snapshotIndex(false);
} catch (Throwable e) {
// Snapshot failures are reported with stage number 1 and the original cause attached.
throw new RecoveryEngineException(shard.shardId(), 1, "Snapshot failed", e);
try {
phase1(phase1Snapshot, translogView);
} catch (Throwable e) {
// phase1 failures are also reported with stage number 1.
throw new RecoveryEngineException(shard.shardId(), 1, "phase1 failed", e);
} finally {
logger.trace("snapshot translog for recovery. current size is [{}]", translogView.totalOperations());
// The translog snapshot used for the second phase is likewise scoped by try-with-resources.
try (Translog.Snapshot phase2Snapshot = translogView.snapshot()) {
} catch (Throwable e) {
// Second-phase failures are reported with stage number 2.
throw new RecoveryEngineException(shard.shardId(), 2, "phase2 failed", e);
return response;
Use of org.elasticsearch.index.translog.Translog in project crate by crate.
Class BlobRecoverySourceHandler, method sendSnapshot.
* Send the given snapshot's operations to this handler's target node.
* <p/>
* Operations are bulked into a single request depending on an operation
* count limit or size-in-bytes limit
* @return the total number of translog operations that were sent
protected int sendSnapshot(final Translog.Snapshot snapshot) {
int ops = 0;
long size = 0;
int totalOperations = 0;
final List<Translog.Operation> operations = new ArrayList<>();
Translog.Operation operation;
try {
// this ex should bubble up
operation =;
} catch (IOException ex) {
throw new ElasticsearchException("failed to get next operation from translog", ex);
final TransportRequestOptions recoveryOptions = TransportRequestOptions.builder().withCompress(recoverySettings.compress()).withType(TransportRequestOptions.Type.RECOVERY).withTimeout(recoverySettings.internalActionLongTimeout()).build();
if (operation == null) {
logger.trace("[{}][{}] no translog operations to send to {}", indexName, shardId, request.targetNode());
while (operation != null) {
if (shard.state() == IndexShardState.CLOSED) {
throw new IndexShardClosedException(request.shardId());
ops += 1;
size += operation.estimateSize();
// if so, send it off
if (ops >= recoverySettings.translogOps() || size >= recoverySettings.translogSize().getBytes()) {
// don't throttle translog, since we lock for phase3 indexing,
// so we need to move it as fast as possible. Note, since we
// index docs to replicas while the index files are recovered
// the lock can potentially be removed, in which case, it might
// make sense to re-enable throttling in this phase
// if (recoverySettings.rateLimiter() != null) {
// recoverySettings.rateLimiter().pause(size);
// }
cancellableThreads.execute(new Interruptable() {
public void run() throws InterruptedException {
final RecoveryTranslogOperationsRequest translogOperationsRequest = new RecoveryTranslogOperationsRequest(request.recoveryId(), request.shardId(), operations, snapshot.estimatedTotalOperations());
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.TRANSLOG_OPS, translogOperationsRequest, recoveryOptions, EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
if (logger.isTraceEnabled()) {
logger.trace("[{}][{}] sent batch of [{}][{}] (total: [{}]) translog operations to {}", indexName, shardId, ops, new ByteSizeValue(size), snapshot.estimatedTotalOperations(), request.targetNode());
ops = 0;
size = 0;
try {
// this ex should bubble up
operation =;
} catch (IOException ex) {
throw new ElasticsearchException("failed to get next operation from translog", ex);
// send the leftover
if (!operations.isEmpty()) {
cancellableThreads.execute(new Interruptable() {
public void run() throws InterruptedException {
RecoveryTranslogOperationsRequest translogOperationsRequest = new RecoveryTranslogOperationsRequest(request.recoveryId(), request.shardId(), operations, snapshot.estimatedTotalOperations());
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.TRANSLOG_OPS, translogOperationsRequest, recoveryOptions, EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
if (logger.isTraceEnabled()) {
logger.trace("[{}][{}] sent final batch of [{}][{}] (total: [{}]) translog operations to {}", indexName, shardId, ops, new ByteSizeValue(size), snapshot.estimatedTotalOperations(), request.targetNode());
return totalOperations;
Use of org.elasticsearch.index.translog.Translog in project elasticsearch by elastic.
Class RecoverySourceHandler, method isTranslogReadyForSequenceNumberBasedRecovery.
* Determines if the source translog is ready for a sequence-number-based peer recovery. The main condition here is that the source
* translog contains all operations between the local checkpoint on the target and the current maximum sequence number on the source.
* @param translogView a view of the translog on the source
* @return {@code true} if the source is ready for a sequence-number-based recovery
* @throws IOException if an I/O exception occurred reading the translog snapshot
boolean isTranslogReadyForSequenceNumberBasedRecovery(final Translog.View translogView) throws IOException {
final long startingSeqNo = request.startingSeqNo();
assert startingSeqNo >= 0;
final long endingSeqNo = shard.seqNoStats().getMaxSeqNo();
logger.trace("testing sequence numbers in range: [{}, {}]", startingSeqNo, endingSeqNo);
// the start recovery request is initialized with the starting sequence number set to the target shard's local checkpoint plus one
if (startingSeqNo - 1 <= endingSeqNo) {
* We need to wait for all operations up to the current max to complete, otherwise we can not guarantee that all
* operations in the required range will be available for replaying from the translog of the source.
cancellableThreads.execute(() -> shard.waitForOpsToComplete(endingSeqNo));
logger.trace("all operations up to [{}] completed, checking translog content", endingSeqNo);
final LocalCheckpointTracker tracker = new LocalCheckpointTracker(shard.indexSettings(), startingSeqNo, startingSeqNo - 1);
final Translog.Snapshot snapshot = translogView.snapshot();
Translog.Operation operation;
while ((operation = != null) {
if (operation.seqNo() != SequenceNumbersService.UNASSIGNED_SEQ_NO) {
return tracker.getCheckpoint() >= endingSeqNo;
} else {
// all assumptions regarding local and global checkpoints
return false;