use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.
the class ClusterApplierService method runTask.
private void runTask(UpdateTask task) {
if (!lifecycle.started()) {
logger.debug("processing [{}]: ignoring, cluster applier service not started", task.source);
return;
}
logger.debug("processing [{}]: execute", task.source);
final ClusterState previousClusterState = state.get();
long startTimeMS = currentTimeInMillis();
final StopWatch stopWatch = new StopWatch();
final ClusterState newClusterState;
try {
try (Releasable ignored = stopWatch.timing("running task [" + task.source + ']')) {
newClusterState = task.apply(previousClusterState);
}
} catch (Exception e) {
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
logger.trace(() -> new ParameterizedMessage("failed to execute cluster state applier in [{}], state:\nversion [{}], source [{}]\n{}", executionTime, previousClusterState.version(), task.source, previousClusterState), e);
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
task.listener.onFailure(task.source, e);
return;
}
if (previousClusterState == newClusterState) {
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
logger.debug("processing [{}]: took [{}] no change in cluster state", task.source, executionTime);
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
task.listener.onSuccess(task.source);
} else {
if (logger.isTraceEnabled()) {
logger.debug("cluster state updated, version [{}], source [{}]\n{}", newClusterState.version(), task.source, newClusterState);
} else {
logger.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), task.source);
}
try {
applyChanges(task, previousClusterState, newClusterState, stopWatch);
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
logger.debug("processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})", task.source, executionTime, newClusterState.version(), newClusterState.stateUUID());
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
task.listener.onSuccess(task.source);
} catch (Exception e) {
TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
if (logger.isTraceEnabled()) {
logger.warn(new ParameterizedMessage("failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}", executionTime, newClusterState.version(), newClusterState.stateUUID(), task.source, newClusterState), e);
} else {
logger.warn(new ParameterizedMessage("failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]", executionTime, newClusterState.version(), newClusterState.stateUUID(), task.source), e);
}
// continue we will retry with the same cluster state but that might not help.
assert applicationMayFail();
task.listener.onFailure(task.source, e);
}
}
}
use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.
the class RecoverySourceHandler method phase2.
/**
* Perform phase two of the recovery process.
* <p>
* Phase two uses a snapshot of the current translog *without* acquiring the write lock (however, the translog snapshot is
* point-in-time view of the translog). It then sends each translog operation to the target node so it can be replayed into the new
* shard.
*
* @param startingSeqNo the sequence number to start recovery from, or {@link SequenceNumbers#UNASSIGNED_SEQ_NO} if all
* ops should be sent
* @param endingSeqNo the highest sequence number that should be sent
* @param snapshot a snapshot of the translog
* @param maxSeenAutoIdTimestamp the max auto_id_timestamp of append-only requests on the primary
* @param maxSeqNoOfUpdatesOrDeletes the max seq_no of updates or deletes on the primary after these operations were executed on it.
* @param listener a listener which will be notified with the local checkpoint on the target.
*/
void phase2(final long startingSeqNo, final long endingSeqNo, final Translog.Snapshot snapshot, final long maxSeenAutoIdTimestamp, final long maxSeqNoOfUpdatesOrDeletes, final RetentionLeases retentionLeases, final long mappingVersion, final ActionListener<SendSnapshotResult> listener) throws IOException {
if (shard.state() == IndexShardState.CLOSED) {
throw new IndexShardClosedException(request.shardId());
}
logger.trace("recovery [phase2]: sending transaction log operations (from [" + startingSeqNo + "] to [" + endingSeqNo + "]");
final StopWatch stopWatch = new StopWatch().start();
final StepListener<Void> sendListener = new StepListener<>();
final OperationBatchSender sender = new OperationBatchSender(startingSeqNo, endingSeqNo, snapshot, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes, retentionLeases, mappingVersion, sendListener);
sendListener.whenComplete(ignored -> {
final long skippedOps = sender.skippedOps.get();
final int totalSentOps = sender.sentOps.get();
final long targetLocalCheckpoint = sender.targetLocalCheckpoint.get();
assert snapshot.totalOperations() == snapshot.skippedOperations() + skippedOps + totalSentOps : String.format(Locale.ROOT, "expected total [%d], overridden [%d], skipped [%d], total sent [%d]", snapshot.totalOperations(), snapshot.skippedOperations(), skippedOps, totalSentOps);
stopWatch.stop();
final TimeValue tookTime = stopWatch.totalTime();
logger.trace("recovery [phase2]: took [{}]", tookTime);
listener.onResponse(new SendSnapshotResult(targetLocalCheckpoint, totalSentOps, tookTime));
}, listener::onFailure);
sender.start();
}
use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.
the class RecoverySourceHandler method prepareTargetForTranslog.
void prepareTargetForTranslog(int totalTranslogOps, ActionListener<TimeValue> listener) {
StopWatch stopWatch = new StopWatch().start();
final ActionListener<Void> wrappedListener = ActionListener.wrap(nullVal -> {
stopWatch.stop();
final TimeValue tookTime = stopWatch.totalTime();
logger.trace("recovery [phase1]: remote engine start took [{}]", tookTime);
listener.onResponse(tookTime);
}, e -> listener.onFailure(new RecoveryEngineException(shard.shardId(), 1, "prepare target for translog failed", e)));
// Send a request preparing the new shard's translog to receive operations. This ensures the shard engine is started and disables
// garbage collection (not the JVM's GC!) of tombstone deletes.
logger.trace("recovery [phase1]: prepare remote engine for translog");
cancellableThreads.checkForCancel();
recoveryTarget.prepareForTranslogOperations(totalTranslogOps, wrappedListener);
}
use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.
the class IndexAliasesIT method testSameAlias.
public void testSameAlias() throws Exception {
logger.info("--> creating index [test]");
assertAcked(prepareCreate("test").addMapping("type", "name", "type=text"));
ensureGreen();
logger.info("--> creating alias1 ");
assertAliasesVersionIncreases("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1"))));
TimeValue timeout = TimeValue.timeValueSeconds(2);
logger.info("--> recreating alias1 ");
StopWatch stopWatch = new StopWatch();
stopWatch.start();
assertAliasesVersionUnchanged("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1").setTimeout(timeout))));
assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
logger.info("--> modifying alias1 to have a filter");
stopWatch.start();
final TermQueryBuilder fooFilter = termQuery("name", "foo");
assertAliasesVersionIncreases("test", () -> assertAcked(admin().indices().prepareAliases().addAlias("test", "alias1", fooFilter).setTimeout(timeout)));
assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
logger.info("--> recreating alias1 with the same filter");
stopWatch.start();
assertAliasesVersionUnchanged("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1", fooFilter).setTimeout(timeout))));
assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
logger.info("--> recreating alias1 with a different filter");
stopWatch.start();
final TermQueryBuilder barFilter = termQuery("name", "bar");
assertAliasesVersionIncreases("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1", barFilter).setTimeout(timeout))));
assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
logger.info("--> verify that filter was updated");
AliasMetadata aliasMetadata = ((IndexAbstraction.Alias) internalCluster().clusterService().state().metadata().getIndicesLookup().get("alias1")).getFirstAliasMetadata();
assertThat(aliasMetadata.getFilter().toString(), equalTo("{\"term\":{\"name\":{\"value\":\"bar\",\"boost\":1.0}}}"));
logger.info("--> deleting alias1");
stopWatch.start();
assertAliasesVersionIncreases("test", () -> assertAcked((admin().indices().prepareAliases().removeAlias("test", "alias1").setTimeout(timeout))));
assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
}
use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.
the class RecoverySourceHandler method finalizeRecovery.
void finalizeRecovery(long targetLocalCheckpoint, long trimAboveSeqNo, ActionListener<Void> listener) throws IOException {
if (shard.state() == IndexShardState.CLOSED) {
throw new IndexShardClosedException(request.shardId());
}
cancellableThreads.checkForCancel();
StopWatch stopWatch = new StopWatch().start();
logger.trace("finalizing recovery");
/*
* Before marking the shard as in-sync we acquire an operation permit. We do this so that there is a barrier between marking a
* shard as in-sync and relocating a shard. If we acquire the permit then no relocation handoff can complete before we are done
* marking the shard as in-sync. If the relocation handoff holds all the permits then after the handoff completes and we acquire
* the permit then the state of the shard will be relocated and this recovery will fail.
*/
runUnderPrimaryPermit(() -> shard.markAllocationIdAsInSync(request.targetAllocationId(), targetLocalCheckpoint), shardId + " marking " + request.targetAllocationId() + " as in sync", shard, cancellableThreads, logger);
// this global checkpoint is persisted in finalizeRecovery
final long globalCheckpoint = shard.getLastKnownGlobalCheckpoint();
final StepListener<Void> finalizeListener = new StepListener<>();
cancellableThreads.checkForCancel();
recoveryTarget.finalizeRecovery(globalCheckpoint, trimAboveSeqNo, finalizeListener);
finalizeListener.whenComplete(r -> {
runUnderPrimaryPermit(() -> shard.updateGlobalCheckpointForShard(request.targetAllocationId(), globalCheckpoint), shardId + " updating " + request.targetAllocationId() + "'s global checkpoint", shard, cancellableThreads, logger);
if (request.isPrimaryRelocation()) {
logger.trace("performing relocation hand-off");
// TODO: make relocated async
// this acquires all IndexShard operation permits and will thus delay new recoveries until it is done
cancellableThreads.execute(() -> shard.relocated(request.targetAllocationId(), recoveryTarget::handoffPrimaryContext));
/*
* if the recovery process fails after disabling primary mode on the source shard, both relocation source and
* target are failed (see {@link IndexShard#updateRoutingEntry}).
*/
}
stopWatch.stop();
logger.trace("finalizing recovery took [{}]", stopWatch.totalTime());
listener.onResponse(null);
}, listener::onFailure);
}
Aggregations