Search in sources :

Example 1 with StopWatch

use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.

the class ClusterApplierService method runTask.

private void runTask(UpdateTask task) {
    if (!lifecycle.started()) {
        logger.debug("processing [{}]: ignoring, cluster applier service not started", task.source);
        return;
    }
    logger.debug("processing [{}]: execute", task.source);
    final ClusterState previousClusterState = state.get();
    long startTimeMS = currentTimeInMillis();
    final StopWatch stopWatch = new StopWatch();
    final ClusterState newClusterState;
    try {
        try (Releasable ignored = stopWatch.timing("running task [" + task.source + ']')) {
            newClusterState = task.apply(previousClusterState);
        }
    } catch (Exception e) {
        TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
        logger.trace(() -> new ParameterizedMessage("failed to execute cluster state applier in [{}], state:\nversion [{}], source [{}]\n{}", executionTime, previousClusterState.version(), task.source, previousClusterState), e);
        warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
        task.listener.onFailure(task.source, e);
        return;
    }
    if (previousClusterState == newClusterState) {
        TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
        logger.debug("processing [{}]: took [{}] no change in cluster state", task.source, executionTime);
        warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
        task.listener.onSuccess(task.source);
    } else {
        if (logger.isTraceEnabled()) {
            logger.debug("cluster state updated, version [{}], source [{}]\n{}", newClusterState.version(), task.source, newClusterState);
        } else {
            logger.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), task.source);
        }
        try {
            applyChanges(task, previousClusterState, newClusterState, stopWatch);
            TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
            logger.debug("processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})", task.source, executionTime, newClusterState.version(), newClusterState.stateUUID());
            warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch);
            task.listener.onSuccess(task.source);
        } catch (Exception e) {
            TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS));
            if (logger.isTraceEnabled()) {
                logger.warn(new ParameterizedMessage("failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}", executionTime, newClusterState.version(), newClusterState.stateUUID(), task.source, newClusterState), e);
            } else {
                logger.warn(new ParameterizedMessage("failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]", executionTime, newClusterState.version(), newClusterState.stateUUID(), task.source), e);
            }
            // continue we will retry with the same cluster state but that might not help.
            assert applicationMayFail();
            task.listener.onFailure(task.source, e);
        }
    }
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) Releasable(org.opensearch.common.lease.Releasable) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) ProcessClusterEventTimeoutException(org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException) TimeValue(org.opensearch.common.unit.TimeValue) StopWatch(org.opensearch.common.StopWatch)

Example 2 with StopWatch

use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.

the class RecoverySourceHandler method phase2.

/**
 * Perform phase two of the recovery process.
 * <p>
 * Phase two uses a snapshot of the current translog *without* acquiring the write lock (however, the translog snapshot is
 * point-in-time view of the translog). It then sends each translog operation to the target node so it can be replayed into the new
 * shard.
 *
 * @param startingSeqNo              the sequence number to start recovery from, or {@link SequenceNumbers#UNASSIGNED_SEQ_NO} if all
 *                                   ops should be sent
 * @param endingSeqNo                the highest sequence number that should be sent
 * @param snapshot                   a snapshot of the translog
 * @param maxSeenAutoIdTimestamp     the max auto_id_timestamp of append-only requests on the primary
 * @param maxSeqNoOfUpdatesOrDeletes the max seq_no of updates or deletes on the primary after these operations were executed on it.
 * @param listener                   a listener which will be notified with the local checkpoint on the target.
 */
void phase2(final long startingSeqNo, final long endingSeqNo, final Translog.Snapshot snapshot, final long maxSeenAutoIdTimestamp, final long maxSeqNoOfUpdatesOrDeletes, final RetentionLeases retentionLeases, final long mappingVersion, final ActionListener<SendSnapshotResult> listener) throws IOException {
    if (shard.state() == IndexShardState.CLOSED) {
        throw new IndexShardClosedException(request.shardId());
    }
    logger.trace("recovery [phase2]: sending transaction log operations (from [" + startingSeqNo + "] to [" + endingSeqNo + "]");
    final StopWatch stopWatch = new StopWatch().start();
    final StepListener<Void> sendListener = new StepListener<>();
    final OperationBatchSender sender = new OperationBatchSender(startingSeqNo, endingSeqNo, snapshot, maxSeenAutoIdTimestamp, maxSeqNoOfUpdatesOrDeletes, retentionLeases, mappingVersion, sendListener);
    sendListener.whenComplete(ignored -> {
        final long skippedOps = sender.skippedOps.get();
        final int totalSentOps = sender.sentOps.get();
        final long targetLocalCheckpoint = sender.targetLocalCheckpoint.get();
        assert snapshot.totalOperations() == snapshot.skippedOperations() + skippedOps + totalSentOps : String.format(Locale.ROOT, "expected total [%d], overridden [%d], skipped [%d], total sent [%d]", snapshot.totalOperations(), snapshot.skippedOperations(), skippedOps, totalSentOps);
        stopWatch.stop();
        final TimeValue tookTime = stopWatch.totalTime();
        logger.trace("recovery [phase2]: took [{}]", tookTime);
        listener.onResponse(new SendSnapshotResult(targetLocalCheckpoint, totalSentOps, tookTime));
    }, listener::onFailure);
    sender.start();
}
Also used : IndexShardClosedException(org.opensearch.index.shard.IndexShardClosedException) StepListener(org.opensearch.action.StepListener) TimeValue(org.opensearch.common.unit.TimeValue) StopWatch(org.opensearch.common.StopWatch)

Example 3 with StopWatch

use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.

the class RecoverySourceHandler method prepareTargetForTranslog.

void prepareTargetForTranslog(int totalTranslogOps, ActionListener<TimeValue> listener) {
    StopWatch stopWatch = new StopWatch().start();
    final ActionListener<Void> wrappedListener = ActionListener.wrap(nullVal -> {
        stopWatch.stop();
        final TimeValue tookTime = stopWatch.totalTime();
        logger.trace("recovery [phase1]: remote engine start took [{}]", tookTime);
        listener.onResponse(tookTime);
    }, e -> listener.onFailure(new RecoveryEngineException(shard.shardId(), 1, "prepare target for translog failed", e)));
    // Send a request preparing the new shard's translog to receive operations. This ensures the shard engine is started and disables
    // garbage collection (not the JVM's GC!) of tombstone deletes.
    logger.trace("recovery [phase1]: prepare remote engine for translog");
    cancellableThreads.checkForCancel();
    recoveryTarget.prepareForTranslogOperations(totalTranslogOps, wrappedListener);
}
Also used : RecoveryEngineException(org.opensearch.index.engine.RecoveryEngineException) TimeValue(org.opensearch.common.unit.TimeValue) StopWatch(org.opensearch.common.StopWatch)

Example 4 with StopWatch

use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.

the class IndexAliasesIT method testSameAlias.

public void testSameAlias() throws Exception {
    logger.info("--> creating index [test]");
    assertAcked(prepareCreate("test").addMapping("type", "name", "type=text"));
    ensureGreen();
    logger.info("--> creating alias1 ");
    assertAliasesVersionIncreases("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1"))));
    TimeValue timeout = TimeValue.timeValueSeconds(2);
    logger.info("--> recreating alias1 ");
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    assertAliasesVersionUnchanged("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1").setTimeout(timeout))));
    assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
    logger.info("--> modifying alias1 to have a filter");
    stopWatch.start();
    final TermQueryBuilder fooFilter = termQuery("name", "foo");
    assertAliasesVersionIncreases("test", () -> assertAcked(admin().indices().prepareAliases().addAlias("test", "alias1", fooFilter).setTimeout(timeout)));
    assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
    logger.info("--> recreating alias1 with the same filter");
    stopWatch.start();
    assertAliasesVersionUnchanged("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1", fooFilter).setTimeout(timeout))));
    assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
    logger.info("--> recreating alias1 with a different filter");
    stopWatch.start();
    final TermQueryBuilder barFilter = termQuery("name", "bar");
    assertAliasesVersionIncreases("test", () -> assertAcked((admin().indices().prepareAliases().addAlias("test", "alias1", barFilter).setTimeout(timeout))));
    assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
    logger.info("--> verify that filter was updated");
    AliasMetadata aliasMetadata = ((IndexAbstraction.Alias) internalCluster().clusterService().state().metadata().getIndicesLookup().get("alias1")).getFirstAliasMetadata();
    assertThat(aliasMetadata.getFilter().toString(), equalTo("{\"term\":{\"name\":{\"value\":\"bar\",\"boost\":1.0}}}"));
    logger.info("--> deleting alias1");
    stopWatch.start();
    assertAliasesVersionIncreases("test", () -> assertAcked((admin().indices().prepareAliases().removeAlias("test", "alias1").setTimeout(timeout))));
    assertThat(stopWatch.stop().lastTaskTime().millis(), lessThan(timeout.millis()));
}
Also used : AliasMetadata(org.opensearch.cluster.metadata.AliasMetadata) Alias(org.opensearch.action.admin.indices.alias.Alias) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) TimeValue(org.opensearch.common.unit.TimeValue) StopWatch(org.opensearch.common.StopWatch)

Example 5 with StopWatch

use of org.opensearch.common.StopWatch in project OpenSearch by opensearch-project.

the class RecoverySourceHandler method finalizeRecovery.

void finalizeRecovery(long targetLocalCheckpoint, long trimAboveSeqNo, ActionListener<Void> listener) throws IOException {
    if (shard.state() == IndexShardState.CLOSED) {
        throw new IndexShardClosedException(request.shardId());
    }
    cancellableThreads.checkForCancel();
    StopWatch stopWatch = new StopWatch().start();
    logger.trace("finalizing recovery");
    /*
         * Before marking the shard as in-sync we acquire an operation permit. We do this so that there is a barrier between marking a
         * shard as in-sync and relocating a shard. If we acquire the permit then no relocation handoff can complete before we are done
         * marking the shard as in-sync. If the relocation handoff holds all the permits then after the handoff completes and we acquire
         * the permit then the state of the shard will be relocated and this recovery will fail.
         */
    runUnderPrimaryPermit(() -> shard.markAllocationIdAsInSync(request.targetAllocationId(), targetLocalCheckpoint), shardId + " marking " + request.targetAllocationId() + " as in sync", shard, cancellableThreads, logger);
    // this global checkpoint is persisted in finalizeRecovery
    final long globalCheckpoint = shard.getLastKnownGlobalCheckpoint();
    final StepListener<Void> finalizeListener = new StepListener<>();
    cancellableThreads.checkForCancel();
    recoveryTarget.finalizeRecovery(globalCheckpoint, trimAboveSeqNo, finalizeListener);
    finalizeListener.whenComplete(r -> {
        runUnderPrimaryPermit(() -> shard.updateGlobalCheckpointForShard(request.targetAllocationId(), globalCheckpoint), shardId + " updating " + request.targetAllocationId() + "'s global checkpoint", shard, cancellableThreads, logger);
        if (request.isPrimaryRelocation()) {
            logger.trace("performing relocation hand-off");
            // TODO: make relocated async
            // this acquires all IndexShard operation permits and will thus delay new recoveries until it is done
            cancellableThreads.execute(() -> shard.relocated(request.targetAllocationId(), recoveryTarget::handoffPrimaryContext));
        /*
                 * if the recovery process fails after disabling primary mode on the source shard, both relocation source and
                 * target are failed (see {@link IndexShard#updateRoutingEntry}).
                 */
        }
        stopWatch.stop();
        logger.trace("finalizing recovery took [{}]", stopWatch.totalTime());
        listener.onResponse(null);
    }, listener::onFailure);
}
Also used : IndexShardClosedException(org.opensearch.index.shard.IndexShardClosedException) StepListener(org.opensearch.action.StepListener) StopWatch(org.opensearch.common.StopWatch)

Aggregations

StopWatch (org.opensearch.common.StopWatch)7 TimeValue (org.opensearch.common.unit.TimeValue)5 StepListener (org.opensearch.action.StepListener)3 IndexShardClosedException (org.opensearch.index.shard.IndexShardClosedException)3 ArrayList (java.util.ArrayList)2 RecoveryEngineException (org.opensearch.index.engine.RecoveryEngineException)2 Closeable (java.io.Closeable)1 IOException (java.io.IOException)1 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)1 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)1 IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException)1 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)1 Alias (org.opensearch.action.admin.indices.alias.Alias)1 SearchTransportService (org.opensearch.action.search.SearchTransportService)1 Client (org.opensearch.client.Client)1 NodeClient (org.opensearch.client.node.NodeClient)1 ClusterState (org.opensearch.cluster.ClusterState)1 NodeConnectionsService (org.opensearch.cluster.NodeConnectionsService)1