Search in sources :

Example 1 with ExecutionNotFoundException

use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.

The method onSnapshotPhase2Complete of the class MasterSnapshotContext.

/**
 * Processes the collected phase-2 responses of a snapshot. All of the work
 * is submitted to the coordinator thread.
 * <p>
 * Responses that belong to an execution other than the current one are
 * ignored. Otherwise every failed member response is logged, {@code future}
 * (if given) is completed, and then, while holding the master-context lock,
 * the snapshot is marked as no longer in progress and either the terminal
 * snapshot future is completed or the next automatic snapshot is scheduled.
 * Finally {@code tryBeginSnapshot()} is invoked.
 *
 * @param phase1Error error from the phase-1. Null if phase-1 was successful.
 * @param responses collected responses from the members
 * @param executionId ID of the execution the responses belong to
 * @param snapshotId ID of the snapshot, used in the log messages
 * @param snapshotFlags flags of the snapshot
 * @param future future to be completed when the phase-2 is fully completed;
 *               it is completed exceptionally with a {@code JetException}
 *               when {@code phase1Error} is not null
 * @param startTime phase-1 start time, compared against
 *                  {@code System.currentTimeMillis()} for the duration log
 */
private void onSnapshotPhase2Complete(String phase1Error, Collection<Entry<MemberInfo, Object>> responses, long executionId, long snapshotId, int snapshotFlags, @Nullable CompletableFuture<Void> future, long startTime) {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        if (executionId != mc.executionId()) {
            LoggingUtil.logFine(logger, "%s: ignoring responses for snapshot %s phase 2: " + "the responses are from a different execution: %s. Responses: %s", mc.jobIdString(), snapshotId, idToString(executionId), responses);
            return;
        }

        // Report every member whose phase-2 operation failed. A member that no
        // longer knows the execution is only logged at FINE level.
        for (Entry<MemberInfo, Object> memberResponse : responses) {
            Object outcome = memberResponse.getValue();
            if (!(outcome instanceof Throwable)) {
                continue;
            }
            Level severity = outcome instanceof ExecutionNotFoundException ? Level.FINE : Level.WARNING;
            String message = SnapshotPhase2Operation.class.getSimpleName() + " for snapshot " + snapshotId + " in " + mc.jobIdString() + " failed on member: " + memberResponse;
            logger.log(severity, message, (Throwable) outcome);
        }

        // The caller's future mirrors the phase-1 outcome.
        if (future != null) {
            if (phase1Error != null) {
                future.completeExceptionally(new JetException(phase1Error));
            } else {
                future.complete(null);
            }
        }

        mc.lock();
        try {
            // double-check the execution ID after locking
            if (executionId != mc.executionId()) {
                logger.fine("Not completing terminalSnapshotFuture on " + mc.jobIdString() + ", new execution " + "already started, snapshot was for executionId=" + idToString(executionId));
                return;
            }
            assert snapshotInProgress : "snapshot not in progress";
            snapshotInProgress = false;

            boolean terminal = SnapshotFlags.isTerminal(snapshotFlags);
            if (terminal) {
                // after a terminal snapshot, no more snapshots are scheduled in this execution
                boolean completedNow = terminalSnapshotFuture.complete(null);
                assert completedNow : "terminalSnapshotFuture was already completed";
                boolean phase1Failed = phase1Error != null;
                if (phase1Failed) {
                    // If the terminal snapshot failed, the executions might not terminate on some members
                    // normally and we don't care if they do - the snapshot is done and we have to bring the
                    // execution down. Let's execute the CompleteExecutionOperation to terminate them.
                    mc.jobContext().cancelExecutionInvocations(mc.jobId(), mc.executionId(), null, null);
                }
            } else if (!SnapshotFlags.isExport(snapshotFlags)) {
                // if this snapshot was an automatic snapshot, schedule the next one
                mc.coordinationService().scheduleSnapshot(mc, executionId);
            }
        } finally {
            mc.unlock();
        }

        if (logger.isFineEnabled()) {
            long elapsedMillis = System.currentTimeMillis() - startTime;
            String status = phase1Error == null ? "success" : "failure: " + phase1Error;
            logger.fine("Snapshot " + snapshotId + " for " + mc.jobIdString() + " completed in " + elapsedMillis + "ms, status=" + status);
        }
        tryBeginSnapshot();
    });
}
Also used : ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) SnapshotPhase2Operation(com.hazelcast.jet.impl.operation.SnapshotPhase2Operation) JetException(com.hazelcast.jet.JetException)

Example 2 with ExecutionNotFoundException

use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.

The method onSnapshotPhase1Complete of the class MasterSnapshotContext.

/**
 * Merges the phase-1 responses of the members into a single
 * {@code SnapshotPhase1Result} on the coordinator thread and then continues in
 * {@code onSnapshotPhase1CompleteWithStartResponses}.
 * <p>
 * For members that answered with an {@code ExecutionNotFoundException}, the
 * future of their start-operation response is collected instead, and the
 * continuation runs only after all such futures have completed.
 *
 * @param responses collected responses from the members
 * @param executionId ID of the execution, passed on to the continuation
 * @param snapshotId ID of the snapshot, passed on to the continuation
 * @param snapshotMapName the IMap name to which the snapshot is written
 * @param snapshotFlags flags of the snapshot
 * @param future a future to be completed when the phase-2 is fully completed
 */
private void onSnapshotPhase1Complete(Collection<Map.Entry<MemberInfo, Object>> responses, long executionId, long snapshotId, String snapshotMapName, int snapshotFlags, @Nullable CompletableFuture<Void> future) {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        SnapshotPhase1Result mergedResult = new SnapshotPhase1Result();
        List<CompletableFuture<Void>> missingResponses = new ArrayList<>();

        // the response is either SnapshotOperationResult or an exception, see #invokeOnParticipants() method
        for (Map.Entry<MemberInfo, Object> memberEntry : responses) {
            Object memberResponse = memberEntry.getValue();
            if (memberResponse instanceof ExecutionNotFoundException) {
                // The member doesn't know the execution. Remember the future of its
                // start-operation response; all of them are awaited below.
                missingResponses.add(mc.startOperationResponses().get(memberEntry.getKey().getAddress()));
            } else if (memberResponse instanceof Throwable) {
                // Any other failure is merged as an empty result carrying the error.
                mergedResult.merge(new SnapshotPhase1Result(0, 0, 0, (Throwable) memberResponse));
            } else {
                mergedResult.merge((SnapshotPhase1Result) memberResponse);
            }
        }

        boolean mustWait = !missingResponses.isEmpty();
        if (mustWait) {
            LoggingUtil.logFine(logger, "%s will wait for %d responses to StartExecutionOperation in " + "onSnapshotPhase1Complete()", mc.jobIdString(), missingResponses.size());
        }

        // In a typical case `missingResponses` will be empty. It will be non-empty if some member completed
        // its execution and some other did not, or near the completion of a job, e.g. after a failure.
        // `allOf` for an empty array returns a completed future immediately.
        // Another edge case is that we'll be waiting for a response to start operation from a next execution,
        // which can happen much later - we could handle it, but we ignore it: when it arrives, we'll find a
        // changed executionId and ignore the response. It also doesn't occupy a thread - we're using a future.
        CompletableFuture<Void> allStartResponses = CompletableFuture.allOf(missingResponses.toArray(new CompletableFuture[0]));
        allStartResponses.whenComplete(withTryCatch(logger, (ignoredResult, ignoredError) -> onSnapshotPhase1CompleteWithStartResponses(responses, executionId, snapshotId, snapshotMapName, snapshotFlags, future, mergedResult, missingResponses)));
    });
}
Also used : SnapshotPhase2Operation(com.hazelcast.jet.impl.operation.SnapshotPhase2Operation) LoggingUtil(com.hazelcast.jet.impl.util.LoggingUtil) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) CompletableFuture(java.util.concurrent.CompletableFuture) Function(java.util.function.Function) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) JetException(com.hazelcast.jet.JetException) Util.jobNameAndExecutionId(com.hazelcast.jet.impl.util.Util.jobNameAndExecutionId) ILogger(com.hazelcast.logging.ILogger) Operation(com.hazelcast.spi.impl.operationservice.Operation) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) SnapshotPhase1Operation(com.hazelcast.jet.impl.operation.SnapshotPhase1Operation) LinkedList(java.util.LinkedList) EXPORTED_SNAPSHOTS_PREFIX(com.hazelcast.jet.impl.JobRepository.EXPORTED_SNAPSHOTS_PREFIX) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tuple3(com.hazelcast.jet.datamodel.Tuple3) SnapshotFlags(com.hazelcast.jet.impl.execution.SnapshotFlags) Collection(java.util.Collection) JobRepository.snapshotDataMapName(com.hazelcast.jet.impl.JobRepository.snapshotDataMapName) ExecutionException(java.util.concurrent.ExecutionException) JobRepository.exportedSnapshotMapName(com.hazelcast.jet.impl.JobRepository.exportedSnapshotMapName) SnapshotPhase1Result(com.hazelcast.jet.impl.operation.SnapshotPhase1Operation.SnapshotPhase1Result) Tuple3.tuple3(com.hazelcast.jet.datamodel.Tuple3.tuple3) List(java.util.List) Util.idToString(com.hazelcast.jet.Util.idToString) LoggingUtil.logFine(com.hazelcast.jet.impl.util.LoggingUtil.logFine) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) Entry(java.util.Map.Entry) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) Queue(java.util.Queue) 
SnapshotStats(com.hazelcast.jet.impl.JobExecutionRecord.SnapshotStats) IMap(com.hazelcast.map.IMap) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) SnapshotPhase1Result(com.hazelcast.jet.impl.operation.SnapshotPhase1Operation.SnapshotPhase1Result) ArrayList(java.util.ArrayList) Map(java.util.Map) IMap(com.hazelcast.map.IMap)

Example 3 with ExecutionNotFoundException

use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.

The method run of the class GetLocalJobMetricsOperation.

/**
 * Looks up the execution context for {@code executionId} and stores its job
 * metrics in {@code response}.
 *
 * @throws ExecutionNotFoundException if no execution context is found for
 *         {@code executionId}
 */
@Override
public void run() {
    ExecutionContext context = getJetServiceBackend().getJobExecutionService().getExecutionContext(executionId);
    if (context != null) {
        response = context.getJobMetrics();
        return;
    }
    throw new ExecutionNotFoundException(executionId);
}
Also used : ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) JetServiceBackend(com.hazelcast.jet.impl.JetServiceBackend)

Example 4 with ExecutionNotFoundException

use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.

The method assertExecutionContext of the class JobExecutionService.

/**
 * Returns the execution context registered for {@code executionId}, after
 * verifying that the caller is the current master and that the context
 * belongs to that caller and to {@code jobId}.
 *
 * @param callerAddress address of the member that sent the operation
 * @param jobId ID of the job the caller refers to
 * @param executionId ID of the execution to look up
 * @param callerOpName name of the calling operation, used in error messages
 * @return the matching execution context, never null
 * @throws IllegalStateException if the caller is not the master, or if the
 *         context has a different coordinator or job ID
 * @throws ExecutionNotFoundException if no context exists for {@code executionId}
 */
@Nonnull
public ExecutionContext assertExecutionContext(Address callerAddress, long jobId, long executionId, String callerOpName) {
    Address masterAddress = nodeEngine.getMasterAddress();
    boolean calledByMaster = callerAddress.equals(masterAddress);

    // Runs for master and non-master callers alike, before any other failure is reported.
    failIfNotRunning();
    if (!calledByMaster) {
        throw new IllegalStateException(String.format("Caller %s cannot do '%s' for %s: it is not the master, the master is %s", callerAddress, callerOpName, jobIdAndExecutionId(jobId, executionId), masterAddress));
    }

    ExecutionContext found = executionContexts.get(executionId);
    if (found == null) {
        throw new ExecutionNotFoundException(String.format("%s not found for coordinator %s for '%s'", jobIdAndExecutionId(jobId, executionId), callerAddress, callerOpName));
    }

    // The context must have been created by this very coordinator for this very job.
    if (!found.coordinator().equals(callerAddress) || found.jobId() != jobId) {
        throw new IllegalStateException(String.format("%s, originally from coordinator %s, cannot do '%s' by coordinator %s and execution %s", found.jobNameAndExecutionId(), found.coordinator(), callerOpName, callerAddress, idToString(executionId)));
    }
    return found;
}
Also used : ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ExecutionContext(com.hazelcast.jet.impl.execution.ExecutionContext) Address(com.hazelcast.cluster.Address) Nonnull(javax.annotation.Nonnull)

Example 5 with ExecutionNotFoundException

use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.

The method onStartExecutionComplete of the class MasterJobContext.

/**
 * Handles the completion of the start-execution operations: stores the job
 * metrics found among the responses and finalizes the job.
 * <p>
 * If the job is neither STARTING nor RUNNING, the error is replaced with an
 * {@code IllegalStateException}. If the job was terminated with a terminal
 * snapshot, finalization is deferred until the terminal snapshot future
 * completes.
 *
 * @param error the failure of the execution, if any
 * @param responses collected responses from the members; only the
 *                  {@code RawJobMetrics} values are used here
 */
private void onStartExecutionComplete(Throwable error, Collection<Entry<MemberInfo, Object>> responses) {
    JobStatus currentStatus = mc.jobStatus();
    boolean completable = currentStatus == STARTING || currentStatus == RUNNING;
    if (!completable) {
        logCannotComplete(error);
        error = new IllegalStateException("Job coordination failed");
    }

    setJobMetrics(responses.stream()
            .map(Entry::getValue)
            .filter(RawJobMetrics.class::isInstance)
            .map(RawJobMetrics.class::cast)
            .collect(Collectors.toList()));

    if (error instanceof JobTerminateRequestedException && ((JobTerminateRequestedException) error).mode().isWithTerminalSnapshot()) {
        Throwable terminationError = error;
        // The terminal snapshot on members is always completed before replying to StartExecutionOp.
        // However, the response to snapshot operations can be processed after the response to
        // StartExecutionOp, so wait for that too.
        mc.snapshotContext().terminalSnapshotFuture().whenCompleteAsync(withTryCatch(logger, (r, e) -> finalizeJob(terminationError)));
        return;
    }

    // If the StartExecutionOperation didn't find the execution, it means that it was cancelled.
    // This cancellation can be because the master cancelled it. If that's the case, convert the
    // exception to JobTerminateRequestedException.
    // The cancellation can also happen if some participant left and the target cancelled the
    // execution locally in JobExecutionService.onMemberRemoved(). We keep this (and possibly
    // other) exceptions as they are and let the execution complete with failure.
    if (error instanceof ExecutionNotFoundException && requestedTerminationMode != null) {
        error = new JobTerminateRequestedException(requestedTerminationMode).initCause(error);
    }
    finalizeJob(error);
}
Also used : JobStatus(com.hazelcast.jet.core.JobStatus) Address(com.hazelcast.cluster.Address) SUSPEND(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.SUSPEND) NOT_RUNNING(com.hazelcast.jet.core.JobStatus.NOT_RUNNING) GetLocalJobMetricsOperation(com.hazelcast.jet.impl.operation.GetLocalJobMetricsOperation) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) NonCompletableFuture(com.hazelcast.jet.impl.util.NonCompletableFuture) ExceptionUtil.isTopologyException(com.hazelcast.jet.impl.util.ExceptionUtil.isTopologyException) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException) SourceProcessors.readMapP(com.hazelcast.jet.core.processor.SourceProcessors.readMapP) RESTART(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate.RESTART) JetDelegatingClassLoader(com.hazelcast.jet.impl.deployment.JetDelegatingClassLoader) TerminatedWithSnapshotException(com.hazelcast.jet.impl.exception.TerminatedWithSnapshotException) Collectors.toMap(java.util.stream.Collectors.toMap) Functions.entryKey(com.hazelcast.function.Functions.entryKey) MemberInfo(com.hazelcast.internal.cluster.MemberInfo) Map(java.util.Map) STARTING(com.hazelcast.jet.core.JobStatus.STARTING) SUSPENDED(com.hazelcast.jet.core.JobStatus.SUSPENDED) DAG(com.hazelcast.jet.core.DAG) JobStatus(com.hazelcast.jet.core.JobStatus) ExceptionUtil(com.hazelcast.jet.impl.util.ExceptionUtil) JobMetrics(com.hazelcast.jet.core.metrics.JobMetrics) CancellationException(java.util.concurrent.CancellationException) CANCEL_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_GRACEFUL) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) Set(java.util.Set) UUID(java.util.UUID) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) CANCEL_FORCEFUL(com.hazelcast.jet.impl.TerminationMode.CANCEL_FORCEFUL) Objects(java.util.Objects) 
Util(com.hazelcast.jet.impl.util.Util) List(java.util.List) Util.idToString(com.hazelcast.jet.Util.idToString) ExecutionPlan(com.hazelcast.jet.impl.execution.init.ExecutionPlan) MetricNames(com.hazelcast.jet.core.metrics.MetricNames) Entry(java.util.Map.Entry) TopologyChangedException(com.hazelcast.jet.core.TopologyChangedException) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) JetDisabledException(com.hazelcast.jet.impl.exception.JetDisabledException) LoggingUtil(com.hazelcast.jet.impl.util.LoggingUtil) ExecutionPlanBuilder.createExecutionPlans(com.hazelcast.jet.impl.execution.init.ExecutionPlanBuilder.createExecutionPlans) Collectors.partitioningBy(java.util.stream.Collectors.partitioningBy) TerminateExecutionOperation(com.hazelcast.jet.impl.operation.TerminateExecutionOperation) ExceptionUtil.isRestartableException(com.hazelcast.jet.impl.util.ExceptionUtil.isRestartableException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LoggingUtil.logFinest(com.hazelcast.jet.impl.util.LoggingUtil.logFinest) Util.doWithClassLoader(com.hazelcast.jet.impl.util.Util.doWithClassLoader) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ExecutionService(com.hazelcast.spi.impl.executionservice.ExecutionService) StartExecutionOperation(com.hazelcast.jet.impl.operation.StartExecutionOperation) Function(java.util.function.Function) Supplier(java.util.function.Supplier) Util.formatJobDuration(com.hazelcast.jet.impl.util.Util.formatJobDuration) ActionAfterTerminate(com.hazelcast.jet.impl.TerminationMode.ActionAfterTerminate) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) ArrayList(java.util.ArrayList) JetException(com.hazelcast.jet.JetException) HashSet(java.util.HashSet) InitExecutionOperation(com.hazelcast.jet.impl.operation.InitExecutionOperation) COORDINATOR(com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.COORDINATOR) ILogger(com.hazelcast.logging.ILogger) 
SnapshotValidator.validateSnapshot(com.hazelcast.jet.impl.SnapshotValidator.validateSnapshot) ExceptionUtil.rethrow(com.hazelcast.jet.impl.util.ExceptionUtil.rethrow) Operation(com.hazelcast.spi.impl.operationservice.Operation) Util.entry(com.hazelcast.jet.Util.entry) ExceptionUtil.withTryCatch(com.hazelcast.jet.impl.util.ExceptionUtil.withTryCatch) BiConsumer(java.util.function.BiConsumer) MembersView(com.hazelcast.internal.cluster.impl.MembersView) LocalMemberResetException(com.hazelcast.core.LocalMemberResetException) RESTART_GRACEFUL(com.hazelcast.jet.impl.TerminationMode.RESTART_GRACEFUL) Edge(com.hazelcast.jet.core.Edge) Version(com.hazelcast.version.Version) EXPORTED_SNAPSHOTS_PREFIX(com.hazelcast.jet.impl.JobRepository.EXPORTED_SNAPSHOTS_PREFIX) Nonnull(javax.annotation.Nonnull) Tuple2(com.hazelcast.jet.datamodel.Tuple2) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) Measurement(com.hazelcast.jet.core.metrics.Measurement) SUSPENDED_EXPORTING_SNAPSHOT(com.hazelcast.jet.core.JobStatus.SUSPENDED_EXPORTING_SNAPSHOT) Util.toList(com.hazelcast.jet.impl.util.Util.toList) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) MetricTags(com.hazelcast.jet.core.metrics.MetricTags) NONE(com.hazelcast.jet.config.ProcessingGuarantee.NONE) Consumer(java.util.function.Consumer) Vertex(com.hazelcast.jet.core.Vertex) Tuple2.tuple2(com.hazelcast.jet.datamodel.Tuple2.tuple2) CustomClassLoadedObject.deserializeWithCustomClassLoader(com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject.deserializeWithCustomClassLoader) ExceptionUtil.peel(com.hazelcast.jet.impl.util.ExceptionUtil.peel) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) RUNNING(com.hazelcast.jet.core.JobStatus.RUNNING) Collections(java.util.Collections) IMap(com.hazelcast.map.IMap) Edge.between(com.hazelcast.jet.core.Edge.between) ExecutionNotFoundException(com.hazelcast.jet.impl.exception.ExecutionNotFoundException) RawJobMetrics(com.hazelcast.jet.impl.metrics.RawJobMetrics) 
JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException)

Aggregations

ExecutionNotFoundException (com.hazelcast.jet.impl.exception.ExecutionNotFoundException)5 MemberInfo (com.hazelcast.internal.cluster.MemberInfo)3 JetException (com.hazelcast.jet.JetException)3 Nonnull (javax.annotation.Nonnull)3 Address (com.hazelcast.cluster.Address)2 Util.idToString (com.hazelcast.jet.Util.idToString)2 RUNNING (com.hazelcast.jet.core.JobStatus.RUNNING)2 EXPORTED_SNAPSHOTS_PREFIX (com.hazelcast.jet.impl.JobRepository.EXPORTED_SNAPSHOTS_PREFIX)2 SnapshotPhase2Operation (com.hazelcast.jet.impl.operation.SnapshotPhase2Operation)2 LocalMemberResetException (com.hazelcast.core.LocalMemberResetException)1 Functions.entryKey (com.hazelcast.function.Functions.entryKey)1 MembersView (com.hazelcast.internal.cluster.impl.MembersView)1 Job (com.hazelcast.jet.Job)1 Util.entry (com.hazelcast.jet.Util.entry)1 NONE (com.hazelcast.jet.config.ProcessingGuarantee.NONE)1 DAG (com.hazelcast.jet.core.DAG)1 Edge (com.hazelcast.jet.core.Edge)1 Edge.between (com.hazelcast.jet.core.Edge.between)1 JobStatus (com.hazelcast.jet.core.JobStatus)1 COMPLETED (com.hazelcast.jet.core.JobStatus.COMPLETED)1