use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.
the class MasterSnapshotContext method onSnapshotPhase2Complete.
/**
* Processes the collected member responses of snapshot phase 2. The whole body is
* submitted to the coordinator thread through
* {@code mc.coordinationService().submitToCoordinatorThread()}.
* <p>
* If {@code executionId} no longer matches {@code mc.executionId()}, the responses are
* ignored and the method returns early; note that in this case {@code future} is left
* uncompleted by this method. Otherwise member failures are logged, {@code future} is
* completed, the {@code snapshotInProgress} flag is cleared under the {@code mc} lock and,
* depending on {@code snapshotFlags}, either {@code terminalSnapshotFuture} is completed
* or the next automatic snapshot is scheduled.
*
* @param phase1Error error from the phase-1. Null if phase-1 was successful.
* @param responses collected responses from the members; a value that is a
*                  {@link Throwable} is treated as a failure on that member
* @param executionId ID of the execution the snapshot belongs to; compared against
*                    {@code mc.executionId()} to detect stale responses
* @param snapshotId ID of the snapshot, used only in log messages here
* @param snapshotFlags flags of the snapshot
* @param future future to be completed when the phase-2 is fully completed. Completed
*               normally when {@code phase1Error} is null, otherwise exceptionally with a
*               {@code JetException} carrying {@code phase1Error}. May be null.
* @param startTime phase-1 start time, on the {@code System.currentTimeMillis()} clock
*/
private void onSnapshotPhase2Complete(String phase1Error, Collection<Entry<MemberInfo, Object>> responses, long executionId, long snapshotId, int snapshotFlags, @Nullable CompletableFuture<Void> future, long startTime) {
mc.coordinationService().submitToCoordinatorThread(() -> {
// Stale responses: the job is already on a different execution, so there is nothing to finish.
if (executionId != mc.executionId()) {
LoggingUtil.logFine(logger, "%s: ignoring responses for snapshot %s phase 2: " + "the responses are from a different execution: %s. Responses: %s", mc.jobIdString(), snapshotId, idToString(executionId), responses);
return;
}
// Phase-2 failures on members are only logged, they don't change the outcome below.
// ExecutionNotFoundException is logged at FINE, any other failure at WARNING.
for (Entry<MemberInfo, Object> response : responses) {
if (response.getValue() instanceof Throwable) {
logger.log(response.getValue() instanceof ExecutionNotFoundException ? Level.FINE : Level.WARNING, SnapshotPhase2Operation.class.getSimpleName() + " for snapshot " + snapshotId + " in " + mc.jobIdString() + " failed on member: " + response, (Throwable) response.getValue());
}
}
// The outcome reported to the caller is determined by phase 1 only.
// The future is completed before the lock is taken.
if (future != null) {
if (phase1Error == null) {
future.complete(null);
} else {
future.completeExceptionally(new JetException(phase1Error));
}
}
mc.lock();
try {
// double-check the execution ID after locking
if (executionId != mc.executionId()) {
logger.fine("Not completing terminalSnapshotFuture on " + mc.jobIdString() + ", new execution " + "already started, snapshot was for executionId=" + idToString(executionId));
return;
}
assert snapshotInProgress : "snapshot not in progress";
snapshotInProgress = false;
if (SnapshotFlags.isTerminal(snapshotFlags)) {
// after a terminal snapshot, no more snapshots are scheduled in this execution
boolean completedNow = terminalSnapshotFuture.complete(null);
assert completedNow : "terminalSnapshotFuture was already completed";
if (phase1Error != null) {
// If the terminal snapshot failed, the executions might not terminate on some members
// normally and we don't care if they do - the snapshot is done and we have to bring the
// execution down. Let's execute the CompleteExecutionOperation to terminate them.
mc.jobContext().cancelExecutionInvocations(mc.jobId(), mc.executionId(), null, null);
}
} else if (!SnapshotFlags.isExport(snapshotFlags)) {
// if this snapshot was an automatic snapshot, schedule the next one
mc.coordinationService().scheduleSnapshot(mc, executionId);
}
} finally {
// always release the lock, including on the early return above
mc.unlock();
}
// Build the message only if FINE logging is enabled; the duration is measured from startTime.
if (logger.isFineEnabled()) {
logger.fine("Snapshot " + snapshotId + " for " + mc.jobIdString() + " completed in " + (System.currentTimeMillis() - startTime) + "ms, status=" + (phase1Error == null ? "success" : "failure: " + phase1Error));
}
// NOTE(review): presumably starts another snapshot that was requested while this one
// was in progress - confirm against tryBeginSnapshot().
tryBeginSnapshot();
});
}
use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.
the class MasterSnapshotContext method onSnapshotPhase1Complete.
/**
 * Merges the phase-1 responses of all members and hands the merged result over to
 * {@code onSnapshotPhase1CompleteWithStartResponses()}. The work is submitted to the
 * coordinator thread.
 * <p>
 * For members that answered with {@link ExecutionNotFoundException}, the future found in
 * {@code mc.startOperationResponses()} for that member's address is collected instead, and
 * the hand-over happens only after all of the collected futures have completed.
 *
 * @param responses collected responses from the members; each value is either a
 *                  {@code SnapshotPhase1Result} or a {@link Throwable}
 * @param executionId ID of the execution the snapshot belongs to, passed on unchanged
 * @param snapshotId ID of the snapshot, passed on unchanged
 * @param snapshotMapName the IMap name to which the snapshot is written
 * @param snapshotFlags flags of the snapshot
 * @param future a future to be completed when the phase-2 is fully completed
 */
private void onSnapshotPhase1Complete(Collection<Map.Entry<MemberInfo, Object>> responses, long executionId, long snapshotId, String snapshotMapName, int snapshotFlags, @Nullable CompletableFuture<Void> future) {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        SnapshotPhase1Result merged = new SnapshotPhase1Result();
        List<CompletableFuture<Void>> pendingStartResponses = new ArrayList<>();

        // Each value is either a SnapshotPhase1Result or an exception,
        // see the #invokeOnParticipants() method.
        for (Map.Entry<MemberInfo, Object> memberResponse : responses) {
            Object value = memberResponse.getValue();
            if (value instanceof ExecutionNotFoundException) {
                // The member doesn't know the execution. Remember the future of its start
                // operation response; all of these are awaited below.
                pendingStartResponses.add(mc.startOperationResponses().get(memberResponse.getKey().getAddress()));
            } else if (value instanceof Throwable) {
                // Any other failure is merged in as an empty result carrying the error.
                merged.merge(new SnapshotPhase1Result(0, 0, 0, (Throwable) value));
            } else {
                merged.merge((SnapshotPhase1Result) value);
            }
        }

        if (!pendingStartResponses.isEmpty()) {
            LoggingUtil.logFine(logger, "%s will wait for %d responses to StartExecutionOperation in " + "onSnapshotPhase1Complete()", mc.jobIdString(), pendingStartResponses.size());
        }

        // Typically there is nothing to wait for: `allOf` over an empty array is already
        // completed. The list is non-empty when some member finished its execution while
        // another did not, or close to the end of a job, e.g. after a failure.
        // We might also end up waiting for a start-operation response of a later execution,
        // which can arrive much later. That case is deliberately not handled: once it arrives,
        // the changed executionId is detected and the response ignored. No thread is blocked
        // meanwhile, since the wait is expressed as a future.
        CompletableFuture<Void> allStartResponses =
                CompletableFuture.allOf(pendingStartResponses.toArray(new CompletableFuture[0]));
        allStartResponses.whenComplete(withTryCatch(logger, (r, t) ->
                onSnapshotPhase1CompleteWithStartResponses(responses, executionId, snapshotId, snapshotMapName,
                        snapshotFlags, future, merged, pendingStartResponses)));
    });
}
use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.
the class GetLocalJobMetricsOperation method run.
/**
 * Stores the job metrics of the local execution context for {@code executionId} into
 * {@code response}.
 *
 * @throws ExecutionNotFoundException if there is no execution context for {@code executionId}
 */
@Override
public void run() {
    ExecutionContext localContext = getJetServiceBackend()
            .getJobExecutionService()
            .getExecutionContext(executionId);
    if (localContext != null) {
        response = localContext.getJobMetrics();
        return;
    }
    throw new ExecutionNotFoundException(executionId);
}
use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.
the class JobExecutionService method assertExecutionContext.
/**
 * Looks up the execution context for {@code executionId} and verifies that the caller is
 * allowed to operate on it.
 *
 * @param callerAddress address of the member issuing the operation; must be the current master
 * @param jobId job ID the caller claims the execution belongs to
 * @param executionId ID of the execution to look up
 * @param callerOpName name of the calling operation, used in error messages
 * @return the execution context, never null
 * @throws IllegalStateException if the caller is not the master, or if the found context
 *         has a different coordinator or a different job ID
 * @throws ExecutionNotFoundException if no context exists for {@code executionId}
 */
@Nonnull
public ExecutionContext assertExecutionContext(Address callerAddress, long jobId, long executionId, String callerOpName) {
    Address masterAddress = nodeEngine.getMasterAddress();
    // Compare first, then run the running-check exactly once; it precedes both outcomes.
    boolean callerIsMaster = callerAddress.equals(masterAddress);
    failIfNotRunning();
    if (!callerIsMaster) {
        throw new IllegalStateException(String.format("Caller %s cannot do '%s' for %s: it is not the master, the master is %s", callerAddress, callerOpName, jobIdAndExecutionId(jobId, executionId), masterAddress));
    }

    ExecutionContext found = executionContexts.get(executionId);
    if (found == null) {
        throw new ExecutionNotFoundException(String.format("%s not found for coordinator %s for '%s'", jobIdAndExecutionId(jobId, executionId), callerAddress, callerOpName));
    }
    // The context must have been created by this very coordinator, for this very job.
    if (!found.coordinator().equals(callerAddress) || found.jobId() != jobId) {
        throw new IllegalStateException(String.format("%s, originally from coordinator %s, cannot do '%s' by coordinator %s and execution %s", found.jobNameAndExecutionId(), found.coordinator(), callerOpName, callerAddress, idToString(executionId)));
    }
    return found;
}
use of com.hazelcast.jet.impl.exception.ExecutionNotFoundException in project hazelcast by hazelcast.
the class MasterJobContext method onStartExecutionComplete.
/**
 * Handles the completion of the start-execution invocations: records the job metrics
 * found among the responses and finalizes the job with the resulting error.
 *
 * @param error the failure the execution completed with, as passed by the caller
 * @param responses collected responses from the members; values that are
 *                  {@code RawJobMetrics} are stored through {@code setJobMetrics()}
 */
private void onStartExecutionComplete(Throwable error, Collection<Entry<MemberInfo, Object>> responses) {
    Throwable failure = error;
    JobStatus status = mc.jobStatus();
    if (status != STARTING && status != RUNNING) {
        logCannotComplete(failure);
        failure = new IllegalStateException("Job coordination failed");
    }

    setJobMetrics(responses.stream()
            .map(Entry::getValue)
            .filter(RawJobMetrics.class::isInstance)
            .map(RawJobMetrics.class::cast)
            .collect(Collectors.toList()));

    boolean terminatedWithTerminalSnapshot = failure instanceof JobTerminateRequestedException
            && ((JobTerminateRequestedException) failure).mode().isWithTerminalSnapshot();
    if (terminatedWithTerminalSnapshot) {
        // Members always finish the terminal snapshot before they reply to StartExecutionOp,
        // but the responses to the snapshot operations may be processed after that reply.
        // Hence finalize only once the terminal snapshot future is done as well.
        final Throwable finalError = failure;
        mc.snapshotContext().terminalSnapshotFuture().whenCompleteAsync(withTryCatch(logger, (r, e) -> finalizeJob(finalError)));
        return;
    }

    // StartExecutionOperation not finding the execution means the execution was cancelled.
    // If the master requested the termination, report it as JobTerminateRequestedException.
    // Otherwise the cancellation may come from a participant leaving, with the target
    // cancelling the execution locally in JobExecutionService.onMemberRemoved(). That
    // exception (and possibly others) is kept as it is, so the execution completes with failure.
    if (failure instanceof ExecutionNotFoundException && requestedTerminationMode != null) {
        failure = new JobTerminateRequestedException(requestedTerminationMode).initCause(failure);
    }
    finalizeJob(failure);
}
Aggregations