Use of com.hazelcast.jet.impl.operation.StartExecutionOperation in project hazelcast-jet by hazelcast.
From the class MasterContext, method invokeStartExecution:
/**
 * Kicks off the start-execution step: builds a {@link StartExecutionOperation} factory for the
 * current execution, moves the job status to {@code RUNNING} and hands everything to
 * {@code invoke()}.
 * <p>
 * If a participant leaves or the execution fails in a participant locally, executions are
 * cancelled on the remaining participants and the callback is completed after all invocations
 * return.
 */
private void invokeStartExecution() {
    logger.fine("Executing " + jobIdString());

    // Local copy of the field so that the lambdas below capture one fixed value.
    final long currentExecutionId = this.executionId;
    final ExecutionInvocationCallback invocationCallback = new ExecutionInvocationCallback(currentExecutionId);

    // Completing either the cancellation token or the restart token cancels the invocations.
    cancellationToken.whenCompleted(invocationCallback::cancelInvocations);
    final CompletionToken restartToken = new CompletionToken(logger);
    restartToken.whenCompleted(invocationCallback::cancelInvocations);

    final Function<ExecutionPlan, Operation> startOperationFactory =
            plan -> new StartExecutionOperation(jobId, currentExecutionId);

    final Consumer<Map<MemberInfo, Object>> onAllResponses = results -> {
        // Clear the field first; the locally captured token is still queried afterwards.
        this.executionRestartToken = null;
        onExecuteStepCompleted(results, restartToken.isCompleted());
    };

    // The field has to be assigned before invoke() is called: once all invocations are done the
    // completion callback resets it, so assigning it after the invoke() call would be racy.
    this.executionRestartToken = restartToken;
    jobStatus.set(RUNNING);
    invoke(startOperationFactory, onAllResponses, invocationCallback);

    if (isSnapshottingEnabled()) {
        coordinationService.scheduleSnapshot(jobId, currentExecutionId);
    }
}
Use of com.hazelcast.jet.impl.operation.StartExecutionOperation in project hazelcast by hazelcast.
From the class MasterJobContext, method onStartExecutionComplete:
/**
 * Processes the outcome of the start-execution step and finalizes the job.
 * <p>
 * Any {@link RawJobMetrics} values found among the responses are stored via
 * {@code setJobMetrics}. The job is then finalized with {@code error}, possibly after it has
 * been replaced or wrapped as described in the inline comments.
 *
 * @param error     the failure reported for the step; may be replaced inside this method
 * @param responses per-member responses; only values that are {@link RawJobMetrics} are used
 */
private void onStartExecutionComplete(Throwable error, Collection<Entry<MemberInfo, Object>> responses) {
    JobStatus currentStatus = mc.jobStatus();
    boolean completionExpected = currentStatus == STARTING || currentStatus == RUNNING;
    if (!completionExpected) {
        logCannotComplete(error);
        error = new IllegalStateException("Job coordination failed");
    }

    setJobMetrics(responses.stream()
            .map(Entry::getValue)
            .filter(RawJobMetrics.class::isInstance)
            .map(RawJobMetrics.class::cast)
            .collect(Collectors.toList()));

    if (error instanceof JobTerminateRequestedException
            && ((JobTerminateRequestedException) error).mode().isWithTerminalSnapshot()) {
        // The terminal snapshot on members is always completed before replying to StartExecutionOp.
        // However, the response to snapshot operations can be processed after the response to
        // StartExecutionOp, so wait for that too.
        Throwable terminationError = error;
        mc.snapshotContext()
                .terminalSnapshotFuture()
                .whenCompleteAsync(withTryCatch(logger,
                        (ignoredResult, ignoredFailure) -> finalizeJob(terminationError)));
        return;
    }

    // If the StartExecutionOperation didn't find the execution, it means that it was cancelled.
    // This cancellation can be because the master cancelled it; in that case convert the exception
    // to JobTerminateRequestedException.
    // The cancellation can also happen if some participant left and the target cancelled the
    // execution locally in JobExecutionService.onMemberRemoved(). Such (and possibly other)
    // exceptions are kept as they are and the execution completes with failure.
    if (error instanceof ExecutionNotFoundException && requestedTerminationMode != null) {
        error = new JobTerminateRequestedException(requestedTerminationMode).initCause(error);
    }
    finalizeJob(error);
}
Aggregations