use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast-jet by hazelcast.
the class MasterContext method invokeCompleteExecution.
/**
 * Dispatches a {@code CompleteExecutionOperation} for the current execution via
 * {@code invoke} and finalizes the job from the completion callback.
 * <p>
 * When the job status is {@code STARTING}, {@code RESTARTING} or {@code RUNNING},
 * the given error (possibly {@code null}) is passed to the operation unchanged.
 * In any other status the situation is logged at severe level and the operation
 * carries an {@link IllegalStateException} instead.
 * <p>
 * Note that {@code finalizeJob} is always called with the original {@code error},
 * not with the substituted exception.
 *
 * @param error the failure the execution ended with, or {@code null} if there was none
 */
private void invokeCompleteExecution(Throwable error) {
    JobStatus currentStatus = jobStatus();
    boolean inCompletableState =
            currentStatus == STARTING || currentStatus == RESTARTING || currentStatus == RUNNING;

    final Throwable errorToSend;
    if (inCompletableState) {
        logger.fine("Completing " + jobIdString());
        errorToSend = error;
    } else {
        if (error == null) {
            logger.severe("Cannot properly complete " + jobIdString() + ": status is " + currentStatus);
        } else {
            logger.severe("Cannot properly complete failed " + jobIdString() + ": status is " + currentStatus,
                    error);
        }
        // The job is in an unexpected state: report a coordination failure to the operation.
        errorToSend = new IllegalStateException("Job coordination failed.");
    }

    Function<ExecutionPlan, Operation> completeOpFactory =
            plan -> new CompleteExecutionOperation(executionId, errorToSend);
    invoke(completeOpFactory, responses -> finalizeJob(error), null);
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast-jet by hazelcast.
the class MasterContext method tryStartJob.
/**
 * Starts execution of the job if it is not already completed, cancelled or failed.
 * <ul><li>
 * If the job is already cancelled, the job completion procedure is triggered.
 * </li><li>
 * If the job quorum is not satisfied, job restart is rescheduled.
 * </li><li>
 * If there was a membership change and the partition table is not completely
 * fixed yet, job restart is rescheduled.
 * </li><li>
 * If the DAG cannot be deserialized or the execution plans cannot be created,
 * the job is finalized with the caught exception.
 * </li></ul>
 * On the success path an {@code InitExecutionOperation} is dispatched via
 * {@code invoke}, with {@code onInitStepCompleted} as the completion callback.
 *
 * @param executionIdSupplier function applied to the job id to obtain the id
 *                            of the new execution
 */
void tryStartJob(Function<Long, Long> executionIdSupplier) {
    if (!setJobStatusToStarting()) {
        return;
    }
    if (scheduleRestartIfQuorumAbsent()) {
        return;
    }
    if (scheduleRestartIfClusterIsNotSafe()) {
        return;
    }

    DAG jobDag;
    try {
        jobDag = deserializeDAG();
    } catch (Exception e) {
        logger.warning("DAG deserialization failed", e);
        finalizeJob(e);
        return;
    }

    // Remember the vertices as they are now: the DAG may be rewritten below.
    vertices = new HashSet<>();
    jobDag.iterator().forEachRemaining(vertices::add);

    executionId = executionIdSupplier.apply(jobId);

    // Id of the most recently started snapshot, whether it completed or not.
    // The next snapshot to be started must get an id greater than this one.
    long lastSnapshotId = NO_SNAPSHOT;
    if (isSnapshottingEnabled()) {
        Long restoreFromSnapshot = snapshotRepository.latestCompleteSnapshot(jobId);
        snapshotRepository.deleteAllSnapshotsExceptOne(jobId, restoreFromSnapshot);
        Long latestStarted = snapshotRepository.latestStartedSnapshot(jobId);

        if (restoreFromSnapshot == null) {
            logger.info("No previous snapshot for " + jobIdString() + " found.");
        } else {
            logger.info("State of " + jobIdString() + " will be restored from snapshot " + restoreFromSnapshot);
            rewriteDagWithSnapshotRestore(jobDag, restoreFromSnapshot);
        }

        if (latestStarted != null) {
            lastSnapshotId = latestStarted;
        }
    }

    MembersView membersView = getMembersView();

    // Build the plans with the job's class loader as context class loader;
    // the previous one is restored in the finally block.
    ClassLoader originalClassLoader = swapContextClassLoader(coordinationService.getClassLoader(jobId));
    try {
        int defaultLocalParallelism =
                getJetInstance(nodeEngine).getConfig().getInstanceConfig().getCooperativeThreadCount();
        logger.info("Start executing " + jobIdString() + ", status " + jobStatus() + "\n"
                + jobDag.toString(defaultLocalParallelism));
        logger.fine("Building execution plan for " + jobIdString());
        executionPlanMap = createExecutionPlans(nodeEngine, membersView, jobDag, getJobConfig(), lastSnapshotId);
    } catch (Exception e) {
        logger.severe("Exception creating execution plan for " + jobIdString(), e);
        finalizeJob(e);
        return;
    } finally {
        Thread.currentThread().setContextClassLoader(originalClassLoader);
    }
    logger.fine("Built execution plans for " + jobIdString());

    Set<MemberInfo> participants = executionPlanMap.keySet();
    Function<ExecutionPlan, Operation> initOpFactory = plan -> new InitExecutionOperation(
            jobId,
            executionId,
            membersView.getVersion(),
            participants,
            nodeEngine.getSerializationService().toData(plan));
    invoke(initOpFactory, this::onInitStepCompleted, null);
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast-jet by hazelcast.
the class MasterContext method invokeOnParticipants.
/**
 * Creates one operation per entry of {@code executionPlanMap} and invokes it on
 * the address of the corresponding member.
 * <p>
 * Each invocation's future is stored in {@code futures} under its member. A shared
 * counter, initialised to the number of plans, is decremented by every invocation's
 * done-callback; the callback that brings it to zero completes {@code doneFuture}.
 * <p>
 * NOTE(review): if {@code executionPlanMap} is empty no invocation is made, so
 * {@code doneFuture} is not completed by this method.
 *
 * @param futures    map that receives the invocation future of each member
 * @param doneFuture future completed with {@code null} once every invocation is done
 * @param opCtor     creates the operation to send from a member's execution plan
 */
private void invokeOnParticipants(Map<MemberInfo, InternalCompletableFuture<Object>> futures, CompletableFuture<Void> doneFuture, Function<ExecutionPlan, Operation> opCtor) {
    final AtomicInteger pendingInvocations = new AtomicInteger(executionPlanMap.size());

    for (Entry<MemberInfo, ExecutionPlan> planEntry : executionPlanMap.entrySet()) {
        MemberInfo target = planEntry.getKey();
        Operation operation = opCtor.apply(planEntry.getValue());

        InternalCompletableFuture<Object> invocationFuture = nodeEngine.getOperationService()
                .createInvocationBuilder(JetService.SERVICE_NAME, operation, target.getAddress())
                .setDoneCallback(() -> {
                    if (pendingInvocations.decrementAndGet() != 0) {
                        // other invocations are still outstanding
                        return;
                    }
                    doneFuture.complete(null);
                })
                .invoke();

        futures.put(target, invocationFuture);
    }
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast-jet by hazelcast.
the class MasterContext method invokeStartExecution.
// Marks the job as RUNNING and dispatches a StartExecutionOperation via invoke().
// If a participant leaves or the execution fails in a participant locally, executions are cancelled
// on the remaining participants and the callback is completed after all invocations return.
// If snapshotting is enabled, a snapshot is scheduled for this execution after invoke() has been called.
private void invokeStartExecution() {
    logger.fine("Executing " + jobIdString());

    // Take a local copy of the execution id; the lambdas below use this copy, not the field.
    final long startedExecutionId = this.executionId;

    final ExecutionInvocationCallback invocationCallback = new ExecutionInvocationCallback(startedExecutionId);
    // Completion of either the cancellation token or the restart token cancels the invocations.
    cancellationToken.whenCompleted(invocationCallback::cancelInvocations);
    final CompletionToken restartToken = new CompletionToken(logger);
    restartToken.whenCompleted(invocationCallback::cancelInvocations);

    Function<ExecutionPlan, Operation> startOpFactory =
            plan -> new StartExecutionOperation(jobId, startedExecutionId);
    Consumer<Map<MemberInfo, Object>> onAllInvocationsDone = results -> {
        // Clear the field first; the restart decision is read from the local token.
        this.executionRestartToken = null;
        onExecuteStepCompleted(results, restartToken.isCompleted());
    };

    // We must set executionRestartToken before we call invoke() method because once all invocations
    // are done, executionRestartToken will be reset. Therefore, setting it after the invoke() call is racy.
    this.executionRestartToken = restartToken;
    jobStatus.set(RUNNING);
    invoke(startOpFactory, onAllInvocationsDone, invocationCallback);

    if (isSnapshottingEnabled()) {
        coordinationService.scheduleSnapshot(jobId, startedExecutionId);
    }
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast-jet by hazelcast.
the class JobExecutionService method initExecution.
/**
 * Initiates the given execution if the local node accepts the coordinator
 * as its master, and has an up-to-date member list information.
 * <ul><li>
 * If the local node has a stale member list, it retries the init operation
 * until it receives the new member list from the master.
 * </li><li>
 * If the local node detects that the member list changed after the init
 * operation is sent but before executed, then it sends a graceful failure
 * so that the job init will be retried properly.
 * </li><li>
 * If an execution context is already registered under the same execution id,
 * an {@link IllegalStateException} is thrown.
 * </li><li>
 * If there is an already ongoing execution for the given job, then the
 * init execution is retried (a {@code RetryableHazelcastException} is thrown).
 * </li></ul>
 * The newly created execution context is registered under its execution id
 * even when its initialization throws.
 *
 * @param jobId                        id of the job
 * @param executionId                  id of the execution to initialize
 * @param coordinator                  address of the coordinator that sent the request
 * @param coordinatorMemberListVersion member list version known to the coordinator
 * @param participants                 members taking part in the execution
 * @param plan                         execution plan used to initialize the new context
 */
public void initExecution(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants, ExecutionPlan plan) {
    verifyClusterInformation(jobId, executionId, coordinator, coordinatorMemberListVersion, participants);
    failIfNotRunning();

    boolean jobIdNewlyRegistered = executionContextJobIds.add(jobId);
    if (!jobIdNewlyRegistered) {
        ExecutionContext sameExecution = executionContexts.get(executionId);
        if (sameExecution != null) {
            throw new IllegalStateException(String.format(
                    "Execution context for %s for coordinator %s already exists for coordinator %s",
                    jobAndExecutionId(jobId, executionId), coordinator, sameExecution.coordinator()));
        }

        // A different execution of the same job is registered: log each one, then ask for a retry.
        // NOTE(review): the "local execution %s" placeholder is filled with the job id of the
        // existing context - confirm whether its execution id was intended.
        for (ExecutionContext existing : executionContexts.values()) {
            if (existing.jobId() == jobId) {
                logger.fine(String.format(
                        "Execution context for %s for coordinator %s already exists"
                                + " with local execution %s for coordinator %s",
                        jobAndExecutionId(jobId, executionId), coordinator,
                        idToString(existing.jobId()), existing.coordinator()));
            }
        }
        throw new RetryableHazelcastException();
    }

    Set<Address> participantAddresses = participants.stream()
            .map(MemberInfo::getAddress)
            .collect(toSet());
    ExecutionContext newContext = new ExecutionContext(
            nodeEngine, taskletExecutionService, jobId, executionId, coordinator, participantAddresses);
    try {
        newContext.initialize(plan);
    } finally {
        // Register the context whether or not initialization succeeded.
        executionContexts.put(executionId, newContext);
    }

    logger.info("Execution plan for " + jobAndExecutionId(jobId, executionId) + " initialized");
}
Aggregations