use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.
the class MasterContext method setFinalResult.
/**
 * Stores the terminal status of the job and completes the completion future.
 *
 * @param failure the error the job ended with, or {@code null} if there was none
 */
void setFinalResult(Throwable failure) {
    // The stored status is decided by isSuccess(failure); the outcome of the
    // future depends only on whether a failure object was passed in.
    final JobStatus terminalStatus;
    if (isSuccess(failure)) {
        terminalStatus = COMPLETED;
    } else {
        terminalStatus = FAILED;
    }
    jobStatus.set(terminalStatus);

    if (failure != null) {
        completionFuture.internalCompleteExceptionally(failure);
        return;
    }
    completionFuture.internalComplete();
}
use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.
the class MasterContext method invokeCompleteExecution.
/**
 * Invokes a {@code CompleteExecutionOperation} for the current execution and
 * calls {@code finalizeJob} once the responses have arrived.
 *
 * @param error the error the execution ended with, or {@code null} if there was none
 */
private void invokeCompleteExecution(Throwable error) {
    JobStatus currentStatus = jobStatus();
    boolean inCompletableState = currentStatus == STARTING
            || currentStatus == RESTARTING
            || currentStatus == RUNNING;

    Throwable finalError;
    if (inCompletableState) {
        logger.fine("Completing " + jobIdString());
        finalError = error;
    } else {
        // Unexpected status: log it (with the cause, if any) and report a
        // generic coordination failure in the operation instead.
        String prefix = error != null ? "Cannot properly complete failed " : "Cannot properly complete ";
        String message = prefix + jobIdString() + ": status is " + currentStatus;
        if (error != null) {
            logger.severe(message, error);
        } else {
            logger.severe(message);
        }
        finalError = new IllegalStateException("Job coordination failed.");
    }

    Function<ExecutionPlan, Operation> completeOpFactory =
            plan -> new CompleteExecutionOperation(executionId, finalError);
    // Note: finalizeJob is given the original error, not the substituted finalError.
    invoke(completeOpFactory, responses -> finalizeJob(error), null);
}
use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.
the class MasterContext method tryStartJob.
/**
 * Starts execution of the job if it is not already completed, cancelled or failed.
 * If the job is already cancelled, the job completion procedure is triggered.
 * If the job quorum is not satisfied, job restart is rescheduled.
 * If there was a membership change and the partition table is not completely
 * fixed yet, job restart is rescheduled.
 * <p>
 * If the DAG cannot be deserialized or the execution plans cannot be created,
 * the job is finalized with the caught exception.
 *
 * @param executionIdSupplier function applied to the job id to obtain the
 *                            execution id used for this start attempt
 */
void tryStartJob(Function<Long, Long> executionIdSupplier) {
// nothing to do if the status could not be moved to "starting"
if (!setJobStatusToStarting()) {
return;
}
// a restart has been scheduled by one of these checks; do not start now
if (scheduleRestartIfQuorumAbsent() || scheduleRestartIfClusterIsNotSafe()) {
return;
}
DAG dag;
try {
dag = deserializeDAG();
} catch (Exception e) {
logger.warning("DAG deserialization failed", e);
finalizeJob(e);
return;
}
// save a copy of the vertex list, because it is going to change
vertices = new HashSet<>();
dag.iterator().forEachRemaining(vertices::add);
executionId = executionIdSupplier.apply(jobId);
// id of the last started snapshot, whether or not it completed; the next
// started snapshot must have an id greater than this number
long lastSnapshotId = NO_SNAPSHOT;
if (isSnapshottingEnabled()) {
Long snapshotIdToRestore = snapshotRepository.latestCompleteSnapshot(jobId);
// the latest started snapshot is queried only after the other snapshots were deleted
snapshotRepository.deleteAllSnapshotsExceptOne(jobId, snapshotIdToRestore);
Long lastStartedSnapshot = snapshotRepository.latestStartedSnapshot(jobId);
if (snapshotIdToRestore != null) {
logger.info("State of " + jobIdString() + " will be restored from snapshot " + snapshotIdToRestore);
rewriteDagWithSnapshotRestore(dag, snapshotIdToRestore);
} else {
logger.info("No previous snapshot for " + jobIdString() + " found.");
}
if (lastStartedSnapshot != null) {
lastSnapshotId = lastStartedSnapshot;
}
}
MembersView membersView = getMembersView();
// build the execution plans with the job's class loader set as the thread
// context class loader; the previous one is restored in the finally block
ClassLoader previousCL = swapContextClassLoader(coordinationService.getClassLoader(jobId));
try {
int defaultLocalParallelism = getJetInstance(nodeEngine).getConfig().getInstanceConfig().getCooperativeThreadCount();
logger.info("Start executing " + jobIdString() + ", status " + jobStatus() + "\n" + dag.toString(defaultLocalParallelism));
logger.fine("Building execution plan for " + jobIdString());
executionPlanMap = createExecutionPlans(nodeEngine, membersView, dag, getJobConfig(), lastSnapshotId);
} catch (Exception e) {
logger.severe("Exception creating execution plan for " + jobIdString(), e);
finalizeJob(e);
return;
} finally {
Thread.currentThread().setContextClassLoader(previousCL);
}
logger.fine("Built execution plans for " + jobIdString());
// the participants are the members that have an execution plan
Set<MemberInfo> participants = executionPlanMap.keySet();
// each plan is serialized into its own InitExecutionOperation
Function<ExecutionPlan, Operation> operationCtor = plan -> new InitExecutionOperation(jobId, executionId, membersView.getVersion(), participants, nodeEngine.getSerializationService().toData(plan));
invoke(operationCtor, this::onInitStepCompleted, null);
}
use of com.hazelcast.jet.core.JobStatus in project hazelcast-jet by hazelcast.
the class JobCoordinationService method getJobStatus.
/**
 * Looks up the status of the job with the given id.
 * <p>
 * The lookup order is: stored job result, then the master context of a
 * running job, then the job record.
 *
 * @param jobId id of the job to query
 * @return the status of the job
 * @throws JetException if {@code isMaster()} is false on this member
 * @throws JobNotFoundException if neither a result, a master context nor a
 *         record exists for the job
 */
public JobStatus getJobStatus(long jobId) {
    if (!isMaster()) {
        throw new JetException("Cannot query status of Job " + idToString(jobId) + ". Master address: " + nodeEngine.getClusterService().getMasterAddress());
    }

    // The result map is updated first during completion, so consult it first.
    JobResult storedResult = jobRepository.getJobResult(jobId);
    if (storedResult != null) {
        return storedResult.getJobStatus();
    }

    // Next, see whether there is a master context for a running job.
    MasterContext masterContext = masterContexts.get(jobId);
    if (masterContext != null) {
        JobStatus contextStatus = masterContext.jobStatus();
        if (contextStatus != JobStatus.RUNNING) {
            return contextStatus;
        }
        // A running job that has been cancelled is reported as completing.
        if (masterContext.isCancelled()) {
            return JobStatus.COMPLETING;
        }
        return JobStatus.RUNNING;
    }

    // No master context: the job might have just been submitted.
    if (jobRepository.getJobRecord(jobId) != null) {
        return NOT_STARTED;
    }

    // No record either; the job might have completed in the meantime, so
    // check the results once more before giving up.
    JobResult lateResult = jobRepository.getJobResult(jobId);
    if (lateResult == null) {
        throw new JobNotFoundException(jobId);
    }
    return lateResult.getJobStatus();
}
use of com.hazelcast.jet.core.JobStatus in project hazelcast by hazelcast.
the class JobCoordinationService method getJobStatus.
/**
 * Returns the job status or fails with {@link JobNotFoundException}
 * if the requested job is not found.
 *
 * @param jobId id of the job to query
 * @return a future holding the status of the job
 */
public CompletableFuture<JobStatus> getJobStatus(long jobId) {
    return callWithJob(
            jobId,
            masterContext -> {
                // On job completion NOT_RUNNING is written to jobStatus first and
                // null is written to requestedTerminationMode afterwards (see
                // MasterJobContext.finalizeJob()), so the two values must be
                // read here in the reverse order.
                TerminationMode requestedMode = masterContext.jobContext().requestedTerminationMode();
                JobStatus currentStatus = masterContext.jobStatus();
                if (currentStatus == RUNNING && requestedMode != null) {
                    return COMPLETING;
                }
                return currentStatus;
            },
            JobResult::getJobStatus,
            jobRecord -> NOT_RUNNING,
            executionRecord -> executionRecord.isSuspended() ? SUSPENDED : NOT_RUNNING);
}
Aggregations