Use of com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.EXECUTION in project hazelcast by hazelcast.
Class JobExecutionService, method checkExecutions.
/**
 * Checks the light-job execution contexts held in {@code executionContexts}.
 * <ul>
 *   <li>Contexts with a known coordinator are grouped per coordinator and
 *       sent to it in a {@link CheckLightJobsOperation}. Every execution ID
 *       the coordinator returns (or every submitted ID, if the target is
 *       no longer a member) is terminated locally.</li>
 *   <li>Contexts whose coordinator is still unknown after
 *       {@code UNINITIALIZED_CONTEXT_MAX_AGE_NS} are terminated.</li>
 *   <li>Expired entries are removed from {@code failedJobs}.</li>
 * </ul>
 * Non-light jobs are skipped. Any throwable raised while doing the above is
 * logged and not propagated to the caller.
 * <p>
 * See also javadoc at {@link CheckLightJobsOperation}.
 */
private void checkExecutions() {
    try {
        long now = System.nanoTime();
        Map<Address, List<Long>> executionsPerMember = new HashMap<>();
        for (ExecutionContext ctx : executionContexts.values()) {
            if (!ctx.isLightJob()) {
                continue;
            }
            Address coordinator = ctx.coordinator();
            if (coordinator != null) {
                // if coordinator is known, add execution to the list to check
                executionsPerMember.computeIfAbsent(coordinator, k -> new ArrayList<>()).add(ctx.executionId());
            } else if (now - ctx.getCreatedOn() >= UNINITIALIZED_CONTEXT_MAX_AGE_NS) {
                // if coordinator is not known, remove execution if it's not known for too long.
                // The age is computed as a difference because System.nanoTime() values may
                // overflow and must not be compared by their absolute value.
                // NOTE(review): assumes getCreatedOn() is a System.nanoTime() reading - confirm.
                LoggingUtil.logFine(logger, "Terminating light job %s because it wasn't initialized during %d seconds", idToString(ctx.executionId()), NANOSECONDS.toSeconds(UNINITIALIZED_CONTEXT_MAX_AGE_NS));
                terminateExecution0(ctx, TerminationMode.CANCEL_FORCEFUL, new CancellationException());
            }
        }
        // submit the query to the coordinator
        for (Entry<Address, List<Long>> en : executionsPerMember.entrySet()) {
            long[] executionIds = en.getValue().stream().mapToLong(Long::longValue).toArray();
            Operation op = new CheckLightJobsOperation(executionIds);
            InvocationFuture<long[]> future = nodeEngine.getOperationService()
                    .createInvocationBuilder(JetServiceBackend.SERVICE_NAME, op, en.getKey())
                    .invoke();
            future.whenComplete((r, t) -> {
                long[] unknownIds;
                if (t instanceof TargetNotMemberException) {
                    // if the target isn't a member, then all executions are unknown
                    unknownIds = executionIds;
                } else if (t != null) {
                    logger.warning("Failed to check light job state with coordinator " + en.getKey() + ": " + t, t);
                    return;
                } else {
                    unknownIds = r;
                }
                assert unknownIds != null;
                for (long executionId : unknownIds) {
                    ExecutionContext execCtx = executionContexts.get(executionId);
                    if (execCtx != null) {
                        // build the message only when it will actually be logged
                        if (logger.isFineEnabled()) {
                            logger.fine("Terminating light job " + idToString(executionId) + " because the coordinator doesn't know it");
                        }
                        terminateExecution0(execCtx, TerminationMode.CANCEL_FORCEFUL, new CancellationException());
                    }
                }
            });
        }
        // clean up failedJobs; overflow-safe nanoTime comparison (expiryTime is before now)
        failedJobs.values().removeIf(expiryTime -> expiryTime - now < 0);
    } catch (Throwable e) {
        logger.severe("Failed to query live light executions: " + e, e);
    }
}
Use of com.hazelcast.jet.impl.JobClassLoaderService.JobPhase.EXECUTION in project hazelcast by hazelcast.
Class JobExecutionService, method runLightJob.
/**
 * Registers, initializes and starts the execution of a light job on this member.
 * <p>
 * The cluster information is verified and the service is checked to be running
 * before anything is registered. The execution context is then looked up or
 * created under {@code mutex}, initialized with the given plan, and handed to
 * {@code beginExecution0}.
 *
 * @param jobId                        ID of the job; must equal {@code executionId}
 * @param executionId                  ID of the execution
 * @param coordinator                  address of the coordinator
 * @param coordinatorMemberListVersion member list version, passed to {@code verifyClusterInformation}
 * @param participants                 members taking part in the execution
 * @param plan                         execution plan used to initialize the context
 * @return the future returned by {@code beginExecution0}
 */
public CompletableFuture<RawJobMetrics> runLightJob(long jobId, long executionId, Address coordinator, int coordinatorMemberListVersion, Set<MemberInfo> participants, ExecutionPlan plan) {
    assert executionId == jobId : "executionId(" + idToString(executionId) + ") != jobId(" + idToString(jobId) + ")";
    verifyClusterInformation(jobId, executionId, coordinator, coordinatorMemberListVersion, participants);
    failIfNotRunning();
    ExecutionContext execCtx;
    synchronized (mutex) {
        addExecutionContextJobId(jobId, executionId, coordinator);
        execCtx = executionContexts.computeIfAbsent(executionId, x -> new ExecutionContext(nodeEngine, jobId, executionId, true));
    }
    try {
        Set<Address> addresses = participants.stream().map(MemberInfo::getAddress).collect(toSet());
        ClassLoader jobCl = jobClassloaderService.getClassLoader(jobId);
        // We don't create the CL for light jobs. Assert on the very value that is
        // passed on below instead of performing a second lookup.
        assert jobCl == null : "Unexpected class loader for light job " + idToString(jobId);
        doWithClassLoader(jobCl, () -> execCtx.initialize(coordinator, addresses, plan));
    } catch (Throwable e) {
        // complete the context as cancelled; the original failure is propagated to the caller
        completeExecution(execCtx, new CancellationException());
        throw e;
    }
    // initial log entry with all of jobId, jobName, executionId
    if (logger.isFineEnabled()) {
        // read the name once so the null check and the printed value cannot disagree
        String jobName = execCtx.jobName();
        logger.fine("Execution plan for light job ID=" + idToString(jobId) + ", jobName=" + (jobName != null ? '\'' + jobName + '\'' : "null") + ", executionId=" + idToString(executionId) + " initialized, will start the execution");
    }
    return beginExecution0(execCtx, false);
}
Aggregations