use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast by hazelcast.
the class MasterJobContext method tryStartJob.
/**
 * Attempts to start the execution of the job, unless the job is already
 * completed, cancelled or failed.
 * <p>
 * For an already cancelled job the job completion procedure is triggered
 * instead.
 * <p>
 * The job restart is rescheduled if the job quorum is not satisfied, or if
 * there was a membership change and the partition table is not completely
 * fixed yet.
 * <p>
 * The whole procedure is submitted to the coordinator thread. If
 * {@code resolveDagAndCL} returns {@code null}, nothing is started. Any
 * {@link Throwable} thrown while preparing the execution is handed to
 * {@code finalizeJob}.
 *
 * @param executionIdSupplier supplier of execution IDs, passed on to
 *                            {@code resolveDagAndCL}
 */
void tryStartJob(Supplier<Long> executionIdSupplier) {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        executionStartTime = System.currentTimeMillis();
        try {
            JobExecutionRecord executionRecord = mc.jobExecutionRecord();
            executionRecord.markExecuted();

            Tuple2<DAG, ClassLoader> resolved = resolveDagAndCL(executionIdSupplier);
            if (resolved == null) {
                return;
            }
            DAG dag = resolved.f0();
            assert dag != null;
            ClassLoader jobClassLoader = resolved.f1();

            // must call this before rewriteDagWithSnapshotRestore()
            String dagAsDot = dag.toDotString(defaultParallelism, defaultQueueSize);

            // Pick the map to restore from: the last successful snapshot of this job
            // takes precedence over the initial snapshot named in the job config.
            long snapshotId = executionRecord.snapshotId();
            String initialSnapshotName = mc.jobConfig().getInitialSnapshotName();
            String restoreMapName;
            if (snapshotId >= 0) {
                restoreMapName = executionRecord.successfulSnapshotDataMapName(mc.jobId());
            } else if (initialSnapshotName != null) {
                restoreMapName = EXPORTED_SNAPSHOTS_PREFIX + initialSnapshotName;
            } else {
                restoreMapName = null;
            }
            if (restoreMapName == null) {
                logger.info("Didn't find any snapshot to restore for " + mc.jobIdString());
            } else {
                rewriteDagWithSnapshotRestore(dag, snapshotId, restoreMapName, initialSnapshotName);
            }

            MembersView membersView = Util.getMembersView(mc.nodeEngine());
            logger.info("Start executing " + mc.jobIdString() + ", execution graph in DOT format:\n"
                    + dagAsDot
                    + "\nHINT: You can use graphviz or http://viz-js.com to visualize the printed graph.");

            logger.fine("Building execution plan for " + mc.jobIdString());
            Util.doWithClassLoader(jobClassLoader, () ->
                    mc.setExecutionPlanMap(createExecutionPlans(
                            mc.nodeEngine(),
                            membersView.getMembers(),
                            dag,
                            mc.jobId(),
                            mc.executionId(),
                            mc.jobConfig(),
                            executionRecord.ongoingSnapshotId(),
                            false,
                            mc.jobRecord().getSubject())));
            logger.fine("Built execution plans for " + mc.jobIdString());

            // Every member that received a plan takes part in the execution.
            Set<MemberInfo> participants = mc.executionPlanMap().keySet();
            Version coordinatorVersion = mc.nodeEngine().getLocalMember().getVersion().asVersion();
            Function<ExecutionPlan, Operation> initOperationFactory = plan ->
                    new InitExecutionOperation(
                            mc.jobId(),
                            mc.executionId(),
                            membersView.getVersion(),
                            coordinatorVersion,
                            participants,
                            mc.nodeEngine().getSerializationService().toData(plan),
                            false);
            mc.invokeOnParticipants(initOperationFactory, this::onInitStepCompleted, null, false);
        } catch (Throwable e) {
            finalizeJob(e);
        }
    });
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast by hazelcast.
the class MasterContext method invokeOnParticipants.
/**
 * Invokes an operation on every member that has an entry in the execution
 * plan map.
 *
 * @param operationCtor           a function that creates the operation to
 *                                send to a member from that member's
 *                                {@link ExecutionPlan}
 * @param completionCallback      a consumer that will receive a collection
 *                                of member-response pairs, one for each
 *                                member, after all have been received. The
 *                                response value will be either the response
 *                                (including a null response) or an
 *                                exception thrown from the operation (the
 *                                pairs themselves will never be null); size
 *                                will be equal to participant count
 * @param individualCallback      a callback that will be called after each
 *                                individual participant completes
 * @param retryOnTimeoutException if true, operations that threw {@link
 *                                com.hazelcast.core.OperationTimeoutException}
 *                                will be retried
 */
void invokeOnParticipants(
        Function<ExecutionPlan, Operation> operationCtor,
        @Nullable Consumer<Collection<Map.Entry<MemberInfo, Object>>> completionCallback,
        @Nullable BiConsumer<Address, Object> individualCallback,
        boolean retryOnTimeoutException
) {
    // State shared by all the per-member invocations: the responses collected
    // so far and the number of members still to be accounted for.
    AtomicInteger pendingCount = new AtomicInteger(executionPlanMap.size());
    ConcurrentMap<MemberInfo, Object> collectedResponses = new ConcurrentHashMap<>();

    for (Entry<MemberInfo, ExecutionPlan> planEntry : executionPlanMap.entrySet()) {
        // The plan is read from the entry only when the supplier is called.
        Supplier<Operation> operationSupplier = () -> operationCtor.apply(planEntry.getValue());
        invokeOnParticipant(
                planEntry.getKey(),
                operationSupplier,
                completionCallback,
                individualCallback,
                retryOnTimeoutException,
                collectedResponses,
                pendingCount);
    }
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast by hazelcast.
the class MasterSnapshotContext method tryBeginSnapshot.
/**
 * Begins the next requested snapshot, if there is one and the job state
 * allows it. The work is submitted to the coordinator thread.
 * <p>
 * Under the master context lock, nothing is started if the job status is
 * not {@code RUNNING}, if another snapshot is already in progress, if the
 * terminal snapshot future is already done, or if the snapshot queue is
 * empty. Otherwise the request is taken from the queue, the snapshot is
 * marked as in progress and a new snapshot is started in the job execution
 * record.
 * <p>
 * After releasing the lock, the job execution record is written, the
 * target map is cleared and a {@code SnapshotPhase1Operation} is invoked on
 * the participants; the responses are passed to
 * {@code onSnapshotPhase1Complete}.
 */
void tryBeginSnapshot() {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        boolean isTerminal;
        String snapshotMapName;
        CompletableFuture<Void> future;
        long localExecutionId;
        // Acquire the lock immediately before the try block so that nothing can
        // come between the acquisition and the guarded section.
        mc.lock();
        try {
            if (mc.jobStatus() != RUNNING) {
                logger.fine("Not beginning snapshot, " + mc.jobIdString() + " is not RUNNING, but " + mc.jobStatus());
                return;
            }
            if (snapshotInProgress) {
                logger.fine("Not beginning snapshot since one is already in progress " + mc.jobIdString());
                return;
            }
            if (terminalSnapshotFuture.isDone()) {
                logger.fine("Not beginning snapshot since terminal snapshot is already completed " + mc.jobIdString());
                return;
            }
            Tuple3<String, Boolean, CompletableFuture<Void>> requestedSnapshot = snapshotQueue.poll();
            if (requestedSnapshot == null) {
                return;
            }
            snapshotInProgress = true;
            snapshotMapName = requestedSnapshot.f0();
            assert requestedSnapshot.f1() != null;
            isTerminal = requestedSnapshot.f1();
            future = requestedSnapshot.f2();
            mc.jobExecutionRecord().startNewSnapshot(snapshotMapName);
            // Need to take a copy of executionId: we don't cancel the scheduled task when the execution
            // finalizes. If a new execution is started in the meantime, we'll use the execution ID to detect it.
            localExecutionId = mc.executionId();
        } finally {
            mc.unlock();
        }

        mc.writeJobExecutionRecord(false);
        long newSnapshotId = mc.jobExecutionRecord().ongoingSnapshotId();
        // A non-null map name in the request means the snapshot is exported under that name.
        boolean isExport = snapshotMapName != null;
        int snapshotFlags = SnapshotFlags.create(isTerminal, isExport);
        String finalMapName = isExport
                ? exportedSnapshotMapName(snapshotMapName)
                : snapshotDataMapName(mc.jobId(), mc.jobExecutionRecord().ongoingDataMapIndex());
        mc.nodeEngine().getHazelcastInstance().getMap(finalMapName).clear();

        // Log the map that is actually written to. The requested name is null for
        // non-export snapshots and would be logged as "writing to: null".
        logFine(logger, "Starting snapshot %d for %s, flags: %s, writing to: %s",
                newSnapshotId,
                jobNameAndExecutionId(mc.jobName(), localExecutionId),
                SnapshotFlags.toString(snapshotFlags),
                finalMapName);

        Function<ExecutionPlan, Operation> factory = plan ->
                new SnapshotPhase1Operation(mc.jobId(), localExecutionId, newSnapshotId, finalMapName, snapshotFlags);
        mc.invokeOnParticipants(
                factory,
                responses -> onSnapshotPhase1Complete(
                        responses, localExecutionId, newSnapshotId, finalMapName, snapshotFlags, future),
                null,
                true);
    });
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast by hazelcast.
the class ExecutionLifecycleTest method when_executionCancelledBeforeStart_then_jobFutureIsCancelledOnExecute.
/**
 * Initializes an execution directly on the local member, terminates it
 * before it is started and then checks that joining the future returned
 * from {@code beginExecution} throws {@link CancellationException}.
 */
@Test
public void when_executionCancelledBeforeStart_then_jobFutureIsCancelledOnExecute() {
    // not applicable to light jobs - we hack around with ExecutionContext
    assumeFalse(useLightJob);

    // Given: an execution initialized on the local member and terminated before start
    DAG dag = new DAG().vertex(new Vertex("test", new MockPS(NoOutputSourceP::new, MEMBER_COUNT)));

    NodeEngineImpl nodeEngine = getNodeEngineImpl(instance());
    Address thisAddress = nodeEngine.getThisAddress();
    ClusterServiceImpl clusterService = (ClusterServiceImpl) nodeEngine.getClusterService();
    MembersView currentMembers = clusterService.getMembershipManager().getMembersView();
    int memberListVersion = currentMembers.getVersion();
    JetServiceBackend backend = getJetServiceBackend(instance());

    long jobId = 0;
    long executionId = 1;
    JobConfig config = new JobConfig();

    Map<MemberInfo, ExecutionPlan> plansByMember = ExecutionPlanBuilder.createExecutionPlans(
            nodeEngine, currentMembers.getMembers(), dag, jobId, executionId, config, NO_SNAPSHOT, false, null);
    ExecutionPlan localPlan = plansByMember.get(currentMembers.getMember(thisAddress));

    backend.getJobClassLoaderService().getOrCreateClassLoader(config, jobId, COORDINATOR);
    Set<MemberInfo> participants = new HashSet<>(currentMembers.getMembers());
    backend.getJobExecutionService().initExecution(
            jobId, executionId, thisAddress, memberListVersion, participants, localPlan);

    ExecutionContext context = backend.getJobExecutionService().getExecutionContext(executionId);
    context.terminateExecution(null);

    // When: the already terminated execution is begun
    CompletableFuture<Void> executionFuture = context.beginExecution(backend.getTaskletExecutionService());

    // Then: joining the returned future throws CancellationException
    expectedException.expect(CancellationException.class);
    executionFuture.join();
}
use of com.hazelcast.jet.impl.execution.init.ExecutionPlan in project hazelcast by hazelcast.
the class JobExecutionService method runLightJob.
/**
 * Initializes the execution of a light job on this member and begins it.
 * <p>
 * The cluster information is verified and the service is checked to be
 * running first. Then, under {@code mutex}, the execution is registered and
 * its {@link ExecutionContext} is created if absent. If initializing the
 * context from the plan fails, the execution is completed with a
 * {@link CancellationException} and the original failure is rethrown.
 *
 * @param jobId                        ID of the job; asserted to be equal
 *                                     to {@code executionId}
 * @param executionId                  ID of the execution
 * @param coordinator                  address of the coordinator
 * @param coordinatorMemberListVersion member list version as seen by the
 *                                     coordinator, used for verification
 * @param participants                 members participating in the execution
 * @param plan                         the execution plan for this member
 * @return the future returned from {@code beginExecution0}
 */
public CompletableFuture<RawJobMetrics> runLightJob(
        long jobId,
        long executionId,
        Address coordinator,
        int coordinatorMemberListVersion,
        Set<MemberInfo> participants,
        ExecutionPlan plan
) {
    assert executionId == jobId : "executionId(" + idToString(executionId) + ") != jobId(" + idToString(jobId) + ")";
    verifyClusterInformation(jobId, executionId, coordinator, coordinatorMemberListVersion, participants);
    failIfNotRunning();

    ExecutionContext execCtx;
    synchronized (mutex) {
        addExecutionContextJobId(jobId, executionId, coordinator);
        execCtx = executionContexts.computeIfAbsent(executionId,
                x -> new ExecutionContext(nodeEngine, jobId, executionId, true));
    }

    try {
        Set<Address> addresses = participants.stream().map(MemberInfo::getAddress).collect(toSet());
        ClassLoader jobCl = jobClassloaderService.getClassLoader(jobId);
        // We don't create the CL for light jobs. Check the value that is actually
        // passed on below instead of looking it up a second time.
        assert jobCl == null;
        doWithClassLoader(jobCl, () -> execCtx.initialize(coordinator, addresses, plan));
    } catch (Throwable e) {
        // Complete the registered execution before propagating the failure.
        completeExecution(execCtx, new CancellationException());
        throw e;
    }

    // initial log entry with all of jobId, jobName, executionId
    if (logger.isFineEnabled()) {
        logger.fine("Execution plan for light job ID=" + idToString(jobId)
                + ", jobName=" + (execCtx.jobName() != null ? '\'' + execCtx.jobName() + '\'' : "null")
                + ", executionId=" + idToString(executionId)
                + " initialized, will start the execution");
    }
    return beginExecution0(execCtx, false);
}
Aggregations