Use of com.hazelcast.jet.impl.operation.InitExecutionOperation in project hazelcast-jet by hazelcast.
Class MasterContext, method tryStartJob.
/**
 * Attempts to start an execution of this job.
 * <p>
 * Nothing is started when {@code setJobStatusToStarting()} reports that the
 * job cannot move to the starting state (already completed, cancelled or
 * failed; for a cancelled job the completion procedure is triggered), or
 * when a restart had to be rescheduled because the quorum is absent or the
 * cluster is not safe yet (e.g. the partition table is still being fixed
 * after a membership change).
 * <p>
 * Otherwise the DAG is deserialized, optionally rewritten to restore from
 * the latest complete snapshot, turned into per-member execution plans and
 * an {@link InitExecutionOperation} is invoked for each plan.
 *
 * @param executionIdSupplier produces the execution ID for the given job ID
 */
void tryStartJob(Function<Long, Long> executionIdSupplier) {
    // Short-circuit evaluation keeps the original order of the three checks.
    if (!setJobStatusToStarting()
            || scheduleRestartIfQuorumAbsent()
            || scheduleRestartIfClusterIsNotSafe()) {
        return;
    }

    DAG dag;
    try {
        dag = deserializeDAG();
    } catch (Exception deserializationFailure) {
        logger.warning("DAG deserialization failed", deserializationFailure);
        finalizeJob(deserializationFailure);
        return;
    }

    // Remember the vertices now; the DAG is going to change below.
    vertices = new HashSet<>();
    dag.iterator().forEachRemaining(vertices::add);

    executionId = executionIdSupplier.apply(jobId);

    // ID of the last started snapshot, whether it completed or not.
    // The next snapshot that gets started must use a greater ID.
    long lastSnapshotId = NO_SNAPSHOT;
    if (isSnapshottingEnabled()) {
        Long restoreFromId = snapshotRepository.latestCompleteSnapshot(jobId);
        snapshotRepository.deleteAllSnapshotsExceptOne(jobId, restoreFromId);
        Long latestStartedId = snapshotRepository.latestStartedSnapshot(jobId);

        if (restoreFromId == null) {
            logger.info("No previous snapshot for " + jobIdString() + " found.");
        } else {
            logger.info("State of " + jobIdString() + " will be restored from snapshot " + restoreFromId);
            rewriteDagWithSnapshotRestore(dag, restoreFromId);
        }

        lastSnapshotId = latestStartedId == null ? lastSnapshotId : latestStartedId;
    }

    MembersView membersView = getMembersView();

    // Plans are built with the job's class loader installed as the context
    // class loader; the previous one is always put back in the finally block.
    ClassLoader originalContextCL = swapContextClassLoader(coordinationService.getClassLoader(jobId));
    try {
        int localParallelism = getJetInstance(nodeEngine)
                .getConfig()
                .getInstanceConfig()
                .getCooperativeThreadCount();
        logger.info("Start executing " + jobIdString() + ", status " + jobStatus() + "\n"
                + dag.toString(localParallelism));
        logger.fine("Building execution plan for " + jobIdString());
        executionPlanMap = createExecutionPlans(nodeEngine, membersView, dag, getJobConfig(), lastSnapshotId);
    } catch (Exception planningFailure) {
        logger.severe("Exception creating execution plan for " + jobIdString(), planningFailure);
        finalizeJob(planningFailure);
        return;
    } finally {
        Thread.currentThread().setContextClassLoader(originalContextCL);
    }
    logger.fine("Built execution plans for " + jobIdString());

    Set<MemberInfo> participants = executionPlanMap.keySet();
    Function<ExecutionPlan, Operation> initOperationFactory = executionPlan ->
            new InitExecutionOperation(
                    jobId,
                    executionId,
                    membersView.getVersion(),
                    participants,
                    nodeEngine.getSerializationService().toData(executionPlan));
    invoke(initOperationFactory, this::onInitStepCompleted, null);
}
Use of com.hazelcast.jet.impl.operation.InitExecutionOperation in project hazelcast by hazelcast.
Class MasterJobContext, method tryStartJob.
/**
 * Submits a task to the coordinator thread that starts the execution of the
 * job, unless the job is already completed, cancelled or failed.
 * <p>
 * If the job is already cancelled, the job completion procedure is
 * triggered instead.
 * <p>
 * If the job quorum is not satisfied, the job restart is rescheduled.
 * <p>
 * If there was a membership change and the partition table is not
 * completely fixed yet, the job restart is rescheduled.
 * <p>
 * Any {@link Throwable} raised while preparing the execution is passed to
 * {@code finalizeJob}.
 *
 * @param executionIdSupplier source of the execution ID, handed over to
 *                            {@code resolveDagAndCL}
 */
void tryStartJob(Supplier<Long> executionIdSupplier) {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        executionStartTime = System.currentTimeMillis();
        try {
            JobExecutionRecord executionRecord = mc.jobExecutionRecord();
            executionRecord.markExecuted();

            Tuple2<DAG, ClassLoader> resolved = resolveDagAndCL(executionIdSupplier);
            if (resolved == null) {
                // resolveDagAndCL yielded nothing to execute
                return;
            }
            DAG dag = resolved.f0();
            assert dag != null;
            ClassLoader jobClassLoader = resolved.f1();

            // The DOT string must be captured before rewriteDagWithSnapshotRestore() is called.
            String dagAsDot = dag.toDotString(defaultParallelism, defaultQueueSize);

            long snapshotId = executionRecord.snapshotId();
            String initialSnapshotName = mc.jobConfig().getInitialSnapshotName();

            // A snapshot recorded in the execution record takes precedence
            // over the initial snapshot named in the job config.
            String snapshotMapName;
            if (snapshotId >= 0) {
                snapshotMapName = executionRecord.successfulSnapshotDataMapName(mc.jobId());
            } else if (initialSnapshotName != null) {
                snapshotMapName = EXPORTED_SNAPSHOTS_PREFIX + initialSnapshotName;
            } else {
                snapshotMapName = null;
            }

            if (snapshotMapName == null) {
                logger.info("Didn't find any snapshot to restore for " + mc.jobIdString());
            } else {
                rewriteDagWithSnapshotRestore(dag, snapshotId, snapshotMapName, initialSnapshotName);
            }

            MembersView membersView = Util.getMembersView(mc.nodeEngine());
            logger.info("Start executing " + mc.jobIdString() + ", execution graph in DOT format:\n"
                    + dagAsDot
                    + "\nHINT: You can use graphviz or http://viz-js.com to visualize the printed graph.");

            logger.fine("Building execution plan for " + mc.jobIdString());
            Util.doWithClassLoader(jobClassLoader, () ->
                    mc.setExecutionPlanMap(createExecutionPlans(
                            mc.nodeEngine(),
                            membersView.getMembers(),
                            dag,
                            mc.jobId(),
                            mc.executionId(),
                            mc.jobConfig(),
                            executionRecord.ongoingSnapshotId(),
                            false,
                            mc.jobRecord().getSubject())));
            logger.fine("Built execution plans for " + mc.jobIdString());

            Set<MemberInfo> participants = mc.executionPlanMap().keySet();
            Version coordinatorVersion = mc.nodeEngine().getLocalMember().getVersion().asVersion();
            Function<ExecutionPlan, Operation> initOperationFactory = executionPlan ->
                    new InitExecutionOperation(
                            mc.jobId(),
                            mc.executionId(),
                            membersView.getVersion(),
                            coordinatorVersion,
                            participants,
                            mc.nodeEngine().getSerializationService().toData(executionPlan),
                            false);
            mc.invokeOnParticipants(initOperationFactory, this::onInitStepCompleted, null, false);
        } catch (Throwable failure) {
            finalizeJob(failure);
        }
    });
}
Use of com.hazelcast.jet.impl.operation.InitExecutionOperation in project hazelcast by hazelcast.
Class TopologyChangeTest, method when_nodeIsNotJobParticipant_then_initFails.
/**
 * Invokes an {@link InitExecutionOperation} on the first instance while the
 * participant set passed to the operation contains only the other
 * instances, and expects the invocation to fail with an
 * {@link IllegalArgumentException} whose message mentions
 * "is not in participants".
 */
@Test
public void when_nodeIsNotJobParticipant_then_initFails() throws Throwable {
    final long jobId = 1;
    final long executionId = 1;
    HazelcastInstance master = instances[0];
    int memberListVersion = Accessors.getClusterService(master).getMemberListVersion();

    // Participants are every instance except the first one (the invocation target).
    Set<MemberInfo> participants = new HashSet<>();
    for (int idx = 1; idx < instances.length; idx++) {
        HazelcastInstance member = instances[idx];
        participants.add(new MemberInfo(getNode(member).getLocalMember()));
    }

    // Store a job record for the job ID in the job records map before invoking.
    Version clusterVersion = master.getCluster().getLocalMember().getVersion().asVersion();
    JobRecord record = new JobRecord(
            clusterVersion, jobId, null, "", new JobConfig(), Collections.emptySet(), null);
    master.getMap(JOB_RECORDS_MAP_NAME).put(jobId, record);

    InitExecutionOperation initOperation = new InitExecutionOperation(
            jobId, executionId, memberListVersion, clusterVersion, participants, null, false);
    Future<Object> invocation = Accessors.getOperationService(master)
            .createInvocationBuilder(JetServiceBackend.SERVICE_NAME, initOperation, Accessors.getAddress(master))
            .invoke();

    try {
        invocation.get();
        fail();
    } catch (ExecutionException e) {
        assertInstanceOf(IllegalArgumentException.class, e.getCause());
        String message = e.getMessage();
        assertTrue(
                "Expected: contains 'is not in participants'\nActual: '" + message + "'",
                message.contains("is not in participants"));
    }
}
Use of com.hazelcast.jet.impl.operation.InitExecutionOperation in project hazelcast-jet by hazelcast.
Class TopologyChangeTest, method when_nodeIsNotJobParticipant_then_initFails.
/**
 * Invokes an {@link InitExecutionOperation} on the first instance while the
 * participant set passed to the operation contains only the other
 * instances, and expects the invocation to fail with an
 * {@link IllegalArgumentException} as the cause.
 */
@Test
public void when_nodeIsNotJobParticipant_then_initFails() throws Throwable {
    int jobId = 1;
    int executionId = 1;
    HazelcastInstance master = instances[0].getHazelcastInstance();
    int memberListVersion = getClusterService(master).getMemberListVersion();

    // Participants are every instance except the first one (the invocation target).
    Set<MemberInfo> participants = new HashSet<>();
    for (int idx = 1; idx < instances.length; idx++) {
        HazelcastInstance member = instances[idx].getHazelcastInstance();
        participants.add(new MemberInfo(getNode(member).getLocalMember()));
    }

    InitExecutionOperation initOperation =
            new InitExecutionOperation(jobId, executionId, memberListVersion, participants, null);
    Future<Object> invocation = getOperationService(master)
            .createInvocationBuilder(JetService.SERVICE_NAME, initOperation, getAddress(master))
            .invoke();

    try {
        invocation.get();
        fail();
    } catch (ExecutionException e) {
        assertInstanceOf(IllegalArgumentException.class, e.getCause());
    }
}
Aggregations