use of com.hazelcast.internal.cluster.impl.MembersView in project hazelcast-jet by hazelcast.
the class MasterContext method tryStartJob.
/**
 * Attempts to start an execution of the job, unless it is already completed,
 * cancelled or failed.
 * <ul>
 * <li>An already cancelled job has its completion procedure triggered.</li>
 * <li>When the job quorum is absent, the restart is rescheduled.</li>
 * <li>When the cluster is not yet safe after a membership change (the
 * partition table is not completely fixed), the restart is rescheduled.</li>
 * <li>When the DAG cannot be deserialized or the execution plans cannot be
 * built, the job is finalized with the caught exception.</li>
 * </ul>
 *
 * @param executionIdSupplier function applied to the job id to obtain the id
 *                            of the execution being started
 */
void tryStartJob(Function<Long, Long> executionIdSupplier) {
    // Evaluated left to right with short-circuiting: status transition first,
    // then the quorum check, then the cluster-safety check.
    if (!setJobStatusToStarting()
            || scheduleRestartIfQuorumAbsent()
            || scheduleRestartIfClusterIsNotSafe()) {
        return;
    }

    DAG dag;
    try {
        dag = deserializeDAG();
    } catch (Exception e) {
        logger.warning("DAG deserialization failed", e);
        finalizeJob(e);
        return;
    }

    // Remember the vertices now; the DAG may be rewritten below for snapshot restore.
    vertices = new HashSet<>();
    dag.iterator().forEachRemaining(vertices::add);
    executionId = executionIdSupplier.apply(jobId);

    // Id of the last started snapshot, whether it completed or not.
    // The next snapshot to be started must use a greater id.
    long lastSnapshotId = NO_SNAPSHOT;
    if (isSnapshottingEnabled()) {
        Long restoreFrom = snapshotRepository.latestCompleteSnapshot(jobId);
        snapshotRepository.deleteAllSnapshotsExceptOne(jobId, restoreFrom);
        Long latestStarted = snapshotRepository.latestStartedSnapshot(jobId);
        if (restoreFrom == null) {
            logger.info("No previous snapshot for " + jobIdString() + " found.");
        } else {
            logger.info("State of " + jobIdString() + " will be restored from snapshot " + restoreFrom);
            rewriteDagWithSnapshotRestore(dag, restoreFrom);
        }
        if (latestStarted != null) {
            lastSnapshotId = latestStarted;
        }
    }

    MembersView membersView = getMembersView();
    // Build the plans under the job's class loader; the previous one is always restored.
    ClassLoader callerClassLoader = swapContextClassLoader(coordinationService.getClassLoader(jobId));
    try {
        int cooperativeThreads = getJetInstance(nodeEngine)
                .getConfig()
                .getInstanceConfig()
                .getCooperativeThreadCount();
        logger.info("Start executing " + jobIdString() + ", status " + jobStatus() + "\n"
                + dag.toString(cooperativeThreads));
        logger.fine("Building execution plan for " + jobIdString());
        executionPlanMap = createExecutionPlans(nodeEngine, membersView, dag, getJobConfig(), lastSnapshotId);
    } catch (Exception e) {
        logger.severe("Exception creating execution plan for " + jobIdString(), e);
        finalizeJob(e);
        return;
    } finally {
        Thread.currentThread().setContextClassLoader(callerClassLoader);
    }
    logger.fine("Built execution plans for " + jobIdString());

    Set<MemberInfo> participants = executionPlanMap.keySet();
    Function<ExecutionPlan, Operation> initOperationFactory = plan -> new InitExecutionOperation(
            jobId,
            executionId,
            membersView.getVersion(),
            participants,
            nodeEngine.getSerializationService().toData(plan));
    invoke(initOperationFactory, this::onInitStepCompleted, null);
}
use of com.hazelcast.internal.cluster.impl.MembersView in project hazelcast by hazelcast.
the class MasterJobContext method tryStartJob.
/**
 * Submits a task to the coordinator thread that starts the execution of the
 * job, provided the job is not already completed, cancelled or failed.
 * <p>
 * An already cancelled job has its completion procedure triggered.
 * <p>
 * When the job quorum is not satisfied, the job restart is rescheduled.
 * <p>
 * When there was a membership change and the partition table is not yet
 * completely fixed, the job restart is rescheduled.
 * <p>
 * Any {@link Throwable} raised while preparing the execution finalizes the
 * job with that throwable.
 *
 * @param executionIdSupplier supplier handed to {@code resolveDagAndCL}
 */
void tryStartJob(Supplier<Long> executionIdSupplier) {
    mc.coordinationService().submitToCoordinatorThread(() -> {
        executionStartTime = System.currentTimeMillis();
        try {
            JobExecutionRecord executionRecord = mc.jobExecutionRecord();
            executionRecord.markExecuted();

            Tuple2<DAG, ClassLoader> resolved = resolveDagAndCL(executionIdSupplier);
            if (resolved == null) {
                // Nothing to start.
                return;
            }
            DAG dag = resolved.f0();
            assert dag != null;
            ClassLoader jobClassLoader = resolved.f1();

            // must call this before rewriteDagWithSnapshotRestore()
            String dotRepresentation = dag.toDotString(defaultParallelism, defaultQueueSize);

            long snapshotId = executionRecord.snapshotId();
            String initialSnapshotName = mc.jobConfig().getInitialSnapshotName();
            // A non-negative snapshot id in the execution record takes precedence
            // over the initial snapshot name from the job config.
            String restoreMapName;
            if (snapshotId >= 0) {
                restoreMapName = executionRecord.successfulSnapshotDataMapName(mc.jobId());
            } else if (initialSnapshotName != null) {
                restoreMapName = EXPORTED_SNAPSHOTS_PREFIX + initialSnapshotName;
            } else {
                restoreMapName = null;
            }

            if (restoreMapName == null) {
                logger.info("Didn't find any snapshot to restore for " + mc.jobIdString());
            } else {
                rewriteDagWithSnapshotRestore(dag, snapshotId, restoreMapName, initialSnapshotName);
            }

            MembersView membersView = Util.getMembersView(mc.nodeEngine());
            logger.info("Start executing " + mc.jobIdString() + ", execution graph in DOT format:\n"
                    + dotRepresentation
                    + "\nHINT: You can use graphviz or http://viz-js.com to visualize the printed graph.");
            logger.fine("Building execution plan for " + mc.jobIdString());
            Util.doWithClassLoader(jobClassLoader, () -> mc.setExecutionPlanMap(createExecutionPlans(
                    mc.nodeEngine(),
                    membersView.getMembers(),
                    dag,
                    mc.jobId(),
                    mc.executionId(),
                    mc.jobConfig(),
                    executionRecord.ongoingSnapshotId(),
                    false,
                    mc.jobRecord().getSubject())));
            logger.fine("Built execution plans for " + mc.jobIdString());

            Set<MemberInfo> participants = mc.executionPlanMap().keySet();
            Version coordinatorVersion = mc.nodeEngine().getLocalMember().getVersion().asVersion();
            Function<ExecutionPlan, Operation> initOperationFactory = plan -> new InitExecutionOperation(
                    mc.jobId(),
                    mc.executionId(),
                    membersView.getVersion(),
                    coordinatorVersion,
                    participants,
                    mc.nodeEngine().getSerializationService().toData(plan),
                    false);
            mc.invokeOnParticipants(initOperationFactory, this::onInitStepCompleted, null, false);
        } catch (Throwable e) {
            finalizeJob(e);
        }
    });
}
use of com.hazelcast.internal.cluster.impl.MembersView in project hazelcast by hazelcast.
the class ClusterViewListenerService method getMemberListViewMessage.
/**
 * Encodes the current members view as a members-view event message.
 * <p>
 * Each member is copied into a new {@code MemberInfo} whose address is the
 * result of {@code clientAddressOf(...)} applied to the member's address;
 * all other member properties are carried over unchanged.
 *
 * @return the encoded event carrying the member-list version and the copied members
 */
private ClientMessage getMemberListViewMessage() {
    ClusterServiceImpl clusterService = (ClusterServiceImpl) nodeEngine.getClusterService();
    MembersView view = clusterService.getMembershipManager().getMembersView();
    int version = view.getVersion();
    List<MemberInfo> clusterMembers = view.getMembers();

    List<MemberInfo> clientFacingMembers = new ArrayList<>(clusterMembers.size());
    for (MemberInfo current : clusterMembers) {
        MemberInfo translated = new MemberInfo(
                clientAddressOf(current.getAddress()),
                current.getUuid(),
                current.getAttributes(),
                current.isLiteMember(),
                current.getVersion(),
                current.getAddressMap());
        clientFacingMembers.add(translated);
    }
    return ClientAddClusterViewListenerCodec.encodeMembersViewEvent(version, clientFacingMembers);
}
use of com.hazelcast.internal.cluster.impl.MembersView in project hazelcast by hazelcast.
the class ExecutionLifecycleTest method when_executionCancelledBeforeStart_then_jobFutureIsCancelledOnExecute.
/**
 * Initializes an execution directly on the local member, terminates it before
 * it is begun, and expects the future returned by {@code beginExecution} to
 * fail with {@link CancellationException} when joined.
 */
@Test
public void when_executionCancelledBeforeStart_then_jobFutureIsCancelledOnExecute() {
    // not applicable to light jobs - we hack around with ExecutionContext
    assumeFalse(useLightJob);

    // Given
    DAG dag = new DAG().vertex(new Vertex("test", new MockPS(NoOutputSourceP::new, MEMBER_COUNT)));
    NodeEngineImpl engine = getNodeEngineImpl(instance());
    Address thisAddress = engine.getThisAddress();
    MembersView view = ((ClusterServiceImpl) engine.getClusterService())
            .getMembershipManager()
            .getMembersView();
    int memberListVersion = view.getVersion();
    JetServiceBackend backend = getJetServiceBackend(instance());
    long jobId = 0;
    long executionId = 1;
    JobConfig config = new JobConfig();

    // Build the plans for all members and pick the one of the local member.
    final Map<MemberInfo, ExecutionPlan> plansByMember = ExecutionPlanBuilder.createExecutionPlans(
            engine, view.getMembers(), dag, jobId, executionId, config, NO_SNAPSHOT, false, null);
    ExecutionPlan localPlan = plansByMember.get(view.getMember(thisAddress));

    backend.getJobClassLoaderService().getOrCreateClassLoader(config, jobId, COORDINATOR);
    Set<MemberInfo> participants = new HashSet<>(view.getMembers());
    backend.getJobExecutionService()
            .initExecution(jobId, executionId, thisAddress, memberListVersion, participants, localPlan);
    ExecutionContext context = backend.getJobExecutionService().getExecutionContext(executionId);
    // Terminate before the execution is begun.
    context.terminateExecution(null);

    // When
    CompletableFuture<Void> executionFuture = context.beginExecution(backend.getTaskletExecutionService());

    // Then
    expectedException.expect(CancellationException.class);
    executionFuture.join();
}
use of com.hazelcast.internal.cluster.impl.MembersView in project hazelcast-jet by hazelcast.
the class ExecutionPlanBuilder method createExecutionPlans.
/**
 * Builds one {@code ExecutionPlan} per member for the given DAG.
 * <p>
 * The member set and the partition-owner table are filled in by
 * {@code initPartitionOwnersAndMembers}. For every vertex of the DAG the
 * meta-supplier is initialized, asked for a processor supplier per member
 * address, and a {@code VertexDef} with the vertex's inbound and outbound
 * edges is added to each member's plan. The processor index offset grows by
 * the vertex's local parallelism from one member's plan to the next.
 *
 * @param nodeEngine     node engine used to look up the Jet instance, partition count and loggers
 * @param membersView    members view passed to {@code initPartitionOwnersAndMembers}
 * @param dag            the DAG whose vertices and edges are turned into plans
 * @param jobConfig      job configuration handed to every {@code ExecutionPlan}
 * @param lastSnapshotId snapshot id handed to every {@code ExecutionPlan}
 * @return the execution plans keyed by member
 */
public static Map<MemberInfo, ExecutionPlan> createExecutionPlans(NodeEngine nodeEngine, MembersView membersView, DAG dag, JobConfig jobConfig, long lastSnapshotId) {
    final JetInstance jet = getJetInstance(nodeEngine);
    final int defaultParallelism = jet.getConfig().getInstanceConfig().getCooperativeThreadCount();

    final Collection<MemberInfo> members = new HashSet<>(membersView.size());
    final Address[] partitionOwners = new Address[nodeEngine.getPartitionService().getPartitionCount()];
    initPartitionOwnersAndMembers(nodeEngine, membersView, members, partitionOwners);

    final List<Address> memberAddresses = members.stream()
            .map(MemberInfo::getAddress)
            .collect(toList());
    final int clusterSize = members.size();
    final boolean distributed = clusterSize > 1;
    final EdgeConfig defaultEdgeConfig = jet.getConfig().getDefaultEdgeConfig();

    final Map<MemberInfo, ExecutionPlan> plans = members.stream()
            .collect(toMap(
                    Function.identity(),
                    member -> new ExecutionPlan(partitionOwners, jobConfig, lastSnapshotId)));

    final Map<String, Integer> vertexIds = assignVertexIds(dag);
    for (Entry<String, Integer> nameAndId : vertexIds.entrySet()) {
        final Vertex vertex = dag.getVertex(nameAndId.getKey());
        final String vertexName = vertex.getName();
        final ProcessorMetaSupplier metaSupplier = vertex.getMetaSupplier();
        final int vertexId = nameAndId.getValue();

        final int localParallelism =
                determineParallelism(vertex, metaSupplier.preferredLocalParallelism(), defaultParallelism);
        final int totalParallelism = localParallelism * clusterSize;

        final List<EdgeDef> inboundEdges = toEdgeDefs(
                dag.getInboundEdges(vertexName),
                defaultEdgeConfig,
                edge -> vertexIds.get(edge.getSourceName()),
                distributed);
        final List<EdgeDef> outboundEdges = toEdgeDefs(
                dag.getOutboundEdges(vertexName),
                defaultEdgeConfig,
                edge -> vertexIds.get(edge.getDestName()),
                distributed);

        final String loggerName = String.format("%s.%s#ProcessorMetaSupplier",
                metaSupplier.getClass().getName(), vertexName);
        final ILogger metaSupplierLogger = nodeEngine.getLogger(loggerName);
        metaSupplier.init(
                new MetaSupplierCtx(jet, metaSupplierLogger, vertexName, localParallelism, totalParallelism));
        final Function<Address, ProcessorSupplier> supplierForAddress = metaSupplier.get(memberAddresses);

        // Each member's processors get a consecutive index range of size localParallelism.
        int procIdxOffset = 0;
        for (Entry<MemberInfo, ExecutionPlan> memberPlan : plans.entrySet()) {
            final ProcessorSupplier processorSupplier =
                    supplierForAddress.apply(memberPlan.getKey().getAddress());
            checkSerializable(processorSupplier, "ProcessorSupplier in vertex '" + vertexName + '\'');

            final VertexDef vertexDef = new VertexDef(
                    vertexId, vertexName, processorSupplier, procIdxOffset, localParallelism, totalParallelism);
            vertexDef.addInboundEdges(inboundEdges);
            vertexDef.addOutboundEdges(outboundEdges);
            memberPlan.getValue().addVertex(vertexDef);

            procIdxOffset += localParallelism;
        }
    }
    return plans;
}
Aggregations