Use of com.hazelcast.jet.impl.JetServiceBackend in project hazelcast by hazelcast.
The class ManualRestartTest, method when_terminalSnapshotFails_then_previousSnapshotUsed.
@Test
public void when_terminalSnapshotFails_then_previousSnapshotUsed() {
    MapConfig mapConfig = new MapConfig(JobRepository.SNAPSHOT_DATA_MAP_PREFIX + "*");
    mapConfig.getMapStoreConfig()
             .setClassName(FailingMapStore.class.getName())
             .setEnabled(true);
    Config config = instances[0].getConfig();
    ((DynamicConfigurationAwareConfig) config).getStaticConfig().addMapConfig(mapConfig);
    FailingMapStore.fail = false;
    FailingMapStore.failed = false;

    DAG dag = new DAG();
    Vertex source = dag.newVertex("source",
            throttle(() -> new SequencesInPartitionsGeneratorP(2, 10000, true), 1000));
    Vertex sink = dag.newVertex("sink", writeListP("sink"));
    dag.edge(between(source, sink));
    source.localParallelism(1);

    Job job = instances[0].getJet().newJob(dag, new JobConfig()
            .setProcessingGuarantee(EXACTLY_ONCE)
            .setSnapshotIntervalMillis(2000));

    // wait for the first snapshot
    JetServiceBackend jetServiceBackend =
            getNode(instances[0]).nodeEngine.getService(JetServiceBackend.SERVICE_NAME);
    JobRepository jobRepository = jetServiceBackend.getJobCoordinationService().jobRepository();
    assertJobStatusEventually(job, RUNNING);
    assertTrueEventually(() ->
            assertTrue(jobRepository.getJobExecutionRecord(job.getId()).dataMapIndex() >= 0));

    // When
    sleepMillis(100);
    FailingMapStore.fail = true;
    job.restart();
    assertTrueEventually(() -> assertTrue(FailingMapStore.failed));
    FailingMapStore.fail = false;
    job.join();

    Map<Integer, Integer> actual = new ArrayList<>(instances[0].<Entry<Integer, Integer>>getList("sink"))
            .stream()
            // we'll only check partition 0
            .filter(e -> e.getKey() == 0)
            .map(Entry::getValue)
            .collect(Collectors.toMap(e -> e, e -> 1, (o, n) -> o + n, TreeMap::new));

    assertEquals("first item != 1, " + actual, (Integer) 1, actual.get(0));
    assertEquals("last item != 1, " + actual, (Integer) 1, actual.get(9999));

    // The result should be a run of ones, then a run of twos, then ones again. The twos
    // are the items reprocessed between the last successful snapshot and the actual
    // termination.
    boolean sawTwo = false;
    boolean sawOneAgain = false;
    for (Integer v : actual.values()) {
        if (v == 1) {
            if (sawTwo) {
                sawOneAgain = true;
            }
        } else if (v == 2) {
            assertFalse("got a 2 in another group", sawOneAgain);
            sawTwo = true;
        } else {
            fail("v=" + v);
        }
    }
    assertTrue("didn't see any 2s", sawTwo);
}
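The collect call above counts how many times each value reached the sink: the item itself is the key, the value starts at 1 and is merged by addition, and a TreeMap keeps the keys ordered. A count of 2 means the item was emitted again after the job restored from the previous snapshot. Here is a minimal, standalone sketch of the same counting idiom; the class name and sample data are made up for illustration:

import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;

public class OccurrenceCountSketch {
    public static void main(String[] args) {
        // Hypothetical sink output for one partition: item 2 was seen twice,
        // i.e. it was reprocessed after the restore.
        List<Integer> sinkItems = List.of(0, 1, 2, 2, 3);

        // Same idiom as the test: key = the item, value = occurrence count,
        // merged with addition, collected into a TreeMap to keep keys sorted.
        TreeMap<Integer, Integer> counts = sinkItems.stream()
                .collect(Collectors.toMap(e -> e, e -> 1, (o, n) -> o + n, TreeMap::new));

        System.out.println(counts); // prints {0=1, 1=1, 2=2, 3=1}
    }
}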
Use of com.hazelcast.jet.impl.JetServiceBackend in project hazelcast by hazelcast.
The class ExecutionLifecycleTest, method when_executionCancelledBeforeStart_then_jobFutureIsCancelledOnExecute.
@Test
public void when_executionCancelledBeforeStart_then_jobFutureIsCancelledOnExecute() {
    // not applicable to light jobs - we hack around with ExecutionContext
    assumeFalse(useLightJob);

    // Given
    DAG dag = new DAG().vertex(new Vertex("test", new MockPS(NoOutputSourceP::new, MEMBER_COUNT)));
    NodeEngineImpl nodeEngineImpl = getNodeEngineImpl(instance());
    Address localAddress = nodeEngineImpl.getThisAddress();
    ClusterServiceImpl clusterService = (ClusterServiceImpl) nodeEngineImpl.getClusterService();
    MembersView membersView = clusterService.getMembershipManager().getMembersView();
    int memberListVersion = membersView.getVersion();

    JetServiceBackend jetServiceBackend = getJetServiceBackend(instance());
    long jobId = 0;
    long executionId = 1;
    JobConfig jobConfig = new JobConfig();
    final Map<MemberInfo, ExecutionPlan> executionPlans = ExecutionPlanBuilder.createExecutionPlans(
            nodeEngineImpl, membersView.getMembers(), dag, jobId, executionId, jobConfig,
            NO_SNAPSHOT, false, null);
    ExecutionPlan executionPlan = executionPlans.get(membersView.getMember(localAddress));
    jetServiceBackend.getJobClassLoaderService().getOrCreateClassLoader(jobConfig, jobId, COORDINATOR);
    Set<MemberInfo> participants = new HashSet<>(membersView.getMembers());
    jetServiceBackend.getJobExecutionService().initExecution(
            jobId, executionId, localAddress, memberListVersion, participants, executionPlan);
    ExecutionContext executionContext = jetServiceBackend.getJobExecutionService().getExecutionContext(executionId);
    executionContext.terminateExecution(null);

    // When
    CompletableFuture<Void> future = executionContext.beginExecution(jetServiceBackend.getTaskletExecutionService());

    // Then
    expectedException.expect(CancellationException.class);
    future.join();
}
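The Then-branch relies on how CompletableFuture surfaces cancellation: calling join() on a cancelled future throws CancellationException directly, which is exactly what expectedException asserts. A minimal standalone sketch of just that behavior (whether beginExecution produces its cancelled future this way internally is an assumption; the point is only the join() semantics):

import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;

public class CancelledFutureSketch {
    public static void main(String[] args) {
        CompletableFuture<Void> future = new CompletableFuture<>();
        // Simulates an execution that was terminated before it started:
        // the future is completed as cancelled.
        future.cancel(false);

        try {
            future.join();
        } catch (CancellationException e) {
            // join() throws CancellationException when the future was cancelled,
            // matching expectedException.expect(CancellationException.class) above.
            System.out.println("cancelled as expected");
        }
    }
}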
Use of com.hazelcast.jet.impl.JetServiceBackend in project hazelcast by hazelcast.
The class SqlClientTest, method when_resultClosed_then_executionContextCleanedUp.
// test for https://github.com/hazelcast/hazelcast/issues/19897
@Test
public void when_resultClosed_then_executionContextCleanedUp() {
    HazelcastInstance client = factory().newHazelcastClient();
    SqlService sql = client.getSql();

    IMap<Integer, Integer> map = instance().getMap("map");
    Map<Integer, Integer> tmpMap = new HashMap<>();
    for (int i = 0; i < 100_000; i++) {
        tmpMap.put(i, i);
        if (i % 10_000 == 0) {
            map.putAll(tmpMap);
            tmpMap.clear();
        }
    }
    createMapping("map", Integer.class, Integer.class);

    for (int i = 0; i < 100; i++) {
        SqlResult result = sql.execute("SELECT * FROM map");
        result.close();
    }

    JetServiceBackend jetService = getJetServiceBackend(instance());
    Collection<ExecutionContext> contexts = jetService.getJobExecutionService().getExecutionContexts();
    // Assert that all ExecutionContexts are eventually cleaned up.
    // This assert will fail if a network packet arrives after the
    // JobExecutionService#FAILED_EXECUTION_EXPIRY_NS time. Hopefully Jenkins isn't that slow.
    assertTrueEventually(() -> {
        String remainingContexts = contexts.stream()
                .map(c -> idToString(c.executionId()))
                .collect(Collectors.joining(", "));
        assertEquals("remaining execIds: " + remainingContexts, 0, contexts.size());
    }, 5);

    // assert that failedJobs is also cleaned up
    ConcurrentMap<Long, Long> failedJobs = jetService.getJobExecutionService().getFailedJobs();
    assertTrueEventually(() -> assertEquals(0, failedJobs.size()));
}
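Since com.hazelcast.sql.SqlResult extends AutoCloseable, the execute-then-close pattern the loop exercises is usually written with try-with-resources in application code, which releases the server-side execution context the same way as the explicit result.close() above. A minimal sketch, assuming a reachable cluster where the "map" mapping already exists:

import com.hazelcast.client.HazelcastClient;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.sql.SqlResult;
import com.hazelcast.sql.SqlRow;

public class SqlCloseSketch {
    public static void main(String[] args) {
        // Assumes a running cluster with the "map" mapping created as in the test.
        HazelcastInstance client = HazelcastClient.newHazelcastClient();
        try (SqlResult result = client.getSql().execute("SELECT * FROM map")) {
            for (SqlRow row : result) {
                // Consume only the first row, then let try-with-resources
                // close the result and free the execution context.
                System.out.println((Integer) row.getObject(0));
                break;
            }
        } finally {
            client.shutdown();
        }
    }
}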
Use of com.hazelcast.jet.impl.JetServiceBackend in project hazelcast by hazelcast.
The class SnapshotPhase1Operation, method doRun.
@Override
protected CompletableFuture<SnapshotPhase1Result> doRun() {
    JetServiceBackend service = getJetServiceBackend();
    ExecutionContext ctx = service.getJobExecutionService().assertExecutionContext(
            getCallerAddress(), jobId(), executionId, getClass().getSimpleName());
    assert !ctx.isLightJob() : "snapshot phase 1 started on a light job: " + idToString(executionId);

    CompletableFuture<SnapshotPhase1Result> future = ctx
            .beginSnapshotPhase1(snapshotId, mapName, flags)
            .exceptionally(exc -> new SnapshotPhase1Result(0, 0, 0, exc))
            .thenApply(result -> {
                if (result.getError() == null) {
                    logFine(getLogger(), "Snapshot %s phase 1 for %s finished successfully on member",
                            snapshotId, ctx.jobNameAndExecutionId());
                } else {
                    getLogger().warning(String.format(
                            "Snapshot %d phase 1 for %s finished with an error on member: %s",
                            snapshotId, ctx.jobNameAndExecutionId(), result.getError()));
                }
                return result;
            });

    if (!postponeResponses) {
        return future;
    }
    return future.thenCompose(result -> {
        CompletableFuture<SnapshotPhase1Result> f2 = new CompletableFuture<>();
        tryCompleteLater(result, f2);
        return f2;
    });
}
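The postponeResponses branch shows a common CompletableFuture pattern: instead of returning the finished future, thenCompose chains a fresh, not-yet-completed future and hands it to a complete-me-later mechanism (tryCompleteLater in the operation). A self-contained sketch of the same shape, with a hypothetical scheduler standing in for the operation's tryCompleteLater:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class PostponedResponseSketch {
    private static final ScheduledExecutorService SCHEDULER = Executors.newSingleThreadScheduledExecutor();

    public static void main(String[] args) {
        CompletableFuture<String> work = CompletableFuture.completedFuture("phase-1 result");

        // Same shape as the postponeResponses branch above: chain a fresh future
        // and hand the pair to a "complete me later" mechanism.
        CompletableFuture<String> postponed = work.thenCompose(result -> {
            CompletableFuture<String> f2 = new CompletableFuture<>();
            tryCompleteLater(result, f2); // hypothetical stand-in for the operation's method
            return f2;
        });

        // Blocks until f2 is completed by the scheduler, ~100 ms later.
        System.out.println(postponed.join());
        SCHEDULER.shutdown();
    }

    // Hypothetical stand-in: completes f2 after a delay, the way the operation
    // releases postponed responses at some later point.
    private static void tryCompleteLater(String result, CompletableFuture<String> f2) {
        SCHEDULER.schedule(() -> { f2.complete(result); }, 100, TimeUnit.MILLISECONDS);
    }
}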
Use of com.hazelcast.jet.impl.JetServiceBackend in project hazelcast by hazelcast.
The class ExecutionPlanBuilder, method createExecutionPlans.
@SuppressWarnings("checkstyle:ParameterNumber")
public static Map<MemberInfo, ExecutionPlan> createExecutionPlans(
        NodeEngineImpl nodeEngine, List<MemberInfo> memberInfos, DAG dag, long jobId, long executionId,
        JobConfig jobConfig, long lastSnapshotId, boolean isLightJob, Subject subject) {
    final int defaultParallelism = nodeEngine.getConfig().getJetConfig().getCooperativeThreadCount();
    final Map<MemberInfo, int[]> partitionsByMember = getPartitionAssignment(nodeEngine, memberInfos);
    final Map<Address, int[]> partitionsByAddress = partitionsByMember.entrySet().stream()
            .collect(toMap(en -> en.getKey().getAddress(), Entry::getValue));
    final List<Address> addresses = toList(partitionsByMember.keySet(), MemberInfo::getAddress);
    final int clusterSize = partitionsByMember.size();
    final boolean isJobDistributed = clusterSize > 1;
    final EdgeConfig defaultEdgeConfig = nodeEngine.getConfig().getJetConfig().getDefaultEdgeConfig();
    final Map<MemberInfo, ExecutionPlan> plans = new HashMap<>();
    int memberIndex = 0;
    for (MemberInfo member : partitionsByMember.keySet()) {
        plans.put(member, new ExecutionPlan(partitionsByAddress, jobConfig, lastSnapshotId,
                memberIndex++, clusterSize, isLightJob, subject));
    }
    final Map<String, Integer> vertexIdMap = assignVertexIds(dag);
    for (Entry<String, Integer> entry : vertexIdMap.entrySet()) {
        final Vertex vertex = dag.getVertex(entry.getKey());
        assert vertex != null;
        final ProcessorMetaSupplier metaSupplier = vertex.getMetaSupplier();
        final int vertexId = entry.getValue();
        // The local parallelism determination here is effective only in jobs
        // submitted as a DAG. In jobs submitted as a pipeline this determination
        // was already done while converting the pipeline to a DAG, so no vertex
        // is left with LP=-1.
        final int localParallelism = vertex.determineLocalParallelism(defaultParallelism);
        final int totalParallelism = localParallelism * clusterSize;
        final List<EdgeDef> inbound = toEdgeDefs(dag.getInboundEdges(vertex.getName()), defaultEdgeConfig,
                e -> vertexIdMap.get(e.getSourceName()), isJobDistributed);
        final List<EdgeDef> outbound = toEdgeDefs(dag.getOutboundEdges(vertex.getName()), defaultEdgeConfig,
                e -> vertexIdMap.get(e.getDestName()), isJobDistributed);
        String prefix = prefix(jobConfig.getName(), jobId, vertex.getName(), "#PMS");
        ILogger logger = prefixedLogger(nodeEngine.getLogger(metaSupplier.getClass()), prefix);

        JetServiceBackend jetBackend = nodeEngine.getService(JetServiceBackend.SERVICE_NAME);
        JobClassLoaderService jobClassLoaderService = jetBackend.getJobClassLoaderService();
        ClassLoader processorClassLoader = jobClassLoaderService.getClassLoader(jobId);
        try {
            doWithClassLoader(processorClassLoader, () ->
                    metaSupplier.init(new MetaSupplierCtx(nodeEngine, jobId, executionId, jobConfig, logger,
                            vertex.getName(), localParallelism, totalParallelism, clusterSize, isLightJob,
                            partitionsByAddress, subject, processorClassLoader)));
        } catch (Exception e) {
            throw sneakyThrow(e);
        }

        Function<? super Address, ? extends ProcessorSupplier> procSupplierFn =
                doWithClassLoader(processorClassLoader, () -> metaSupplier.get(addresses));
        for (Entry<MemberInfo, ExecutionPlan> e : plans.entrySet()) {
            final ProcessorSupplier processorSupplier =
                    doWithClassLoader(processorClassLoader, () -> procSupplierFn.apply(e.getKey().getAddress()));
            if (!isLightJob) {
                // We skip this check for light jobs: the user would get the error anyway,
                // just with less information, and we can recommend using a normal job
                // to get more checks.
                checkSerializable(processorSupplier, "ProcessorSupplier in vertex '" + vertex.getName() + '\'');
            }
            final VertexDef vertexDef = new VertexDef(vertexId, vertex.getName(), processorSupplier, localParallelism);
            vertexDef.addInboundEdges(inbound);
            vertexDef.addOutboundEdges(outbound);
            e.getValue().addVertex(vertexDef);
        }
    }
    return plans;
}
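Note how every call into user code (metaSupplier.init, metaSupplier.get, and producing each member's ProcessorSupplier) is wrapped in doWithClassLoader, so user classes resolve against the job classloader rather than the caller's. A minimal sketch of what such a utility typically looks like; this is an assumed shape for illustration, not Jet's actual implementation:

import java.util.concurrent.Callable;

public class ContextClassLoaderSketch {
    // Hypothetical equivalent of Jet's doWithClassLoader utility: run the task
    // with the given classloader installed as the thread context classloader,
    // restoring the previous one afterwards.
    static <T> T doWithClassLoader(ClassLoader cl, Callable<T> task) {
        Thread thread = Thread.currentThread();
        ClassLoader previous = thread.getContextClassLoader();
        thread.setContextClassLoader(cl);
        try {
            return task.call();
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            thread.setContextClassLoader(previous);
        }
    }

    public static void main(String[] args) {
        ClassLoader jobClassLoader = ContextClassLoaderSketch.class.getClassLoader();
        String name = doWithClassLoader(jobClassLoader,
                () -> Thread.currentThread().getContextClassLoader().getClass().getName());
        System.out.println(name);
    }
}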