use of com.hazelcast.jet.impl.JobRepository in project hazelcast-jet by hazelcast.
the class SplitBrainTest method when_newMemberJoinsToCluster_then_jobQuorumSizeIsUpdated.
/**
 * Starts a job with split-brain protection on a three-member cluster, adds a
 * fourth member once the execution-started latch has opened, and expects the
 * quorum size stored in the job record to eventually be reported as 3.
 */
@Test
public void when_newMemberJoinsToCluster_then_jobQuorumSizeIsUpdated() {
    final int initialMembers = 3;
    JetConfig sharedConfig = new JetConfig();
    JetInstance[] instances = new JetInstance[initialMembers];
    for (int idx = 0; idx < initialMembers; idx++) {
        instances[idx] = createJetMember(sharedConfig);
    }

    // The latch is sized to initialMembers * PARALLELISM and is awaited below
    // before the additional member is created.
    StuckProcessor.executionStarted = new CountDownLatch(initialMembers * PARALLELISM);
    MockPS supplier = new MockPS(StuckProcessor::new, initialMembers);
    Vertex vertex = new Vertex("test", supplier);
    DAG dag = new DAG().vertex(vertex);
    JobConfig protectedConfig = new JobConfig().setSplitBrainProtection(true);
    Job job = instances[0].newJob(dag, protectedConfig);
    assertOpenEventually(StuckProcessor.executionStarted);

    // Grow the cluster; presumably the job is still held by StuckProcessor
    // because proceedLatch is only released at the very end of this test.
    createJetMember(sharedConfig);

    assertTrueEventually(() -> {
        JobRecord record = getJetService(instances[0])
                .getJobRepository()
                .getJobRecord(job.getId());
        assertEquals(3, record.getQuorumSize());
    });

    StuckProcessor.proceedLatch.countDown();
}
use of com.hazelcast.jet.impl.JobRepository in project hazelcast-jet by hazelcast.
the class TopologyChangeTest method when_coordinatorLeavesDuringExecution_then_jobCompletes.
/**
 * Terminates the member the job was submitted on ({@code instances[0]}) while
 * the job is executing, then checks on {@code instances[1]} that a successful
 * job result is eventually recorded and that the {@link MockPS} init/close
 * counters and received close errors match the expected values.
 */
@Test
public void when_coordinatorLeavesDuringExecution_then_jobCompletes() throws Throwable {
    // Given
    MockPS supplier = new MockPS(StuckProcessor::new, nodeCount);
    DAG dag = new DAG().vertex(new Vertex("test", supplier));

    // When
    Long jobId = null;
    try {
        Job job = instances[0].newJob(dag);
        Future<Void> future = job.getFuture();
        jobId = job.getId();
        StuckProcessor.executionStarted.await();

        instances[0].getHazelcastInstance().getLifecycleService().terminate();
        StuckProcessor.proceedLatch.countDown();

        // The future obtained from the terminated member is expected to fail.
        future.get();
        fail();
    } catch (ExecutionException expected) {
        Throwable cause = expected.getCause();
        assertTrue(cause instanceof HazelcastInstanceNotActiveException);
    }

    // Then
    assertNotNull(jobId);
    final long completedJobId = jobId;
    JobRepository jobRepository = getJetService(instances[1]).getJobRepository();
    assertTrueEventually(() -> {
        JobResult result = jobRepository.getJobResult(completedJobId);
        assertNotNull(result);
        assertTrue(result.isSuccessful());
    });

    // Expected number of MockPS init/close calls; one fewer when the first
    // member is not flagged as a lite member.
    final int count;
    if (liteMemberFlags[0]) {
        count = 2 * nodeCount;
    } else {
        count = 2 * nodeCount - 1;
    }
    assertEquals(count, MockPS.initCount.get());

    assertTrueEventually(() -> {
        assertEquals(count, MockPS.closeCount.get());
        assertEquals(nodeCount, MockPS.receivedCloseErrors.size());
        for (int idx = 0; idx < MockPS.receivedCloseErrors.size(); idx++) {
            Throwable closeError = MockPS.receivedCloseErrors.get(idx);
            boolean acceptable = closeError instanceof TopologyChangedException
                    || closeError instanceof HazelcastInstanceNotActiveException;
            assertTrue(acceptable);
        }
    });
}
use of com.hazelcast.jet.impl.JobRepository in project hazelcast by hazelcast.
the class StreamKafkaPTest method integrationTest.
/**
 * Runs a job that reads two Kafka topics into the {@code "sink"} list on a
 * two-member cluster and checks that every produced entry arrives. For any
 * guarantee other than {@code NONE} it additionally waits for a newer
 * snapshot id, terminates the second member, produces more messages and
 * checks that all of them arrive. Finally the job is cancelled.
 *
 * @param guarantee the processing guarantee to configure on the job
 */
private void integrationTest(ProcessingGuarantee guarantee) throws Exception {
    final int messageCount = 20;

    HazelcastInstance[] instances = new HazelcastInstance[2];
    for (int member = 0; member < instances.length; member++) {
        instances[member] = createHazelcastInstance();
    }

    Pipeline p = Pipeline.create();
    p.readFrom(KafkaSources.kafka(properties(), topic1Name, topic2Name))
     .withoutTimestamps()
     .writeTo(Sinks.list("sink"));

    JobConfig config = new JobConfig()
            .setProcessingGuarantee(guarantee)
            .setSnapshotIntervalMillis(500);
    Job job = instances[0].getJet().newJob(p, config);
    sleepSeconds(3);

    // First batch: keys 0..messageCount-1 go to topic 1, the same keys shifted
    // down by messageCount (i.e. negative keys) go to topic 2.
    for (int key = 0; key < messageCount; key++) {
        int shiftedKey = key - messageCount;
        kafkaTestSupport.produce(topic1Name, key, Integer.toString(key));
        kafkaTestSupport.produce(topic2Name, shiftedKey, Integer.toString(shiftedKey));
    }

    IList<Object> list = instances[0].getList("sink");
    assertTrueEventually(() -> {
        assertEquals(messageCount * 2, list.size());
        for (int key = 0; key < messageCount; key++) {
            Entry<Integer, String> entry1 = createEntry(key);
            Entry<Integer, String> entry2 = createEntry(key - messageCount);
            assertTrue("missing entry: " + entry1, list.contains(entry1));
            assertTrue("missing entry: " + entry2, list.contains(entry2));
        }
    }, 15);

    boolean snapshotsEnabled = guarantee != ProcessingGuarantee.NONE;
    if (snapshotsEnabled) {
        // wait until a new snapshot appears
        JobRepository jr = new JobRepository(instances[0]);
        long currentMax = jr.getJobExecutionRecord(job.getId()).snapshotId();
        assertTrueEventually(() -> {
            JobExecutionRecord jobExecutionRecord = jr.getJobExecutionRecord(job.getId());
            assertNotNull("jobExecutionRecord == null", jobExecutionRecord);
            long newMax = jobExecutionRecord.snapshotId();
            assertTrue("no snapshot produced", newMax > currentMax);
            System.out.println("snapshot " + newMax + " found, previous was " + currentMax);
        });

        // Bring down one member. Job should restart and drain additional items (and maybe
        // some of the previous duplicately).
        instances[1].getLifecycleService().terminate();
        Thread.sleep(500);

        // Second batch: topic 1 gets keys messageCount..2*messageCount-1,
        // topic 2 gets keys 0..messageCount-1.
        for (int offset = 0; offset < messageCount; offset++) {
            int topic1Key = messageCount + offset;
            kafkaTestSupport.produce(topic1Name, topic1Key, Integer.toString(topic1Key));
            kafkaTestSupport.produce(topic2Name, offset, Integer.toString(offset));
        }

        assertTrueEventually(() -> {
            assertTrue("Not all messages were received", list.size() >= messageCount * 4);
            for (int key = 0; key < 2 * messageCount; key++) {
                Entry<Integer, String> entry1 = createEntry(key);
                Entry<Integer, String> entry2 = createEntry(key - messageCount);
                assertTrue("missing entry: " + entry1.toString(), list.contains(entry1));
                assertTrue("missing entry: " + entry2.toString(), list.contains(entry2));
            }
        }, 10);
    }

    assertFalse(job.getFuture().isDone());

    // cancel the job
    job.cancel();
    assertTrueEventually(() -> assertTrue(job.getFuture().isDone()));
}
use of com.hazelcast.jet.impl.JobRepository in project hazelcast by hazelcast.
the class JobSnapshotMetricsTest method when_snapshotCreated_then_snapshotMetricsAreEmptyForStatelessVertex.
/**
 * Submits the test pipeline with {@code EXACTLY_ONCE} guarantee and a 50 ms
 * snapshot interval, waits for the first snapshot, then checks that the
 * summed {@code SNAPSHOT_KEYS} metric reaches at least 1 and asserts the
 * snapshot MBeans for the filter vertex with the arguments {@code 0} and
 * {@code false}.
 */
@Test
public void when_snapshotCreated_then_snapshotMetricsAreEmptyForStatelessVertex() throws Exception {
    JobConfig jobConfig = new JobConfig();
    jobConfig
            .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE)
            .setSnapshotIntervalMillis(50);

    Job job = instance().getJet().newJob(pipeline(), jobConfig);

    JobRepository repository = new JobRepository(instance());
    long jobId = job.getId();
    waitForFirstSnapshot(repository, jobId, 20, false);

    JobMetricsChecker metricsChecker = new JobMetricsChecker(job);
    assertTrueEventually(() ->
            metricsChecker.assertSummedMetricValueAtLeast(MetricNames.SNAPSHOT_KEYS, 1));

    assertSnapshotMBeans(job, FILTER_VERTEX_NAME, 0, false);
}
use of com.hazelcast.jet.impl.JobRepository in project hazelcast by hazelcast.
the class JmsSourceIntegrationTestBase method stressTest.
/**
 * Stress-tests the JMS source under repeated job restarts.
 * <p>
 * A background producer sends {@code MESSAGE_COUNT} numbered text messages to
 * a fresh destination while the job is restarted in a loop until the producer
 * finishes. Every received item is appended to a stateful list that is
 * published through {@code lastListInStressTest}; at the end the per-item
 * counts of that list are checked against {@code maxGuarantee}.
 *
 * @param graceful     passed to {@code JobProxy.restart(boolean)} on each restart
 * @param maxGuarantee the maximum guarantee configured on the JMS source; also
 *                     decides how strictly the final item counts are asserted
 * @param useTopic     {@code true} to read from a topic through a shared durable
 *                     consumer, {@code false} to read from a queue
 */
private void stressTest(boolean graceful, ProcessingGuarantee maxGuarantee, boolean useTopic) throws Exception {
    lastListInStressTest = null;
    final int MESSAGE_COUNT = 4_000;
    Pipeline p = Pipeline.create();
    // A new destination name per invocation (the name starts with "queue-" for topics, too).
    String destName = "queue-" + counter++;
    JmsSourceBuilder sourceBuilder;
    if (useTopic) {
        sourceBuilder = Sources.jmsTopicBuilder(getConnectionFactory())
                .sharedConsumer(true)
                .consumerFn(s -> s.createSharedDurableConsumer(s.createTopic(destName), "foo-consumer"));
        // create the durable subscriber now so that it doesn't lose the initial messages
        try (Connection conn = getConnectionFactory().get().createConnection()) {
            conn.setClientID("foo-client-id");
            try (Session sess = conn.createSession(false, DUPS_OK_ACKNOWLEDGE)) {
                sess.createDurableSubscriber(sess.createTopic(destName), "foo-consumer");
            }
        }
    } else {
        sourceBuilder = Sources.jmsQueueBuilder(getConnectionFactory()).destinationName(destName);
    }
    p.readFrom(sourceBuilder.maxGuarantee(maxGuarantee).build(msg -> Long.parseLong(((TextMessage) msg).getText())))
     .withoutTimestamps()
     .peek()
     .mapStateful(CopyOnWriteArrayList<Long>::new, (list, item) -> {
         // Publish the state list so the assertions at the end can inspect it.
         lastListInStressTest = list;
         list.add(item);
         return null;
     })
     .writeTo(Sinks.logger());
    Job job = instance().getJet().newJob(p, new JobConfig()
            .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE)
            .setSnapshotIntervalMillis(50));
    assertJobStatusEventually(job, RUNNING);
    // start a producer that will produce MESSAGE_COUNT messages on the background to the queue, 1000 msgs/s
    @SuppressWarnings("rawtypes") Future producerFuture = spawn(() -> {
        try (Connection connection = getConnectionFactory().get().createConnection();
             Session session = connection.createSession(false, AUTO_ACKNOWLEDGE);
             MessageProducer producer = session.createProducer(
                     useTopic ? session.createTopic(destName) : session.createQueue(destName))) {
            long startTime = System.nanoTime();
            for (int i = 0; i < MESSAGE_COUNT; i++) {
                producer.send(session.createTextMessage(String.valueOf(i)));
                // Throttle: after sending message i, sleep until i milliseconds have elapsed since the start.
                Thread.sleep(Math.max(0, i - NANOSECONDS.toMillis(System.nanoTime() - startTime)));
            }
        } catch (Exception e) {
            throw sneakyThrow(e);
        }
    });
    int iteration = 0;
    JobRepository jr = new JobRepository(instance());
    waitForFirstSnapshot(jr, job.getId(), 20, true);
    // Keep restarting the job at random intervals (below 200 ms) for as long as the producer is running.
    while (!producerFuture.isDone()) {
        Thread.sleep(ThreadLocalRandom.current().nextInt(200));
        // We also do it before the first restart to workaround https://issues.apache.org/jira/browse/ARTEMIS-2546
        if (iteration++ % 3 == 0) {
            waitForNextSnapshot(jr, job.getId(), 20, true);
        }
        ((JobProxy) job).restart(graceful);
        assertJobStatusEventually(job, RUNNING);
    }
    // call for the side-effect of throwing if the producer failed
    producerFuture.get();
    assertTrueEventually(() -> {
        // Occurrences per item in the last state list; items never seen are added with a count of 0.
        Map<Long, Long> counts = lastListInStressTest.stream()
                .collect(Collectors.groupingBy(Function.identity(), TreeMap::new, Collectors.counting()));
        for (long i = 0; i < MESSAGE_COUNT; i++) {
            counts.putIfAbsent(i, 0L);
        }
        String countsStr = "counts: " + counts;
        if (maxGuarantee == NONE) {
            // we don't assert anything and only wait little more and check that the job didn't fail
            sleepSeconds(1);
        } else {
            // in EXACTLY_ONCE the list must have each item exactly once
            // in AT_LEAST_ONCE the list must have each item at least once
            assertTrue(countsStr, counts.values().stream().allMatch(cnt -> maxGuarantee == EXACTLY_ONCE ? cnt == 1 : cnt > 0));
        }
        logger.info(countsStr);
    }, 30);
    // Expected value goes first so that a failure message reports expected/actual correctly.
    assertEquals(RUNNING, job.getStatus());
}
Aggregations