Use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.
The class StreamKafkaPTest, method integrationTest.
private void integrationTest(ProcessingGuarantee guarantee) throws Exception {
    int messageCount = 20;
    HazelcastInstance[] instances = new HazelcastInstance[2];
    Arrays.setAll(instances, i -> createHazelcastInstance());

    Pipeline p = Pipeline.create();
    p.readFrom(KafkaSources.kafka(properties(), topic1Name, topic2Name))
     .withoutTimestamps()
     .writeTo(Sinks.list("sink"));

    JobConfig config = new JobConfig();
    config.setProcessingGuarantee(guarantee);
    config.setSnapshotIntervalMillis(500);
    Job job = instances[0].getJet().newJob(p, config);
    sleepSeconds(3);

    for (int i = 0; i < messageCount; i++) {
        kafkaTestSupport.produce(topic1Name, i, Integer.toString(i));
        kafkaTestSupport.produce(topic2Name, i - messageCount, Integer.toString(i - messageCount));
    }

    IList<Object> list = instances[0].getList("sink");
    assertTrueEventually(() -> {
        assertEquals(messageCount * 2, list.size());
        for (int i = 0; i < messageCount; i++) {
            Entry<Integer, String> entry1 = createEntry(i);
            Entry<Integer, String> entry2 = createEntry(i - messageCount);
            assertTrue("missing entry: " + entry1, list.contains(entry1));
            assertTrue("missing entry: " + entry2, list.contains(entry2));
        }
    }, 15);

    if (guarantee != ProcessingGuarantee.NONE) {
        // wait until a new snapshot appears
        JobRepository jr = new JobRepository(instances[0]);
        long currentMax = jr.getJobExecutionRecord(job.getId()).snapshotId();
        assertTrueEventually(() -> {
            JobExecutionRecord jobExecutionRecord = jr.getJobExecutionRecord(job.getId());
            assertNotNull("jobExecutionRecord == null", jobExecutionRecord);
            long newMax = jobExecutionRecord.snapshotId();
            assertTrue("no snapshot produced", newMax > currentMax);
            System.out.println("snapshot " + newMax + " found, previous was " + currentMax);
        });

        // Bring down one member. The job should restart and drain the additional items
        // (and possibly re-emit some of the previous ones as duplicates).
        instances[1].getLifecycleService().terminate();
        Thread.sleep(500);
        for (int i = messageCount; i < 2 * messageCount; i++) {
            kafkaTestSupport.produce(topic1Name, i, Integer.toString(i));
            kafkaTestSupport.produce(topic2Name, i - messageCount, Integer.toString(i - messageCount));
        }
        assertTrueEventually(() -> {
            assertTrue("Not all messages were received", list.size() >= messageCount * 4);
            for (int i = 0; i < 2 * messageCount; i++) {
                Entry<Integer, String> entry1 = createEntry(i);
                Entry<Integer, String> entry2 = createEntry(i - messageCount);
                assertTrue("missing entry: " + entry1, list.contains(entry1));
                assertTrue("missing entry: " + entry2, list.contains(entry2));
            }
        }, 10);
    }
    assertFalse(job.getFuture().isDone());

    // cancel the job
    job.cancel();
    assertTrueEventually(() -> assertTrue(job.getFuture().isDone()));
}
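The createEntry helper is referenced above but not included in this snippet. A minimal sketch of a compatible implementation, assuming it pairs the integer key with its decimal string form to match the produce() calls above:

    private static Entry<Integer, String> createEntry(int i) {
        // Util.entry is Hazelcast Jet's convenience factory for Map.Entry
        return com.hazelcast.jet.Util.entry(i, Integer.toString(i));
    }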
Use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.
The class OperationLossTest, method when_snapshotOperationLost_then_retried.
@Test
public void when_snapshotOperationLost_then_retried() {
    // drop the phase-1 snapshot operation so the initial snapshot attempt is lost
    PacketFiltersUtil.dropOperationsFrom(instance(), JetInitDataSerializerHook.FACTORY_ID,
            singletonList(JetInitDataSerializerHook.SNAPSHOT_PHASE1_OPERATION));
    DAG dag = new DAG();
    Vertex v1 = dag.newVertex("v1", () -> new DummyStatefulP()).localParallelism(1);
    Vertex v2 = dag.newVertex("v2", mapP(identity())).localParallelism(1);
    dag.edge(between(v1, v2).distributed());

    Job job = instance().getJet().newJob(dag, new JobConfig()
            .setProcessingGuarantee(EXACTLY_ONCE)
            .setSnapshotIntervalMillis(100));
    assertJobStatusEventually(job, RUNNING);

    JobRepository jobRepository = new JobRepository(instance());
    assertTrueEventually(() -> {
        JobExecutionRecord record = jobRepository.getJobExecutionRecord(job.getId());
        assertNotNull("null JobExecutionRecord", record);
        assertEquals("ongoingSnapshotId", 0, record.ongoingSnapshotId());
    }, 20);
    sleepSeconds(1);

    // now lift the filter and check that a snapshot is done
    logger.info("Lifting the packet filter...");
    PacketFiltersUtil.resetPacketFiltersFrom(instance());
    waitForFirstSnapshot(jobRepository, job.getId(), 10, false);
    cancelAndJoin(job);
}
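The waitForFirstSnapshot helper called above is not part of this snippet; the waitForNextSnapshot helper at the end of this page shows the pattern it follows. A plausible sketch, assuming it only requires that a first snapshot exists rather than that a newer one has appeared:

    public void waitForFirstSnapshot(JobRepository jr, long jobId, int timeoutSeconds, boolean allowEmptySnapshot) {
        assertTrueEventually(() -> {
            JobExecutionRecord record = jr.getJobExecutionRecord(jobId);
            assertNotNull("no JobExecutionRecord", record);
            // snapshotId() starts negative; a non-negative id means a snapshot completed
            assertTrue("no snapshot produced", record.snapshotId() >= 0);
            assertTrue("stats are 0", allowEmptySnapshot || record.snapshotStats().numBytes() > 0);
        }, timeoutSeconds);
    }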
Use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.
The class JobTimeoutClusterTest, method when_masterFails_timedOutJobIsCancelled.
@Test
public void when_masterFails_timedOutJobIsCancelled() {
    final HazelcastInstance[] instances = createHazelcastInstances(2);
    final HazelcastInstance oldMaster = instances[0];
    final HazelcastInstance newMaster = instances[1];
    assertClusterSizeEventually(2, newMaster);
    assertClusterStateEventually(ClusterState.ACTIVE, newMaster);

    final DAG dag = new DAG();
    dag.newVertex("stuck", () -> new MockP().streaming());
    final JobConfig jobConfig = new JobConfig()
            .setTimeoutMillis(10000L)
            .setSnapshotIntervalMillis(1L)
            .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE);
    final Job job = oldMaster.getJet().newJob(dag, jobConfig);
    final long jobId = job.getId();

    // start and wait for the job to start running
    assertJobStatusEventually(job, JobStatus.RUNNING);
    final JobRepository oldJobRepository = new JobRepository(oldMaster);
    assertTrueEventually(() -> {
        final JobExecutionRecord record = oldJobRepository.getJobExecutionRecord(jobId);
        assertTrue(record.snapshotId() > 0);
    });

    // kill the old master and wait for the cluster to reconfigure
    oldMaster.getLifecycleService().terminate();
    assertClusterStateEventually(ClusterState.ACTIVE, newMaster);
    assertClusterSize(1, newMaster);

    // wait for the job to be restarted and cancelled due to the timeout
    final Job restartedJob = newMaster.getJet().getJob(jobId);
    assertNotNull(restartedJob);
    assertJobStatusEventually(restartedJob, JobStatus.FAILED);
}
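assertJobStatusEventually is a JetTestSupport helper used throughout these examples. A minimal sketch of its likely shape, assuming it simply polls Job.getStatus() inside the usual eventually-assertion loop:

    public static void assertJobStatusEventually(Job job, JobStatus expected) {
        // retries until the job reports the expected status or the default timeout elapses
        assertTrueEventually(() -> assertEquals(expected, job.getStatus()));
    }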
Use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.
The class SplitBrainTest, method when_newMemberJoinsToCluster_then_jobQuorumSizeIsUpdated.
@Test
public void when_newMemberJoinsToCluster_then_jobQuorumSizeIsUpdated() {
    int clusterSize = 3;
    HazelcastInstance[] instances = new HazelcastInstance[clusterSize];
    for (int i = 0; i < clusterSize; i++) {
        instances[i] = createHazelcastInstance(createConfig());
    }

    NoOutputSourceP.executionStarted = new CountDownLatch(clusterSize * PARALLELISM);
    MockPS processorSupplier = new MockPS(NoOutputSourceP::new, clusterSize);
    DAG dag = new DAG().vertex(new Vertex("test", processorSupplier).localParallelism(PARALLELISM));
    Job job = instances[0].getJet().newJob(dag, new JobConfig().setSplitBrainProtection(true));
    assertOpenEventually(NoOutputSourceP.executionStarted);

    // a 4th member joins; the quorum size stored in the JobExecutionRecord should grow to 3
    createHazelcastInstance(createConfig());

    assertTrueEventually(() -> {
        JetServiceBackend service = getJetServiceBackend(instances[0]);
        JobRepository jobRepository = service.getJobRepository();
        JobExecutionRecord record = jobRepository.getJobExecutionRecord(job.getId());
        assertEquals(3, record.getQuorumSize());
        MasterContext masterContext = service.getJobCoordinationService().getMasterContext(job.getId());
        assertEquals(3, masterContext.jobExecutionRecord().getQuorumSize());
    });
    NoOutputSourceP.proceedLatch.countDown();
}
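The expected value of 3 follows from the usual majority formula, assuming the standard computation of a split-brain protection quorum from the cluster size:

    // majority quorum: floor(n / 2) + 1
    static int quorumSize(int clusterSize) {
        return clusterSize / 2 + 1;
    }
    // quorumSize(3) == 2; after the 4th member joins, quorumSize(4) == 3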
Use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.
The class JetTestSupport, method waitForNextSnapshot.
public void waitForNextSnapshot(JobRepository jr, long jobId, int timeoutSeconds, boolean allowEmptySnapshot) {
    long originalSnapshotId = jr.getJobExecutionRecord(jobId).snapshotId();
    // wait until there is at least one more snapshot
    long[] snapshotId = { -1 };
    long start = System.nanoTime();
    assertTrueEventually(() -> {
        JobExecutionRecord record = jr.getJobExecutionRecord(jobId);
        assertNotNull("jobExecutionRecord is null", record);
        snapshotId[0] = record.snapshotId();
        assertTrue("No more snapshots produced in " + timeoutSeconds + " seconds",
                snapshotId[0] > originalSnapshotId);
        assertTrue("stats are 0", allowEmptySnapshot || record.snapshotStats().numBytes() > 0);
    }, timeoutSeconds);
    SUPPORT_LOGGER.info("Next snapshot found after " + NANOSECONDS.toMillis(System.nanoTime() - start)
            + " ms (id=" + snapshotId[0] + ", previous id=" + originalSnapshotId + ")");
}
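A typical call site, sketched from the tests above (the variable names are illustrative, only the helper itself comes from this page):

    // after forcing some state change, require one fresh snapshot before proceeding
    JobRepository jr = new JobRepository(instance);
    waitForNextSnapshot(jr, job.getId(), 20, false);   // fail if no new snapshot appears within 20 s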