Search in sources :

Example 1 with JobExecutionRecord

use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.

the class StreamKafkaPTest method integrationTest.

/**
 * End-to-end check of the Kafka source under the given processing guarantee.
 *
 * <p>Creates two members, submits a job that reads {@code topic1Name} and
 * {@code topic2Name} into the {@code "sink"} list and verifies that every
 * produced message shows up there. When the guarantee is not
 * {@link ProcessingGuarantee#NONE}, it additionally waits for a newer snapshot,
 * terminates the second member, produces another batch and verifies that all
 * messages are eventually present in the sink. Finally the job is cancelled.
 *
 * @param guarantee processing guarantee the job is configured with
 * @throws Exception propagated from the helpers used here or from {@code Thread.sleep}
 */
private void integrationTest(ProcessingGuarantee guarantee) throws Exception {
    int messageCount = 20;
    HazelcastInstance[] instances = new HazelcastInstance[2];
    Arrays.setAll(instances, i -> createHazelcastInstance());
    Pipeline p = Pipeline.create();
    p.readFrom(KafkaSources.kafka(properties(), topic1Name, topic2Name))
     .withoutTimestamps()
     .writeTo(Sinks.list("sink"));
    JobConfig config = new JobConfig();
    config.setProcessingGuarantee(guarantee);
    config.setSnapshotIntervalMillis(500);
    Job job = instances[0].getJet().newJob(p, config);
    // presumably gives the job time to start consuming before anything is produced
    sleepSeconds(3);
    // first batch: keys 0..messageCount-1 go to topic 1, keys -messageCount..-1 go to topic 2
    for (int i = 0; i < messageCount; i++) {
        kafkaTestSupport.produce(topic1Name, i, Integer.toString(i));
        kafkaTestSupport.produce(topic2Name, i - messageCount, Integer.toString(i - messageCount));
    }
    IList<Object> list = instances[0].getList("sink");
    assertTrueEventually(() -> {
        assertEquals(messageCount * 2, list.size());
        for (int i = 0; i < messageCount; i++) {
            Entry<Integer, String> entry1 = createEntry(i);
            Entry<Integer, String> entry2 = createEntry(i - messageCount);
            assertTrue("missing entry: " + entry1, list.contains(entry1));
            assertTrue("missing entry: " + entry2, list.contains(entry2));
        }
    }, 15);
    if (guarantee != ProcessingGuarantee.NONE) {
        // wait until a new snapshot appears
        JobRepository jr = new JobRepository(instances[0]);
        // guard the baseline lookup the same way the polling lambda below does,
        // so a missing record fails with a clear message instead of an NPE
        JobExecutionRecord initialRecord = jr.getJobExecutionRecord(job.getId());
        assertNotNull("jobExecutionRecord == null", initialRecord);
        long currentMax = initialRecord.snapshotId();
        assertTrueEventually(() -> {
            JobExecutionRecord jobExecutionRecord = jr.getJobExecutionRecord(job.getId());
            assertNotNull("jobExecutionRecord == null", jobExecutionRecord);
            long newMax = jobExecutionRecord.snapshotId();
            assertTrue("no snapshot produced", newMax > currentMax);
            System.out.println("snapshot " + newMax + " found, previous was " + currentMax);
        });
        // Bring down one member. Job should restart and drain additional items (and maybe
        // some of the previous duplicately).
        instances[1].getLifecycleService().terminate();
        Thread.sleep(500);
        // second batch: keys messageCount..2*messageCount-1 go to topic 1,
        // keys 0..messageCount-1 go to topic 2
        for (int i = messageCount; i < 2 * messageCount; i++) {
            kafkaTestSupport.produce(topic1Name, i, Integer.toString(i));
            kafkaTestSupport.produce(topic2Name, i - messageCount, Integer.toString(i - messageCount));
        }
        assertTrueEventually(() -> {
            // ">=" rather than "==": duplicates are tolerated after the restart
            assertTrue("Not all messages were received", list.size() >= messageCount * 4);
            for (int i = 0; i < 2 * messageCount; i++) {
                Entry<Integer, String> entry1 = createEntry(i);
                Entry<Integer, String> entry2 = createEntry(i - messageCount);
                assertTrue("missing entry: " + entry1, list.contains(entry1));
                assertTrue("missing entry: " + entry2, list.contains(entry2));
            }
        }, 10);
    }
    // the streaming job must still be running at this point
    assertFalse(job.getFuture().isDone());
    // cancel the job
    job.cancel();
    assertTrueEventually(() -> assertTrue(job.getFuture().isDone()));
}
Also used : JobRepository(com.hazelcast.jet.impl.JobRepository) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) JobConfig(com.hazelcast.jet.config.JobConfig) Pipeline(com.hazelcast.jet.pipeline.Pipeline) HazelcastInstance(com.hazelcast.core.HazelcastInstance) Job(com.hazelcast.jet.Job)

Example 2 with JobExecutionRecord

use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.

the class OperationLossTest method when_snapshotOperationLost_then_retried.

/**
 * Drops {@code SNAPSHOT_PHASE1_OPERATION} packets sent from this member, starts an
 * exactly-once job with a 100 ms snapshot interval, waits until the execution
 * record reports ongoing snapshot id 0, then lifts the packet filter and waits
 * for the first snapshot before cancelling the job.
 */
@Test
public void when_snapshotOperationLost_then_retried() {
    PacketFiltersUtil.dropOperationsFrom(
            instance(),
            JetInitDataSerializerHook.FACTORY_ID,
            singletonList(JetInitDataSerializerHook.SNAPSHOT_PHASE1_OPERATION));

    // two single-processor vertices joined by a distributed edge
    DAG dag = new DAG();
    Vertex upstream = dag.newVertex("v1", () -> new DummyStatefulP()).localParallelism(1);
    Vertex downstream = dag.newVertex("v2", mapP(identity())).localParallelism(1);
    dag.edge(between(upstream, downstream).distributed());

    JobConfig jobConfig = new JobConfig()
            .setProcessingGuarantee(EXACTLY_ONCE)
            .setSnapshotIntervalMillis(100);
    Job job = instance().getJet().newJob(dag, jobConfig);
    assertJobStatusEventually(job, RUNNING);

    long jobId = job.getId();
    JobRepository repository = new JobRepository(instance());
    // wait until the record shows snapshot 0 as the ongoing one
    assertTrueEventually(() -> {
        JobExecutionRecord executionRecord = repository.getJobExecutionRecord(jobId);
        assertNotNull("null JobExecutionRecord", executionRecord);
        assertEquals("ongoingSnapshotId", 0, executionRecord.ongoingSnapshotId());
    }, 20);
    sleepSeconds(1);

    // now lift the filter and check that a snapshot is done
    logger.info("Lifting the packet filter...");
    PacketFiltersUtil.resetPacketFiltersFrom(instance());
    waitForFirstSnapshot(repository, jobId, 10, false);
    cancelAndJoin(job);
}
Also used : DummyStatefulP(com.hazelcast.jet.core.TestProcessors.DummyStatefulP) Job(com.hazelcast.jet.Job) JobRepository(com.hazelcast.jet.impl.JobRepository) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) JobConfig(com.hazelcast.jet.config.JobConfig) NightlyTest(com.hazelcast.test.annotation.NightlyTest) Test(org.junit.Test)

Example 3 with JobExecutionRecord

use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.

the class JobTimeoutClusterTest method when_masterFails_timedOutJobIsCancelled.

/**
 * Submits a streaming job with a 10 s timeout to the member referred to as the old
 * master, waits for at least one snapshot, terminates that member and then
 * expects the job, looked up on the remaining member, to reach {@code FAILED}.
 */
@Test
public void when_masterFails_timedOutJobIsCancelled() {
    final HazelcastInstance[] instances = createHazelcastInstances(2);
    final HazelcastInstance oldMaster = instances[0];
    final HazelcastInstance newMaster = instances[1];
    assertClusterSizeEventually(2, newMaster);
    assertClusterStateEventually(ClusterState.ACTIVE, newMaster);
    final DAG dag = new DAG();
    dag.newVertex("stuck", () -> new MockP().streaming());
    final JobConfig jobConfig = new JobConfig()
            .setTimeoutMillis(10000L)
            .setSnapshotIntervalMillis(1L)
            .setProcessingGuarantee(ProcessingGuarantee.EXACTLY_ONCE);
    final Job job = oldMaster.getJet().newJob(dag, jobConfig);
    final long jobId = job.getId();
    // start and wait for the job to start running
    assertJobStatusEventually(job, JobStatus.RUNNING);
    final JobRepository oldJobRepository = new JobRepository(oldMaster);
    // wait until a snapshot with id > 0 has been recorded
    assertTrueEventually(() -> {
        final JobExecutionRecord record = oldJobRepository.getJobExecutionRecord(jobId);
        // fail as an assertion (not an NPE) while the record is not available yet
        assertNotNull("null JobExecutionRecord", record);
        assertTrue(record.snapshotId() > 0);
    });
    // kill old master and wait for the cluster to reconfigure
    oldMaster.getLifecycleService().terminate();
    assertClusterStateEventually(ClusterState.ACTIVE, newMaster);
    // eventual variant: the remaining member may need a moment to observe the removal
    assertClusterSizeEventually(1, newMaster);
    // wait for the job to be restarted and cancelled due to timeout
    final Job restartedJob = newMaster.getJet().getJob(jobId);
    assertNotNull(restartedJob);
    assertJobStatusEventually(restartedJob, JobStatus.FAILED);
}
Also used : HazelcastInstance(com.hazelcast.core.HazelcastInstance) MockP(com.hazelcast.jet.core.TestProcessors.MockP) Job(com.hazelcast.jet.Job) JobRepository(com.hazelcast.jet.impl.JobRepository) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) JobConfig(com.hazelcast.jet.config.JobConfig) Test(org.junit.Test) SlowTest(com.hazelcast.test.annotation.SlowTest)

Example 4 with JobExecutionRecord

use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.

the class SplitBrainTest method when_newMemberJoinsToCluster_then_jobQuorumSizeIsUpdated.

/**
 * Starts a job with split-brain protection on a three-member cluster, creates a
 * fourth member and expects a quorum size of 3 both in the stored
 * {@code JobExecutionRecord} and in the record held by the job's
 * {@code MasterContext}.
 */
@Test
public void when_newMemberJoinsToCluster_then_jobQuorumSizeIsUpdated() {
    int clusterSize = 3;
    HazelcastInstance[] instances = new HazelcastInstance[clusterSize];
    for (int idx = 0; idx < instances.length; idx++) {
        instances[idx] = createHazelcastInstance(createConfig());
    }

    // one latch count per processor expected to start across the cluster
    NoOutputSourceP.executionStarted = new CountDownLatch(clusterSize * PARALLELISM);
    MockPS supplier = new MockPS(NoOutputSourceP::new, clusterSize);
    Vertex testVertex = new Vertex("test", supplier).localParallelism(PARALLELISM);
    DAG dag = new DAG().vertex(testVertex);
    JobConfig jobConfig = new JobConfig().setSplitBrainProtection(true);
    Job job = instances[0].getJet().newJob(dag, jobConfig);
    assertOpenEventually(NoOutputSourceP.executionStarted);

    // a new member joins while the job is running
    createHazelcastInstance(createConfig());

    assertTrueEventually(() -> {
        JetServiceBackend backend = getJetServiceBackend(instances[0]);
        long jobId = job.getId();

        // quorum size as stored in the job repository
        JobExecutionRecord storedRecord = backend.getJobRepository().getJobExecutionRecord(jobId);
        assertEquals(3, storedRecord.getQuorumSize());

        // quorum size as seen by the job's master context
        MasterContext masterContext = backend.getJobCoordinationService().getMasterContext(jobId);
        assertEquals(3, masterContext.jobExecutionRecord().getQuorumSize());
    });

    NoOutputSourceP.proceedLatch.countDown();
}
Also used : MockPS(com.hazelcast.jet.core.TestProcessors.MockPS) CountDownLatch(java.util.concurrent.CountDownLatch) JobRepository(com.hazelcast.jet.impl.JobRepository) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) JobConfig(com.hazelcast.jet.config.JobConfig) HazelcastInstance(com.hazelcast.core.HazelcastInstance) NoOutputSourceP(com.hazelcast.jet.core.TestProcessors.NoOutputSourceP) Job(com.hazelcast.jet.Job) MasterContext(com.hazelcast.jet.impl.MasterContext) JetServiceBackend(com.hazelcast.jet.impl.JetServiceBackend) NightlyTest(com.hazelcast.test.annotation.NightlyTest) Test(org.junit.Test)

Example 5 with JobExecutionRecord

use of com.hazelcast.jet.impl.JobExecutionRecord in project hazelcast by hazelcast.

the class JetTestSupport method waitForNextSnapshot.

/**
 * Waits until the job's execution record reports a snapshot id greater than the
 * one observed when this method was called, then logs how long that took.
 *
 * @param jr                 repository used to look up the job execution record
 * @param jobId              id of the job whose snapshots are observed
 * @param timeoutSeconds     timeout passed to {@code assertTrueEventually}
 * @param allowEmptySnapshot if {@code false}, the new snapshot must additionally
 *                           report {@code snapshotStats().numBytes() > 0}
 */
public void waitForNextSnapshot(JobRepository jr, long jobId, int timeoutSeconds, boolean allowEmptySnapshot) {
    // guard the baseline lookup like the polling lambda below, so a missing
    // record fails with a descriptive assertion instead of an NPE
    JobExecutionRecord initialRecord = jr.getJobExecutionRecord(jobId);
    assertNotNull("jobExecutionRecord is null", initialRecord);
    long originalSnapshotId = initialRecord.snapshotId();
    // wait until there is at least one more snapshot
    // single-element array so the lambda can hand the observed id back out
    long[] snapshotId = { -1 };
    long start = System.nanoTime();
    assertTrueEventually(() -> {
        JobExecutionRecord record = jr.getJobExecutionRecord(jobId);
        assertNotNull("jobExecutionRecord is null", record);
        snapshotId[0] = record.snapshotId();
        assertTrue("No more snapshots produced in " + timeoutSeconds + " seconds", snapshotId[0] > originalSnapshotId);
        assertTrue("stats are 0", allowEmptySnapshot || record.snapshotStats().numBytes() > 0);
    }, timeoutSeconds);
    SUPPORT_LOGGER.info("Next snapshot found after " + NANOSECONDS.toMillis(System.nanoTime() - start) + " ms (id=" + snapshotId[0] + ", previous id=" + originalSnapshotId + ")");
}
Also used : JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord)

Aggregations

JobExecutionRecord (com.hazelcast.jet.impl.JobExecutionRecord): 8; Job (com.hazelcast.jet.Job): 6; JobConfig (com.hazelcast.jet.config.JobConfig): 6; JobRepository (com.hazelcast.jet.impl.JobRepository): 6; Test (org.junit.Test): 4; HazelcastInstance (com.hazelcast.core.HazelcastInstance): 3; NightlyTest (com.hazelcast.test.annotation.NightlyTest): 2; SlowTest (com.hazelcast.test.annotation.SlowTest): 2; DummyStatefulP (com.hazelcast.jet.core.TestProcessors.DummyStatefulP): 1; MockP (com.hazelcast.jet.core.TestProcessors.MockP): 1; MockPS (com.hazelcast.jet.core.TestProcessors.MockPS): 1; NoOutputSourceP (com.hazelcast.jet.core.TestProcessors.NoOutputSourceP): 1; JetServiceBackend (com.hazelcast.jet.impl.JetServiceBackend): 1; MasterContext (com.hazelcast.jet.impl.MasterContext): 1; Pipeline (com.hazelcast.jet.pipeline.Pipeline): 1; ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest): 1; CountDownLatch (java.util.concurrent.CountDownLatch): 1