
Example 6 with KeyedWindowResult

Use of com.hazelcast.jet.datamodel.KeyedWindowResult in project hazelcast by hazelcast.

From the class JobRestartWithSnapshotTest, method when_nodeDown_then_jobRestartsFromSnapshot.

@SuppressWarnings("unchecked")
private void when_nodeDown_then_jobRestartsFromSnapshot(boolean twoStage) throws Exception {
    /*
        Design of this test:

        It uses a random partitioned generator of source events. The events are
        Map.Entry(partitionId, timestamp). For each partition, timestamps from
        0..elementsInPartition are generated.

        We start the test with two nodes, localParallelism(1) and 3 partitions for the
        source. Source instances generate items at the same rate of 10 per second; since
        one instance owns two partitions and the other owns one, the single-partition
        instance advances twice as fast in terms of timestamp. The source processor saves
        partition offsets similarly to how KafkaSources.kafka() and Sources.mapJournal() do.

        After some time we shut down one instance. The job restarts from the
        snapshot and all partitions are restored to a single source processor
        instance. The partition offsets are by then very different, so the source is
        written to always emit from the most-behind partition, in order not to emit
        events that would be late relative to the partitions that are further ahead.

        Local parallelism of InsertWatermarkP is also 1 to avoid the edge case where
        different InsertWatermarkP instances initialize with their first event in
        different frames and therefore start the no-gap watermark emission from
        different WMs, which might cause the downstream SlidingWindowP to miss some
        of the first windows.

        The sink writes to an IMap, which is an idempotent sink.

        The resulting contents of the sink map are compared to the expected values.
        */
    DAG dag = new DAG();
    SlidingWindowPolicy wDef = SlidingWindowPolicy.tumblingWinPolicy(3);
    AggregateOperation1<Object, LongAccumulator, Long> aggrOp = counting();
    IMap<List<Long>, Long> result = instance1.getMap("result");
    result.clear();
    int numPartitions = 3;
    int elementsInPartition = 250;
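    // the source supplier creates processors that emit Map.Entry(partitionId, timestamp) items
    // and snapshot their per-partition offsets (see the design comment above)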
    SupplierEx<Processor> sup = () -> new SequencesInPartitionsGeneratorP(numPartitions, elementsInPartition, true);
    Vertex generator = dag.newVertex("generator", throttle(sup, 30)).localParallelism(1);
    Vertex insWm = dag.newVertex("insWm", insertWatermarksP(eventTimePolicy(
            o -> ((Entry<Integer, Integer>) o).getValue(),
            limitingLag(0), wDef.frameSize(), wDef.frameOffset(), 0)))
            .localParallelism(1);
    Vertex map = dag.newVertex("map", mapP((KeyedWindowResult kwr) -> entry(asList(kwr.end(), (long) (int) kwr.key()), kwr.result())));
    Vertex writeMap = dag.newVertex("writeMap", SinkProcessors.writeMapP("result"));
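    // the windowing stage is built either as two vertices (accumulate by frame + combine) or as
    // a single aggregate-to-sliding-window vertex, depending on the twoStage flag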
    if (twoStage) {
        Vertex aggregateStage1 = dag.newVertex("aggregateStage1", Processors.accumulateByFrameP(
                singletonList((FunctionEx<? super Object, ?>) t -> ((Entry<Integer, Integer>) t).getKey()),
                singletonList(t1 -> ((Entry<Integer, Integer>) t1).getValue()),
                TimestampKind.EVENT, wDef,
                aggrOp.withIdentityFinish()));
        Vertex aggregateStage2 = dag.newVertex("aggregateStage2", combineToSlidingWindowP(wDef, aggrOp, KeyedWindowResult::new));
        dag.edge(between(insWm, aggregateStage1).partitioned(entryKey()))
           .edge(between(aggregateStage1, aggregateStage2).distributed().partitioned(entryKey()))
           .edge(between(aggregateStage2, map));
    } else {
        Vertex aggregate = dag.newVertex("aggregate", Processors.aggregateToSlidingWindowP(
                singletonList((FunctionEx<Object, Integer>) t -> ((Entry<Integer, Integer>) t).getKey()),
                singletonList(t1 -> ((Entry<Integer, Integer>) t1).getValue()),
                TimestampKind.EVENT, wDef, 0L,
                aggrOp, KeyedWindowResult::new));
        dag.edge(between(insWm, aggregate).distributed().partitioned(entryKey()))
           .edge(between(aggregate, map));
    }
    dag.edge(between(generator, insWm)).edge(between(map, writeMap));
    JobConfig config = new JobConfig();
    config.setProcessingGuarantee(EXACTLY_ONCE);
    config.setSnapshotIntervalMillis(1200);
    Job job = instance1.getJet().newJob(dag, config);
    JobRepository jobRepository = new JobRepository(instance1);
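    // timeout: about three snapshot intervals, converted to seconds, plus a safety margin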
    int timeout = (int) (MILLISECONDS.toSeconds(config.getSnapshotIntervalMillis() * 3) + 8);
    waitForFirstSnapshot(jobRepository, job.getId(), timeout, false);
    waitForNextSnapshot(jobRepository, job.getId(), timeout, false);
    // wait a little longer so that the job emits something that will later be overwritten in the sink map
    Thread.sleep(300);
    instance2.getLifecycleService().terminate();
    // Now the job should detect member shutdown and restart from snapshot.
    // Let's wait until the next snapshot appears.
    waitForNextSnapshot(jobRepository, job.getId(), (int) (MILLISECONDS.toSeconds(config.getSnapshotIntervalMillis()) + 10), false);
    waitForNextSnapshot(jobRepository, job.getId(), timeout, false);
    job.join();
    // compute expected result
    Map<List<Long>, Long> expectedMap = new HashMap<>();
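    // each full tumbling window contributes one entry keyed by (windowEnd, partition);
    // the trailing partial window is keyed by higherFrameTs of the last timestamp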
    for (long partition = 0; partition < numPartitions; partition++) {
        long cnt = 0;
        for (long value = 1; value <= elementsInPartition; value++) {
            cnt++;
            if (value % wDef.frameSize() == 0) {
                expectedMap.put(asList(value, partition), cnt);
                cnt = 0;
            }
        }
        if (cnt > 0) {
            expectedMap.put(asList(wDef.higherFrameTs(elementsInPartition - 1), partition), cnt);
        }
    }
    // check expected result
    if (!expectedMap.equals(result)) {
        System.out.println("All expected entries: " + expectedMap.entrySet().stream().map(Object::toString).collect(joining(", ")));
        System.out.println("All actual entries: " + result.entrySet().stream().map(Object::toString).collect(joining(", ")));
        System.out.println("Non-received expected items: " + expectedMap.keySet().stream().filter(key -> !result.containsKey(key)).map(Object::toString).collect(joining(", ")));
        System.out.println("Received non-expected items: " + result.entrySet().stream().filter(entry -> !expectedMap.containsKey(entry.getKey())).map(Object::toString).collect(joining(", ")));
        System.out.println("Different keys: ");
        for (Entry<List<Long>, Long> rEntry : result.entrySet()) {
            Long expectedValue = expectedMap.get(rEntry.getKey());
            if (expectedValue != null && !expectedValue.equals(rEntry.getValue())) {
                System.out.println("key: " + rEntry.getKey() + ", expected value: " + expectedValue + ", actual value: " + rEntry.getValue());
            }
        }
        System.out.println("-- end of different keys");
        assertEquals(expectedMap, new HashMap<>(result));
    }
}
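
The SequencesInPartitionsGeneratorP source used above is not shown in this excerpt. Below is a minimal, self-contained sketch of the "emit from the most-behind partition" strategy that the design comment describes; all names are illustrative assumptions, not Hazelcast APIs, and the real processor additionally snapshots and restores its per-partition offsets.

import java.util.AbstractMap.SimpleEntry;
import java.util.Map.Entry;

// Illustrative sketch only: picks the next (partitionId, timestamp) item from whichever
// partition is most behind, so no emitted event can be late relative to earlier emissions.
class MostBehindPartitionSketch {
    private final int[] offsets;           // next timestamp per partition; these are what a snapshot would save
    private final int elementsInPartition;

    MostBehindPartitionSketch(int numPartitions, int elementsInPartition) {
        this.offsets = new int[numPartitions];
        this.elementsInPartition = elementsInPartition;
    }

    Entry<Integer, Integer> nextItem() {
        int minPartition = -1;
        for (int p = 0; p < offsets.length; p++) {
            if (offsets[p] < elementsInPartition
                    && (minPartition == -1 || offsets[p] < offsets[minPartition])) {
                minPartition = p;
            }
        }
        if (minPartition == -1) {
            return null;                   // all partitions exhausted
        }
        return new SimpleEntry<>(minPartition, offsets[minPartition]++);
    }
}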
Also used : ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) Traverser(com.hazelcast.jet.Traverser) Arrays(java.util.Arrays) PacketFiltersUtil.delayOperationsFrom(com.hazelcast.test.PacketFiltersUtil.delayOperationsFrom) KeyedWindowResult(com.hazelcast.jet.datamodel.KeyedWindowResult) Processors.mapP(com.hazelcast.jet.core.processor.Processors.mapP) Collections.singletonList(java.util.Collections.singletonList) Functions.entryKey(com.hazelcast.function.Functions.entryKey) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) FunctionEx(com.hazelcast.function.FunctionEx) JobConfig(com.hazelcast.jet.config.JobConfig) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) SupplierEx(com.hazelcast.function.SupplierEx) Collectors.joining(java.util.stream.Collectors.joining) List(java.util.List) BroadcastKey.broadcastKey(com.hazelcast.jet.core.BroadcastKey.broadcastKey) SinkProcessors(com.hazelcast.jet.core.processor.SinkProcessors) HazelcastParallelClassRunner(com.hazelcast.test.HazelcastParallelClassRunner) Entry(java.util.Map.Entry) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) Util.arrayIndexOf(com.hazelcast.jet.impl.util.Util.arrayIndexOf) IntStream(java.util.stream.IntStream) RunWith(org.junit.runner.RunWith) Processors(com.hazelcast.jet.core.processor.Processors) HashMap(java.util.HashMap) JetInitDataSerializerHook(com.hazelcast.jet.impl.execution.init.JetInitDataSerializerHook) HashSet(java.util.HashSet) TestUtil.throttle(com.hazelcast.jet.core.TestUtil.throttle) Util.entry(com.hazelcast.jet.Util.entry) Processors.combineToSlidingWindowP(com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP) ExpectedException(org.junit.rules.ExpectedException) Nonnull(javax.annotation.Nonnull) Processors.insertWatermarksP(com.hazelcast.jet.core.processor.Processors.insertWatermarksP) Job(com.hazelcast.jet.Job) Before(org.junit.Before) JobRepository(com.hazelcast.jet.impl.JobRepository) Config(com.hazelcast.config.Config) HazelcastInstance(com.hazelcast.core.HazelcastInstance) Assert.assertNotNull(org.junit.Assert.assertNotNull) EXACTLY_ONCE(com.hazelcast.jet.config.ProcessingGuarantee.EXACTLY_ONCE) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AggregateOperation1(com.hazelcast.jet.aggregate.AggregateOperation1) SlowTest(com.hazelcast.test.annotation.SlowTest) WatermarkPolicy.limitingLag(com.hazelcast.jet.core.WatermarkPolicy.limitingLag) Traversers(com.hazelcast.jet.Traversers) Rule(org.junit.Rule) LongAccumulator(com.hazelcast.jet.accumulator.LongAccumulator) EventTimePolicy.eventTimePolicy(com.hazelcast.jet.core.EventTimePolicy.eventTimePolicy) Assert.assertEquals(org.junit.Assert.assertEquals) IMap(com.hazelcast.map.IMap) Edge.between(com.hazelcast.jet.core.Edge.between) SinkProcessors.writeListP(com.hazelcast.jet.core.processor.SinkProcessors.writeListP)

Example 7 with KeyedWindowResult

Use of com.hazelcast.jet.datamodel.KeyedWindowResult in project hazelcast by hazelcast.

From the class WatermarkCoalescer_TerminalSnapshotTest, method test.

@Test
public void test() throws Exception {
    /*
        This test covers the issue that, after a terminal barrier is processed, no other work
        should be done by the ProcessorTasklet or CIES (except for emitting the DONE_ITEM).
        Also, if at-least-once guarantee is used, the tasklet should not continue to drain
        the queue that had the barrier while waiting for other barriers.

        Specifically, the issue was that in at-least-once mode the DONE_ITEM was processed
        after the terminal barrier while waiting for the barrier on other queues/edges. The
        DONE_ITEM could cause a WM to be emitted after the barrier, which is acceptable in
        at-least-once mode, but a terminal snapshot should behave as if exactly-once mode
        were used.

        The test makes the coalescer wait for a WM (by introducing stream skew), then performs
        a graceful restart in at-least-once mode and checks that the results are correct.
         */
    String key0 = generateKeyForPartition(instance, 0);
    String key1 = generateKeyForPartition(instance, 1);
    Pipeline p = Pipeline.create();
    p.readFrom(Sources.mapJournal(sourceMap, JournalInitialPosition.START_FROM_OLDEST))
     .withTimestamps(Map.Entry::getValue, 0)
     .setLocalParallelism(PARTITION_COUNT)
     .groupingKey(Map.Entry::getKey)
     .window(WindowDefinition.sliding(1, 1))
     .aggregate(AggregateOperations.counting())
     .setLocalParallelism(PARTITION_COUNT)
     .writeTo(SinkBuilder.sinkBuilder("throwing", ctx -> "")
             .<KeyedWindowResult<String, Long>>receiveFn((w, kwr) -> {
                 if (kwr.result() != COUNT) {
                     throw new RuntimeException("Received unexpected item " + kwr + ", expected count is " + COUNT);
                 }
             })
             .build());
    Job job = instance.getJet().newJob(p, new JobConfig().setProcessingGuarantee(ProcessingGuarantee.AT_LEAST_ONCE));
    List<Future> futures = new ArrayList<>();
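    // periodically force a graceful restart; each restart takes a terminal snapshot while the
    // coalescer is still waiting for the slower stream's WM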
    futures.add(spawn(() -> {
        for (; ; ) {
            assertJobStatusEventually(job, JobStatus.RUNNING);
            System.out.println("============RESTARTING JOB=========");
            job.restart();
            Thread.sleep(2000);
        }
    }));
    // one producer is twice as fast as the other, to cause waiting for WM while doing snapshot
    futures.add(spawn(() -> producer(key0, 1)));
    futures.add(spawn(() -> producer(key1, 2)));
    sleepSeconds(20);
    for (Future f : futures) {
        f.cancel(true);
        // check that the future was cancelled and didn't fail with another error
        try {
            f.get();
            fail("Exception was expected");
        } catch (CancellationException expected) {
        }
    }
    // check that the job is running
    JobStatus status = job.getStatus();
    assertTrue("job should not be completed, status=" + status, status != FAILED && status != COMPLETED && status != SUSPENDED);
}
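
The producer helper used by the spawned tasks is not part of this excerpt. A plausible sketch, assuming sourceMap is the journal-backed IMap<String, Long> read by the pipeline, COUNT is the per-window item count the sink asserts, and the second argument is the event-time step (so the step-2 producer's event time advances twice as fast, creating the stream skew mentioned in the comment):

// Hypothetical sketch of the producer referenced above; the actual helper may differ.
private void producer(String key, long step) {
    for (long ts = 0; ; ts += step) {
        for (int i = 0; i < COUNT; i++) {
            sourceMap.put(key, ts);                               // each put is journaled; getValue() supplies the event time
        }
        LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(1));  // pace the stream so windows keep closing
    }
}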
Also used : ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) KeyedWindowResult(com.hazelcast.jet.datamodel.KeyedWindowResult) QuickTest(com.hazelcast.test.annotation.QuickTest) RunWith(org.junit.runner.RunWith) EventJournalConfig(com.hazelcast.config.EventJournalConfig) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Map(java.util.Map) SUSPENDED(com.hazelcast.jet.core.JobStatus.SUSPENDED) Assert.fail(org.junit.Assert.fail) JobStatus(com.hazelcast.jet.core.JobStatus) Job(com.hazelcast.jet.Job) Before(org.junit.Before) Config(com.hazelcast.config.Config) HazelcastInstance(com.hazelcast.core.HazelcastInstance) WindowDefinition(com.hazelcast.jet.pipeline.WindowDefinition) Pipeline(com.hazelcast.jet.pipeline.Pipeline) CancellationException(java.util.concurrent.CancellationException) JetTestSupport(com.hazelcast.jet.core.JetTestSupport) JobConfig(com.hazelcast.jet.config.JobConfig) AggregateOperations(com.hazelcast.jet.aggregate.AggregateOperations) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Category(org.junit.experimental.categories.Category) ClusterProperty(com.hazelcast.spi.properties.ClusterProperty) Sources(com.hazelcast.jet.pipeline.Sources) TimeUnit(java.util.concurrent.TimeUnit) LockSupport(java.util.concurrent.locks.LockSupport) List(java.util.List) JournalInitialPosition(com.hazelcast.jet.pipeline.JournalInitialPosition) HazelcastParallelClassRunner(com.hazelcast.test.HazelcastParallelClassRunner) FAILED(com.hazelcast.jet.core.JobStatus.FAILED) SinkBuilder(com.hazelcast.jet.pipeline.SinkBuilder) ProcessingGuarantee(com.hazelcast.jet.config.ProcessingGuarantee) COMPLETED(com.hazelcast.jet.core.JobStatus.COMPLETED) IMap(com.hazelcast.map.IMap)

Aggregations

KeyedWindowResult (com.hazelcast.jet.datamodel.KeyedWindowResult)7 Test (org.junit.Test)6 LongAccumulator (com.hazelcast.jet.accumulator.LongAccumulator)5 ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest)5 QuickTest (com.hazelcast.test.annotation.QuickTest)4 ArrayList (java.util.ArrayList)4 List (java.util.List)4 Entry (java.util.Map.Entry)4 Before (org.junit.Before)4 HazelcastInstance (com.hazelcast.core.HazelcastInstance)3 FunctionEx (com.hazelcast.function.FunctionEx)3 Job (com.hazelcast.jet.Job)3 Util.entry (com.hazelcast.jet.Util.entry)3 AggregateOperation1 (com.hazelcast.jet.aggregate.AggregateOperation1)3 Processors.combineToSlidingWindowP (com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP)3 Arrays.asList (java.util.Arrays.asList)3 Collections.singletonList (java.util.Collections.singletonList)3 Assert.assertTrue (org.junit.Assert.assertTrue)3 Rule (org.junit.Rule)3 Category (org.junit.experimental.categories.Category)3