
Example 26 with FunctionEx

Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.

The class SlidingWindowPTest, method before.

@Before
public void before() {
    SlidingWindowPolicy winPolicy = slidingWinPolicy(4, 1);
    AggregateOperation1<Entry<?, Long>, LongAccumulator, Long> operation = AggregateOperation
            .withCreate(LongAccumulator::new)
            .andAccumulate((LongAccumulator acc, Entry<?, Long> item) -> acc.add(item.getValue()))
            .andCombine(LongAccumulator::add)
            .andDeduct(hasDeduct ? LongAccumulator::subtract : null)
            .andExportFinish(LongAccumulator::get);
    FunctionEx<?, Long> keyFn = t -> KEY;
    ToLongFunctionEx<Entry<Long, Long>> timestampFn = Entry::getKey;
    SupplierEx<Processor> procSupplier = singleStageProcessor
            ? aggregateToSlidingWindowP(singletonList(keyFn), singletonList(timestampFn),
                    TimestampKind.EVENT, winPolicy, 0L, operation, KeyedWindowResult::new)
            : combineToSlidingWindowP(winPolicy, operation, KeyedWindowResult::new);
    // new supplier to save the last supplied instance
    supplier = () -> lastSuppliedProcessor = (SlidingWindowP) procSupplier.get();
}
Also used : LongAccumulator(com.hazelcast.jet.accumulator.LongAccumulator) Collections.shuffle(java.util.Collections.shuffle) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) Arrays(java.util.Arrays) KeyedWindowResult(com.hazelcast.jet.datamodel.KeyedWindowResult) QuickTest(com.hazelcast.test.annotation.QuickTest) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) Processor(com.hazelcast.jet.core.Processor) TimestampKind(com.hazelcast.jet.core.TimestampKind) TestSupport.verifyProcessor(com.hazelcast.jet.core.test.TestSupport.verifyProcessor) SlidingWindowPolicy.slidingWinPolicy(com.hazelcast.jet.core.SlidingWindowPolicy.slidingWinPolicy) ArrayList(java.util.ArrayList) Collections.singletonList(java.util.Collections.singletonList) Watermark(com.hazelcast.jet.core.Watermark) SlidingWindowPolicy(com.hazelcast.jet.core.SlidingWindowPolicy) Processors.aggregateToSlidingWindowP(com.hazelcast.jet.core.processor.Processors.aggregateToSlidingWindowP) AggregateOperation(com.hazelcast.jet.aggregate.AggregateOperation) Util.entry(com.hazelcast.jet.Util.entry) Arrays.asList(java.util.Arrays.asList) After(org.junit.After) JetTestSupport.wm(com.hazelcast.jet.core.JetTestSupport.wm) Processors.combineToSlidingWindowP(com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) UseParametersRunnerFactory(org.junit.runners.Parameterized.UseParametersRunnerFactory) FunctionEx(com.hazelcast.function.FunctionEx) LongStream(java.util.stream.LongStream) HazelcastParametrizedRunner(com.hazelcast.test.HazelcastParametrizedRunner) Parameter(org.junit.runners.Parameterized.Parameter) Collection(java.util.Collection) Assert.assertTrue(org.junit.Assert.assertTrue) HazelcastParallelParametersRunnerFactory(com.hazelcast.test.HazelcastParallelParametersRunnerFactory) Test(org.junit.Test) AggregateOperation1(com.hazelcast.jet.aggregate.AggregateOperation1) Category(org.junit.experimental.categories.Category) SupplierEx(com.hazelcast.function.SupplierEx) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ToLongFunctionEx(com.hazelcast.function.ToLongFunctionEx) Rule(org.junit.Rule) Entry(java.util.Map.Entry)
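The andDeduct step above is what makes the sliding window incremental: as the window slides, the expired frame is subtracted instead of re-accumulating the whole window. A minimal sketch driving the same primitives by hand, outside any processor (imports as in the "Also used" list above; `entry` is `com.hazelcast.jet.Util.entry`):

// Hand-driven sketch of the aggregate operation's primitives. Frames
// accumulate single items; the window accumulator combines frames in and
// deducts expired ones.
AggregateOperation1<Entry<?, Long>, LongAccumulator, Long> op = AggregateOperation
        .withCreate(LongAccumulator::new)
        .andAccumulate((LongAccumulator acc, Entry<?, Long> item) -> acc.add(item.getValue()))
        .andCombine(LongAccumulator::add)
        .andDeduct(LongAccumulator::subtract)
        .andExportFinish(LongAccumulator::get);

LongAccumulator frame = op.createFn().get();
op.accumulateFn().accept(frame, entry(77L, 2L));   // frame holds 2

LongAccumulator window = op.createFn().get();
op.combineFn().accept(window, frame);              // window slides over the frame: 2
op.deductFn().accept(window, frame);               // window slides past it: back to 0
Long exported = op.exportFn().apply(window);       // 0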

Example 27 with FunctionEx

Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.

The class SlidingWindowP_twoStageSnapshotTest, method before.

@Before
public void before() {
    SlidingWindowPolicy windowDef = slidingWinPolicy(4, 1);
    AggregateOperation1<Entry<?, Long>, LongAccumulator, Long> aggrOp = AggregateOperation
            .withCreate(LongAccumulator::new)
            .andAccumulate((LongAccumulator acc, Entry<?, Long> item) -> acc.add(item.getValue()))
            .andCombine(LongAccumulator::add)
            .andDeduct(LongAccumulator::subtract)
            .andExportFinish(LongAccumulator::get);
    SupplierEx<Processor> procSupplier1 = Processors.accumulateByFrameP(
            singletonList((FunctionEx<? super Entry<Long, Long>, ?>) t -> KEY),
            singletonList((ToLongFunctionEx<? super Entry<Long, Long>>) Entry::getKey),
            TimestampKind.EVENT, windowDef, aggrOp.withIdentityFinish());
    SupplierEx<Processor> procSupplier2 = combineToSlidingWindowP(windowDef, aggrOp, KeyedWindowResult::new);
    // new supplier to save the last supplied instance
    stage1Supplier = () -> lastSuppliedStage1Processor = (SlidingWindowP<?, ?, ?, ?>) procSupplier1.get();
    stage2Supplier = () -> lastSuppliedStage2Processor = (SlidingWindowP<?, ?, ?, ?>) procSupplier2.get();
}
Also used : LongAccumulator(com.hazelcast.jet.accumulator.LongAccumulator) Entry(java.util.Map.Entry) SlidingWindowPolicy(com.hazelcast.jet.core.SlidingWindowPolicy) Processor(com.hazelcast.jet.core.Processor) ToLongFunctionEx(com.hazelcast.function.ToLongFunctionEx) FunctionEx(com.hazelcast.function.FunctionEx) Processors.combineToSlidingWindowP(com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP) KeyedWindowResult(com.hazelcast.jet.datamodel.KeyedWindowResult) Before(org.junit.Before)
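Note that the stage-1 operation is `aggrOp.withIdentityFinish()`: the first stage must emit raw accumulators rather than finished `Long` results, so that the second stage can keep combining them. A sketch of the type change involved, reusing `aggrOp` from above:

// withIdentityFinish() replaces the finish step with the identity function,
// so the operation's result type becomes the accumulator type. Stage 1 emits
// LongAccumulator frames; stage 2 combines them and applies the real finish
// (LongAccumulator::get) to produce the Long window result.
AggregateOperation1<Entry<?, Long>, LongAccumulator, LongAccumulator> stage1Op =
        aggrOp.withIdentityFinish();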

Example 28 with FunctionEx

Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.

The class JobRestartWithSnapshotTest, method when_nodeDown_then_jobRestartsFromSnapshot.

@SuppressWarnings("unchecked")
private void when_nodeDown_then_jobRestartsFromSnapshot(boolean twoStage) throws Exception {
    /*
        Design of this test:

        It uses a random partitioned generator of source events. The events
        are Map.Entry(partitionId, timestamp). For each partition, timestamps
        from 0..elementsInPartition are generated.

        We start the test with two nodes, localParallelism(1), and 3 source
        partitions. The source instances generate items at the same rate of
        10 per second, so the instance that owns a single partition advances
        its timestamps twice as fast as the instance that owns two. The
        source processor saves partition offsets similarly to how
        KafkaSources.kafka() and Sources.mapJournal() do.

        After some time we shut down one instance. The job restarts from the
        snapshot and all partitions are restored to a single source processor
        instance. The partition offsets now differ widely, so the source is
        written to emit from the most-behind partition first, in order not to
        emit late events from the partitions that are further ahead.

        Local parallelism of InsertWatermarkP is also 1, to avoid the edge
        case where different InsertWatermarkP instances initialize on their
        first event in different frames and start the no-gap watermark
        emission from different values, which could cause the downstream
        SlidingWindowP to miss some of the first windows.

        The sink writes to an IMap, which is an idempotent sink.

        The resulting contents of the sink map are compared to the expected
        values.
        */
    DAG dag = new DAG();
    SlidingWindowPolicy wDef = SlidingWindowPolicy.tumblingWinPolicy(3);
    AggregateOperation1<Object, LongAccumulator, Long> aggrOp = counting();
    IMap<List<Long>, Long> result = instance1.getMap("result");
    result.clear();
    int numPartitions = 3;
    int elementsInPartition = 250;
    SupplierEx<Processor> sup = () -> new SequencesInPartitionsGeneratorP(numPartitions, elementsInPartition, true);
    Vertex generator = dag.newVertex("generator", throttle(sup, 30)).localParallelism(1);
    Vertex insWm = dag.newVertex("insWm", insertWatermarksP(eventTimePolicy(
            o -> ((Entry<Integer, Integer>) o).getValue(), limitingLag(0),
            wDef.frameSize(), wDef.frameOffset(), 0))).localParallelism(1);
    Vertex map = dag.newVertex("map", mapP((KeyedWindowResult kwr) ->
            entry(asList(kwr.end(), (long) (int) kwr.key()), kwr.result())));
    Vertex writeMap = dag.newVertex("writeMap", SinkProcessors.writeMapP("result"));
    if (twoStage) {
        Vertex aggregateStage1 = dag.newVertex("aggregateStage1", Processors.accumulateByFrameP(
                singletonList((FunctionEx<? super Object, ?>) t -> ((Entry<Integer, Integer>) t).getKey()),
                singletonList(t1 -> ((Entry<Integer, Integer>) t1).getValue()),
                TimestampKind.EVENT, wDef, aggrOp.withIdentityFinish()));
        Vertex aggregateStage2 = dag.newVertex("aggregateStage2",
                combineToSlidingWindowP(wDef, aggrOp, KeyedWindowResult::new));
        dag.edge(between(insWm, aggregateStage1).partitioned(entryKey()))
           .edge(between(aggregateStage1, aggregateStage2).distributed().partitioned(entryKey()))
           .edge(between(aggregateStage2, map));
    } else {
        Vertex aggregate = dag.newVertex("aggregate", Processors.aggregateToSlidingWindowP(
                singletonList((FunctionEx<Object, Integer>) t -> ((Entry<Integer, Integer>) t).getKey()),
                singletonList(t1 -> ((Entry<Integer, Integer>) t1).getValue()),
                TimestampKind.EVENT, wDef, 0L, aggrOp, KeyedWindowResult::new));
        dag.edge(between(insWm, aggregate).distributed().partitioned(entryKey()))
           .edge(between(aggregate, map));
    }
    dag.edge(between(generator, insWm)).edge(between(map, writeMap));
    JobConfig config = new JobConfig();
    config.setProcessingGuarantee(EXACTLY_ONCE);
    config.setSnapshotIntervalMillis(1200);
    Job job = instance1.getJet().newJob(dag, config);
    JobRepository jobRepository = new JobRepository(instance1);
    int timeout = (int) (MILLISECONDS.toSeconds(config.getSnapshotIntervalMillis() * 3) + 8);
    waitForFirstSnapshot(jobRepository, job.getId(), timeout, false);
    waitForNextSnapshot(jobRepository, job.getId(), timeout, false);
    // wait a little longer so the job emits something that will be overwritten in the sink map after the restart
    Thread.sleep(300);
    instance2.getLifecycleService().terminate();
    // Now the job should detect member shutdown and restart from snapshot.
    // Let's wait until the next snapshot appears.
    waitForNextSnapshot(jobRepository, job.getId(), (int) (MILLISECONDS.toSeconds(config.getSnapshotIntervalMillis()) + 10), false);
    waitForNextSnapshot(jobRepository, job.getId(), timeout, false);
    job.join();
    // compute expected result
    Map<List<Long>, Long> expectedMap = new HashMap<>();
    for (long partition = 0; partition < numPartitions; partition++) {
        long cnt = 0;
        for (long value = 1; value <= elementsInPartition; value++) {
            cnt++;
            if (value % wDef.frameSize() == 0) {
                expectedMap.put(asList(value, partition), cnt);
                cnt = 0;
            }
        }
        if (cnt > 0) {
            expectedMap.put(asList(wDef.higherFrameTs(elementsInPartition - 1), partition), cnt);
        }
    }
    // check expected result
    if (!expectedMap.equals(result)) {
        System.out.println("All expected entries: " + expectedMap.entrySet().stream().map(Object::toString).collect(joining(", ")));
        System.out.println("All actual entries: " + result.entrySet().stream().map(Object::toString).collect(joining(", ")));
        System.out.println("Non-received expected items: " + expectedMap.keySet().stream().filter(key -> !result.containsKey(key)).map(Object::toString).collect(joining(", ")));
        System.out.println("Received non-expected items: " + result.entrySet().stream().filter(entry -> !expectedMap.containsKey(entry.getKey())).map(Object::toString).collect(joining(", ")));
        System.out.println("Different keys: ");
        for (Entry<List<Long>, Long> rEntry : result.entrySet()) {
            Long expectedValue = expectedMap.get(rEntry.getKey());
            if (expectedValue != null && !expectedValue.equals(rEntry.getValue())) {
                System.out.println("key: " + rEntry.getKey() + ", expected value: " + expectedValue + ", actual value: " + rEntry.getValue());
            }
        }
        System.out.println("-- end of different keys");
        assertEquals(expectedMap, new HashMap<>(result));
    }
}
Also used : ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) Traverser(com.hazelcast.jet.Traverser) Arrays(java.util.Arrays) PacketFiltersUtil.delayOperationsFrom(com.hazelcast.test.PacketFiltersUtil.delayOperationsFrom) KeyedWindowResult(com.hazelcast.jet.datamodel.KeyedWindowResult) Processors.mapP(com.hazelcast.jet.core.processor.Processors.mapP) Collections.singletonList(java.util.Collections.singletonList) Functions.entryKey(com.hazelcast.function.Functions.entryKey) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) FunctionEx(com.hazelcast.function.FunctionEx) JobConfig(com.hazelcast.jet.config.JobConfig) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) SupplierEx(com.hazelcast.function.SupplierEx) Collectors.joining(java.util.stream.Collectors.joining) List(java.util.List) BroadcastKey.broadcastKey(com.hazelcast.jet.core.BroadcastKey.broadcastKey) SinkProcessors(com.hazelcast.jet.core.processor.SinkProcessors) HazelcastParallelClassRunner(com.hazelcast.test.HazelcastParallelClassRunner) Entry(java.util.Map.Entry) JobExecutionRecord(com.hazelcast.jet.impl.JobExecutionRecord) Util.arrayIndexOf(com.hazelcast.jet.impl.util.Util.arrayIndexOf) IntStream(java.util.stream.IntStream) RunWith(org.junit.runner.RunWith) Processors(com.hazelcast.jet.core.processor.Processors) HashMap(java.util.HashMap) JetInitDataSerializerHook(com.hazelcast.jet.impl.execution.init.JetInitDataSerializerHook) HashSet(java.util.HashSet) TestUtil.throttle(com.hazelcast.jet.core.TestUtil.throttle) Util.entry(com.hazelcast.jet.Util.entry) Processors.combineToSlidingWindowP(com.hazelcast.jet.core.processor.Processors.combineToSlidingWindowP) ExpectedException(org.junit.rules.ExpectedException) Nonnull(javax.annotation.Nonnull) Processors.insertWatermarksP(com.hazelcast.jet.core.processor.Processors.insertWatermarksP) Job(com.hazelcast.jet.Job) Before(org.junit.Before) JobRepository(com.hazelcast.jet.impl.JobRepository) Config(com.hazelcast.config.Config) HazelcastInstance(com.hazelcast.core.HazelcastInstance) Assert.assertNotNull(org.junit.Assert.assertNotNull) EXACTLY_ONCE(com.hazelcast.jet.config.ProcessingGuarantee.EXACTLY_ONCE) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AggregateOperation1(com.hazelcast.jet.aggregate.AggregateOperation1) SlowTest(com.hazelcast.test.annotation.SlowTest) WatermarkPolicy.limitingLag(com.hazelcast.jet.core.WatermarkPolicy.limitingLag) Traversers(com.hazelcast.jet.Traversers) Rule(org.junit.Rule) LongAccumulator(com.hazelcast.jet.accumulator.LongAccumulator) EventTimePolicy.eventTimePolicy(com.hazelcast.jet.core.EventTimePolicy.eventTimePolicy) Assert.assertEquals(org.junit.Assert.assertEquals) IMap(com.hazelcast.map.IMap) Edge.between(com.hazelcast.jet.core.Edge.between) SinkProcessors.writeListP(com.hazelcast.jet.core.processor.SinkProcessors.writeListP)
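The design comment above hinges on the source emitting from the most-behind partition after the restart. A minimal sketch of that selection rule, under the assumption of a per-partition `offsets` array (the array name and the emit step are hypothetical, standing in for what SequencesInPartitionsGeneratorP does internally):

// Sketch: always emit from the partition whose next timestamp is lowest,
// so no partition's events arrive late relative to the watermark driven by
// the most-behind partition.
int mostBehind = 0;
for (int i = 1; i < offsets.length; i++) {
    if (offsets[i] < offsets[mostBehind]) {
        mostBehind = i;
    }
}
// ... emit the next item of partition `mostBehind` and advance its offset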

Example 29 with FunctionEx

Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.

The class ExecutionLifecycleTest, method when_dataSerializable_processorSupplier_notSerializable_then_jobFails.

@Test
public void when_dataSerializable_processorSupplier_notSerializable_then_jobFails() {
    DAG dag = new DAG();
    dag.newVertex("v", ProcessorMetaSupplier.of((FunctionEx<? super Address, ? extends ProcessorSupplier>) address -> new NotSerializable_DataSerializable_ProcessorSupplier()));
    Job job = newJob(dag);
    Exception e = assertThrows(Exception.class, () -> job.join());
    assertContains(e.getMessage(), "Failed to serialize");
}
Also used : Address(com.hazelcast.cluster.Address) FunctionEx(com.hazelcast.function.FunctionEx) Job(com.hazelcast.jet.Job) JobTerminateRequestedException(com.hazelcast.jet.impl.exception.JobTerminateRequestedException) CancellationException(java.util.concurrent.CancellationException) CompletionException(java.util.concurrent.CompletionException) ExpectedException(org.junit.rules.ExpectedException) MemberLeftException(com.hazelcast.core.MemberLeftException) IOException(java.io.IOException) NotSerializableException(java.io.NotSerializableException) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) QuickTest(com.hazelcast.test.annotation.QuickTest) Test(org.junit.Test)
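For contrast, a sketch of the passing case: if the lambda yields a supplier built only from serializable parts, the same wiring serializes fine. Here the built-in no-op processor from Processors stands in for any serializable Processor:

// Sketch: the same vertex wiring, but with a ProcessorSupplier that captures
// no non-serializable state, so DAG serialization succeeds at submit time.
DAG okDag = new DAG();
okDag.newVertex("v", ProcessorMetaSupplier.of(
        (FunctionEx<? super Address, ? extends ProcessorSupplier>) address ->
                ProcessorSupplier.of(Processors.noopP())));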

Example 30 with FunctionEx

Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.

The class S3Sources, method s3.

/**
 * Creates an AWS S3 {@link BatchSource} which lists all the objects in the
 * given buckets filtered by the given {@code prefix}, reads them line by
 * line, transforms each line to the desired output object using the given
 * {@code mapFn}, and emits the results downstream.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * The default local parallelism for this processor is 2.
 * <p>
 * Here is an example which reads the objects from two buckets, applying
 * the given prefix.
 *
 * <pre>{@code
 * Pipeline p = Pipeline.create();
 * BatchStage<String> srcStage = p.readFrom(S3Sources.s3(
 *      Arrays.asList("bucket1", "bucket2"),
 *      "prefix",
 *      StandardCharsets.UTF_8,
 *      () -> S3Client.create(),
 *      (filename, line) -> line
 * ));
 * }</pre>
 *
 * @param bucketNames    list of bucket names
 * @param prefix         the prefix used to filter the objects. Optional;
 *                       passing {@code null} will list all objects.
 * @param charset        the charset used to decode the objects' contents
 * @param clientSupplier function which returns the S3 client to use;
 *                       one client is created per processor instance
 * @param mapFn          the function which creates the output object from
 *                       each line; it receives the object name and the line
 *                       as parameters
 * @param <T>            the type of the items the source emits
 */
@Nonnull
public static <T> BatchSource<T> s3(
        @Nonnull List<String> bucketNames,
        @Nullable String prefix,
        @Nonnull Charset charset,
        @Nonnull SupplierEx<? extends S3Client> clientSupplier,
        @Nonnull BiFunctionEx<String, String, ? extends T> mapFn) {
    String charsetName = charset.name();
    FunctionEx<InputStream, Stream<String>> readFileFn = responseInputStream -> {
        BufferedReader reader = new BufferedReader(new InputStreamReader(responseInputStream, Charset.forName(charsetName)));
        return reader.lines();
    };
    return s3(bucketNames, prefix, clientSupplier, readFileFn, mapFn);
}
Also used : Traverser(com.hazelcast.jet.Traverser) S3Object(software.amazon.awssdk.services.s3.model.S3Object) Traversers.traverseStream(com.hazelcast.jet.Traversers.traverseStream) GetObjectResponse(software.amazon.awssdk.services.s3.model.GetObjectResponse) BiFunctionEx(com.hazelcast.function.BiFunctionEx) Charset(java.nio.charset.Charset) Util.entry(com.hazelcast.jet.Util.entry) GetObjectRequest(software.amazon.awssdk.services.s3.model.GetObjectRequest) SourceBuffer(com.hazelcast.jet.pipeline.SourceBuilder.SourceBuffer) ResponseInputStream(software.amazon.awssdk.core.ResponseInputStream) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) FunctionEx(com.hazelcast.function.FunctionEx) BatchSource(com.hazelcast.jet.pipeline.BatchSource) Iterator(java.util.Iterator) S3Client(software.amazon.awssdk.services.s3.S3Client) UTF_8(java.nio.charset.StandardCharsets.UTF_8) InputStreamReader(java.io.InputStreamReader) SupplierEx(com.hazelcast.function.SupplierEx) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) Stream(java.util.stream.Stream) Context(com.hazelcast.jet.core.Processor.Context) Entry(java.util.Map.Entry) TriFunction(com.hazelcast.jet.function.TriFunction) BufferedReader(java.io.BufferedReader) SourceBuilder(com.hazelcast.jet.pipeline.SourceBuilder) InputStream(java.io.InputStream)
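A minimal usage sketch of this overload, assuming a bucket named "my-bucket" exists and credentials are available through the default AWS provider chain; the logger sink and the imports from com.hazelcast.jet.pipeline stand in for whatever a real pipeline would use:

// Sketch: read every object of one bucket line by line and log each line
// prefixed with the object name. Bucket name and sink are assumptions.
Pipeline p = Pipeline.create();
p.readFrom(S3Sources.s3(
        singletonList("my-bucket"),
        null,                          // no prefix: list all objects
        UTF_8,
        S3Client::create,
        (objectName, line) -> objectName + ": " + line))
 .writeTo(Sinks.logger());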

Aggregations

FunctionEx (com.hazelcast.function.FunctionEx): 44
List (java.util.List): 30
Nonnull (javax.annotation.Nonnull): 20
DAG (com.hazelcast.jet.core.DAG): 18
Test (org.junit.Test): 18
Collections.singletonList (java.util.Collections.singletonList): 15
Assert.assertEquals (org.junit.Assert.assertEquals): 15
Edge (com.hazelcast.jet.core.Edge): 14
BiFunctionEx (com.hazelcast.function.BiFunctionEx): 13
Function (java.util.function.Function): 13
LongAccumulator (com.hazelcast.jet.accumulator.LongAccumulator): 12
ArrayList (java.util.ArrayList): 12
ToLongFunctionEx (com.hazelcast.function.ToLongFunctionEx): 11
Vertex (com.hazelcast.jet.core.Vertex): 11
ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest): 11
Entry (java.util.Map.Entry): 11
Assert.assertTrue (org.junit.Assert.assertTrue): 11
JetException (com.hazelcast.jet.JetException): 10
AggregateOperation1 (com.hazelcast.jet.aggregate.AggregateOperation1): 10
QuickTest (com.hazelcast.test.annotation.QuickTest): 10