Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.
From the class SlidingWindowPTest, method before():
@Before
public void before() {
SlidingWindowPolicy winPolicy = slidingWinPolicy(4, 1);
AggregateOperation1<Entry<?, Long>, LongAccumulator, Long> operation = AggregateOperation
        .withCreate(LongAccumulator::new)
        .andAccumulate((LongAccumulator acc, Entry<?, Long> item) -> acc.add(item.getValue()))
        .andCombine(LongAccumulator::add)
        .andDeduct(hasDeduct ? LongAccumulator::subtract : null)
        .andExportFinish(LongAccumulator::get);
FunctionEx<?, Long> keyFn = t -> KEY;
ToLongFunctionEx<Entry<Long, Long>> timestampFn = Entry::getKey;
SupplierEx<Processor> procSupplier = singleStageProcessor
        ? aggregateToSlidingWindowP(singletonList(keyFn), singletonList(timestampFn),
                TimestampKind.EVENT, winPolicy, 0L, operation, KeyedWindowResult::new)
        : combineToSlidingWindowP(winPolicy, operation, KeyedWindowResult::new);
// wrap the supplier so the test can capture the last processor instance it created
supplier = () -> lastSuppliedProcessor = (SlidingWindowP) procSupplier.get();
}
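As a quick orientation for the policy used above, here is an illustrative calculation (not part of the test; the variable names are made up for this sketch) of which windows an event contributes to under slidingWinPolicy(4, 1): frames are 1 unit long and each window spans 4 frames, so every event lands in 4 consecutive windows.

// illustrative arithmetic only, assuming windowSize = 4 and slideBy (frame length) = 1
long ts = 7;                                                          // event timestamp
long frameLength = 1;
long windowSize = 4;
long firstWindowEnd = ts / frameLength * frameLength + frameLength;   // 8
long lastWindowEnd = firstWindowEnd + windowSize - frameLength;       // 11
// the event at timestamp 7 is aggregated into the windows ending at 8, 9, 10 and 11,
// i.e. the windows covering [4,8), [5,9), [6,10) and [7,11)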
Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.
From the class SlidingWindowP_twoStageSnapshotTest, method before():
@Before
public void before() {
SlidingWindowPolicy windowDef = slidingWinPolicy(4, 1);
AggregateOperation1<Entry<?, Long>, LongAccumulator, Long> aggrOp = AggregateOperation
        .withCreate(LongAccumulator::new)
        .andAccumulate((LongAccumulator acc, Entry<?, Long> item) -> acc.add(item.getValue()))
        .andCombine(LongAccumulator::add)
        .andDeduct(LongAccumulator::subtract)
        .andExportFinish(LongAccumulator::get);
SupplierEx<Processor> procSupplier1 = Processors.accumulateByFrameP(
        singletonList((FunctionEx<? super Entry<Long, Long>, ?>) t -> KEY),
        singletonList((ToLongFunctionEx<? super Entry<Long, Long>>) Entry::getKey),
        TimestampKind.EVENT, windowDef, aggrOp.withIdentityFinish());
SupplierEx<Processor> procSupplier2 = combineToSlidingWindowP(windowDef, aggrOp, KeyedWindowResult::new);
// wrap the suppliers so the test can capture the last processor instances they created
stage1Supplier = () -> lastSuppliedStage1Processor = (SlidingWindowP<?, ?, ?, ?>) procSupplier1.get();
stage2Supplier = () -> lastSuppliedStage2Processor = (SlidingWindowP<?, ?, ?, ?>) procSupplier2.get();
}
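For context, here is a minimal sketch (not taken from the test; the vertex names and edge configuration are assumptions that mirror the two-stage branch of the job-restart test below) of how two such suppliers form a two-stage windowed aggregation: stage 1 accumulates items into per-frame accumulators locally (the identity finish keeps the raw accumulator), and stage 2 combines the frame accumulators into full windows and applies the real finish.

DAG dag = new DAG();
Vertex accumulate = dag.newVertex("accumulateByFrame", procSupplier1);
Vertex combine = dag.newVertex("combineToSlidingWindow", procSupplier2);
// frame accumulators travel from the local stage-1 vertex to the distributed,
// key-partitioned stage-2 vertex
dag.edge(between(accumulate, combine).distributed().partitioned(entryKey()));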
Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.
From the class JobRestartWithSnapshotTest, method when_nodeDown_then_jobRestartsFromSnapshot():
@SuppressWarnings("unchecked")
private void when_nodeDown_then_jobRestartsFromSnapshot(boolean twoStage) throws Exception {
/*
Design of this test:
It uses a random partitioned generator of source events. The events are
Map.Entry(partitionId, timestamp). For each partition, timestamps from
0..elementsInPartition are generated.
We start the test with two nodes, localParallelism(1) and 3 partitions
for the source. The source instances generate items at the same rate of
10 per second, which makes one instance advance twice as fast as the
other in terms of timestamps (one instance handles two partitions, the
other handles one). The source processor saves partition offsets
similarly to how KafkaSources.kafka() and Sources.mapJournal() do.
After some time we shut down one instance. The job restarts from the
snapshot and all partitions are restored to a single source processor
instance. The partition offsets now differ widely, so the source is
written to emit from the most-behind partition first, in order not to
emit late events from the partitions that are further ahead.
Local parallelism of InsertWatermarkP is also 1, to avoid the edge case
where different InsertWatermarkP instances initialize from a first event
in different frames and start their gapless watermark emission from
different values, which could cause the SlidingWindowP downstream to
miss some of the first windows.
The sink writes to an IMap, which is an idempotent sink.
The resulting contents of the sink map are compared to the expected value.
*/
DAG dag = new DAG();
SlidingWindowPolicy wDef = SlidingWindowPolicy.tumblingWinPolicy(3);
AggregateOperation1<Object, LongAccumulator, Long> aggrOp = counting();
IMap<List<Long>, Long> result = instance1.getMap("result");
result.clear();
int numPartitions = 3;
int elementsInPartition = 250;
SupplierEx<Processor> sup = () -> new SequencesInPartitionsGeneratorP(numPartitions, elementsInPartition, true);
Vertex generator = dag.newVertex("generator", throttle(sup, 30)).localParallelism(1);
Vertex insWm = dag.newVertex("insWm", insertWatermarksP(eventTimePolicy(
        o -> ((Entry<Integer, Integer>) o).getValue(), limitingLag(0),
        wDef.frameSize(), wDef.frameOffset(), 0))).localParallelism(1);
Vertex map = dag.newVertex("map", mapP((KeyedWindowResult kwr) ->
        entry(asList(kwr.end(), (long) (int) kwr.key()), kwr.result())));
Vertex writeMap = dag.newVertex("writeMap", SinkProcessors.writeMapP("result"));
if (twoStage) {
Vertex aggregateStage1 = dag.newVertex("aggregateStage1", Processors.accumulateByFrameP(
        singletonList((FunctionEx<? super Object, ?>) t -> ((Entry<Integer, Integer>) t).getKey()),
        singletonList(t1 -> ((Entry<Integer, Integer>) t1).getValue()),
        TimestampKind.EVENT, wDef, aggrOp.withIdentityFinish()));
Vertex aggregateStage2 = dag.newVertex("aggregateStage2", combineToSlidingWindowP(wDef, aggrOp, KeyedWindowResult::new));
dag.edge(between(insWm, aggregateStage1).partitioned(entryKey()))
   .edge(between(aggregateStage1, aggregateStage2).distributed().partitioned(entryKey()))
   .edge(between(aggregateStage2, map));
} else {
Vertex aggregate = dag.newVertex("aggregate", Processors.aggregateToSlidingWindowP(
        singletonList((FunctionEx<Object, Integer>) t -> ((Entry<Integer, Integer>) t).getKey()),
        singletonList(t1 -> ((Entry<Integer, Integer>) t1).getValue()),
        TimestampKind.EVENT, wDef, 0L, aggrOp, KeyedWindowResult::new));
dag.edge(between(insWm, aggregate).distributed().partitioned(entryKey()))
   .edge(between(aggregate, map));
}
dag.edge(between(generator, insWm)).edge(between(map, writeMap));
JobConfig config = new JobConfig();
config.setProcessingGuarantee(EXACTLY_ONCE);
config.setSnapshotIntervalMillis(1200);
Job job = instance1.getJet().newJob(dag, config);
JobRepository jobRepository = new JobRepository(instance1);
int timeout = (int) (MILLISECONDS.toSeconds(config.getSnapshotIntervalMillis() * 3) + 8);
waitForFirstSnapshot(jobRepository, job.getId(), timeout, false);
waitForNextSnapshot(jobRepository, job.getId(), timeout, false);
// wait a little longer so that the job emits something that will later be overwritten in the sink map
Thread.sleep(300);
instance2.getLifecycleService().terminate();
// Now the job should detect member shutdown and restart from snapshot.
// Let's wait until the next snapshot appears.
waitForNextSnapshot(jobRepository, job.getId(), (int) (MILLISECONDS.toSeconds(config.getSnapshotIntervalMillis()) + 10), false);
waitForNextSnapshot(jobRepository, job.getId(), timeout, false);
job.join();
// compute expected result
Map<List<Long>, Long> expectedMap = new HashMap<>();
for (long partition = 0; partition < numPartitions; partition++) {
long cnt = 0;
for (long value = 1; value <= elementsInPartition; value++) {
cnt++;
if (value % wDef.frameSize() == 0) {
expectedMap.put(asList(value, partition), cnt);
cnt = 0;
}
}
if (cnt > 0) {
expectedMap.put(asList(wDef.higherFrameTs(elementsInPartition - 1), partition), cnt);
}
}
// check expected result
if (!expectedMap.equals(result)) {
System.out.println("All expected entries: " + expectedMap.entrySet().stream().map(Object::toString).collect(joining(", ")));
System.out.println("All actual entries: " + result.entrySet().stream().map(Object::toString).collect(joining(", ")));
System.out.println("Non-received expected items: " + expectedMap.keySet().stream().filter(key -> !result.containsKey(key)).map(Object::toString).collect(joining(", ")));
System.out.println("Received non-expected items: " + result.entrySet().stream().filter(entry -> !expectedMap.containsKey(entry.getKey())).map(Object::toString).collect(joining(", ")));
System.out.println("Different keys: ");
for (Entry<List<Long>, Long> rEntry : result.entrySet()) {
Long expectedValue = expectedMap.get(rEntry.getKey());
if (expectedValue != null && !expectedValue.equals(rEntry.getValue())) {
System.out.println("key: " + rEntry.getKey() + ", expected value: " + expectedValue + ", actual value: " + rEntry.getValue());
}
}
System.out.println("-- end of different keys");
assertEquals(expectedMap, new HashMap<>(result));
}
}
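The expected-result loop above boils down to simple tumbling-frame arithmetic. Here is a self-contained illustration (the numbers are chosen just for this sketch, not taken from the test) of the counts it produces for a single partition:

// with frame size 3 and 7 elements (timestamps 0..6), the windows ending at 3 and 6
// hold 3 items each and the last item falls into the partially filled window ending at 9
int frameSize = 3;
int elements = 7;
Map<Long, Long> expectedForOnePartition = new HashMap<>();
for (long ts = 0; ts < elements; ts++) {
    long windowEnd = ts / frameSize * frameSize + frameSize;  // end of the tumbling window containing ts
    expectedForOnePartition.merge(windowEnd, 1L, Long::sum);
}
// expectedForOnePartition is now {3=3, 6=3, 9=1}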
Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.
From the class ExecutionLifecycleTest, method when_dataSerializable_processorSupplier_notSerializable_then_jobFails():
@Test
public void when_dataSerializable_processorSupplier_notSerializable_then_jobFails() {
DAG dag = new DAG();
dag.newVertex("v", ProcessorMetaSupplier.of((FunctionEx<? super Address, ? extends ProcessorSupplier>) address -> new NotSerializable_DataSerializable_ProcessorSupplier()));
Job job = newJob(dag);
Exception e = assertThrows(Exception.class, () -> job.join());
assertContains(e.getMessage(), "Failed to serialize");
}
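For reference, a hypothetical sketch of what a fixture like NotSerializable_DataSerializable_ProcessorSupplier could look like (this is an assumption, not the actual test class): it implements DataSerializable but fails in writeData, so serializing the execution plan fails and joining the job surfaces a "Failed to serialize" error.

// hypothetical fixture, not the real test class
class FailingDataSerializableSupplier implements ProcessorSupplier, DataSerializable {
    @Nonnull @Override
    public Collection<? extends Processor> get(int count) {
        // hand out independent no-op processors
        return Stream.generate(() -> Processors.noopP().get())
                     .limit(count)
                     .collect(Collectors.toList());
    }

    @Override
    public void writeData(ObjectDataOutput out) throws IOException {
        throw new IOException("deliberately not serializable");
    }

    @Override
    public void readData(ObjectDataInput in) {
    }
}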
Use of com.hazelcast.function.FunctionEx in project hazelcast by hazelcast.
From the class S3Sources, method s3():
/**
* Creates an AWS S3 {@link BatchSource} which lists all the objects in the
* given buckets that match the given {@code prefix}, reads them line by line,
* transforms each line to the desired output object using the given {@code
* mapFn} and emits the results downstream.
* <p>
* The source does not save any state to snapshot. If the job is restarted,
* it will re-emit all entries.
* <p>
* The default local parallelism for this processor is 2.
* <p>
* Here is an example which reads the objects from two buckets,
* applying the given prefix:
*
* <pre>{@code
* Pipeline p = Pipeline.create();
* BatchStage<String> srcStage = p.readFrom(S3Sources.s3(
* Arrays.asList("bucket1", "bucket2"),
* "prefix",
* StandardCharsets.UTF_8,
* () -> S3Client.create(),
* (filename, line) -> line
* ));
* }</pre>
*
* @param bucketNames    list of bucket names
* @param prefix         the prefix to filter the objects. Optional, passing
*                       {@code null} will list all objects
* @param charset        the charset to use when decoding the object contents
* @param clientSupplier function which returns the S3 client to use;
*                       one client per processor instance is used
* @param mapFn          the function which creates the output object from
*                       each line. It gets the object name and the line as
*                       parameters
* @param <T>            the type of the items the source emits
*/
@Nonnull
public static <T> BatchSource<T> s3(
        @Nonnull List<String> bucketNames, @Nullable String prefix, @Nonnull Charset charset,
        @Nonnull SupplierEx<? extends S3Client> clientSupplier,
        @Nonnull BiFunctionEx<String, String, ? extends T> mapFn) {
String charsetName = charset.name();
FunctionEx<InputStream, Stream<String>> readFileFn = responseInputStream -> {
BufferedReader reader = new BufferedReader(new InputStreamReader(responseInputStream, Charset.forName(charsetName)));
return reader.lines();
};
return s3(bucketNames, prefix, clientSupplier, readFileFn, mapFn);
}
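As a further illustration of the signature above, a hypothetical call (the bucket name, prefix and mapping function are made up) that reads objects with a non-default charset and maps each line to its length:

BatchSource<Integer> lineLengths = S3Sources.s3(
        singletonList("my-bucket"),            // hypothetical bucket
        "logs/",                               // hypothetical prefix
        StandardCharsets.ISO_8859_1,
        () -> S3Client.create(),
        (objectName, line) -> line.length());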