Use of com.hazelcast.jet.aggregate.AggregateOperation in project hazelcast-jet by hazelcast.
The class GrAggBuilder, method buildStream.
@SuppressWarnings("unchecked")
public <A, R, OUT> StreamStage<OUT> buildStream(
        @Nonnull AggregateOperation<A, ? extends R> aggrOp,
        @Nonnull KeyedWindowResultFunction<? super K, ? super R, OUT> mapToOutputFn
) {
    List<Transform> upstreamTransforms = upstreamStages.stream().map(s -> s.transform).collect(toList());
    JetEventFunctionAdapter fnAdapter = ADAPT_TO_JET_EVENT;

    // Avoided Stream API here due to static typing issues
    List<DistributedFunction<?, ? extends K>> adaptedKeyFns = new ArrayList<>();
    for (DistributedFunction keyFn : keyFns) {
        adaptedKeyFns.add(adaptKeyFn(keyFn));
    }

    Transform transform = new WindowGroupTransform<K, A, R, JetEvent<OUT>>(
            upstreamTransforms, wDef, adaptedKeyFns,
            adaptAggregateOperation(aggrOp),
            fnAdapter.adaptKeyedWindowResultFn(mapToOutputFn));
    pipelineImpl.connect(upstreamTransforms, transform);
    return new StreamStageImpl<>(transform, fnAdapter, pipelineImpl);
}
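GrAggBuilder.buildStream wires the upstream stages, the window definition, and the AggregateOperation into a single WindowGroupTransform. For context, the user-facing windowed aggregation that ultimately drives such a builder looks roughly like the sketch below; the test source, key function, and window size are illustrative, and the method names (readFrom, writeTo) follow the current Hazelcast Jet Pipeline API, which differs slightly from the older hazelcast-jet release this snippet comes from.

import com.hazelcast.jet.aggregate.AggregateOperations;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.WindowDefinition;
import com.hazelcast.jet.pipeline.test.TestSources;

public class WindowedAggregationSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create();
        p.readFrom(TestSources.itemStream(100))          // test source emitting SimpleEvent items
         .withNativeTimestamps(0)                        // no allowed lag, for simplicity
         .groupingKey(event -> event.sequence() % 4)     // illustrative grouping key
         .window(WindowDefinition.tumbling(1_000))       // 1-second tumbling windows
         .aggregate(AggregateOperations.counting())      // AggregateOperation applied per key and window
         .writeTo(Sinks.logger());
        // Submitting the pipeline is environment-specific and omitted here.
    }
}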
Use of com.hazelcast.jet.aggregate.AggregateOperation in project hazelcast by hazelcast.
The class CreateDagVisitor, method onSlidingWindowAggregate.
public Vertex onSlidingWindowAggregate(SlidingWindowAggregatePhysicalRel rel) {
    FunctionEx<JetSqlRow, ?> groupKeyFn = rel.groupKeyFn();
    AggregateOperation<?, JetSqlRow> aggregateOperation = rel.aggrOp();

    Expression<?> timestampExpression = rel.timestampExpression();
    ToLongFunctionEx<JetSqlRow> timestampFn =
            row -> WindowUtils.extractMillis(timestampExpression.eval(row.getRow(), MOCK_EEC));
    SlidingWindowPolicy windowPolicy = rel.windowPolicyProvider().apply(MOCK_EEC);
    KeyedWindowResultFunction<? super Object, ? super JetSqlRow, ?> resultMapping = rel.outputValueMapping();

    if (rel.numStages() == 1) {
        // Single-stage plan: route all input to one processor and aggregate there.
        Vertex vertex = dag.newUniqueVertex("Sliding-Window-AggregateByKey",
                Processors.aggregateToSlidingWindowP(singletonList(groupKeyFn), singletonList(timestampFn),
                        TimestampKind.EVENT, windowPolicy, 0, aggregateOperation, resultMapping));
        connectInput(rel.getInput(), vertex, edge -> edge.distributeTo(localMemberAddress).allToOne(""));
        return vertex;
    } else {
        assert rel.numStages() == 2;
        // Two-stage plan: accumulate partial window frames locally, then combine across the cluster.
        Vertex vertex1 = dag.newUniqueVertex("Sliding-Window-AccumulateByKey",
                Processors.accumulateByFrameP(singletonList(groupKeyFn), singletonList(timestampFn),
                        TimestampKind.EVENT, windowPolicy, aggregateOperation));
        Vertex vertex2 = dag.newUniqueVertex("Sliding-Window-CombineByKey",
                Processors.combineToSlidingWindowP(windowPolicy, aggregateOperation, resultMapping));
        connectInput(rel.getInput(), vertex1, edge -> edge.partitioned(groupKeyFn));
        dag.edge(between(vertex1, vertex2).distributed().partitioned(entryKey()));
        return vertex2;
    }
}
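CreateDagVisitor translates the optimizer's SlidingWindowAggregatePhysicalRel into either a single aggregation vertex or an accumulate/combine vertex pair, depending on numStages(). As a rough illustration of a query that reaches this path, the sketch below runs a tumbling-window aggregation through the SQL service; the embedded instance, the mapping name trades, and the column trade_time are hypothetical, and the ordering view follows the documented IMPOSE_ORDER pattern.

import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import com.hazelcast.sql.SqlResult;
import com.hazelcast.sql.SqlRow;

public class SlidingWindowSqlSketch {
    public static void main(String[] args) {
        HazelcastInstance hz = Hazelcast.newHazelcastInstance();

        // Hypothetical streaming mapping "trades" with an event-time column "trade_time".
        // A view imposes watermark order before the window function is applied.
        hz.getSql().execute(
                "CREATE OR REPLACE VIEW trades_ordered AS "
                        + "SELECT * FROM TABLE(IMPOSE_ORDER(TABLE trades, DESCRIPTOR(trade_time), INTERVAL '1' SECOND))");

        // Streaming aggregation: the query keeps producing rows until it is cancelled.
        try (SqlResult result = hz.getSql().execute(
                "SELECT window_start, window_end, COUNT(*) AS trade_count "
                        + "FROM TABLE(TUMBLE(TABLE trades_ordered, DESCRIPTOR(trade_time), INTERVAL '5' SECOND)) "
                        + "GROUP BY window_start, window_end")) {
            for (SqlRow row : result) {
                System.out.println(row.<Long>getObject("trade_count"));
            }
        }
    }
}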
Use of com.hazelcast.jet.aggregate.AggregateOperation in project hazelcast by hazelcast.
The class RebalanceBatchStageTest, method when_rebalanceAndGroupAggregateBuilderWithComplexAggrOp_then_singleStageAggregation.
@Test
public void when_rebalanceAndGroupAggregateBuilderWithComplexAggrOp_then_singleStageAggregation() {
    // Given
    FunctionEx<Integer, Integer> keyFn = i -> i % 10;
    Iterator<Integer> sequence = IntStream.iterate(0, i -> i + 1).boxed().iterator();
    List<Integer> input0 = streamFromIterator(sequence).limit(itemCount).collect(toList());
    List<Integer> input1 = streamFromIterator(sequence).limit(itemCount).collect(toList());
    List<Integer> input2 = streamFromIterator(sequence).limit(itemCount).collect(toList());
    BatchStageWithKey<Integer, Integer> stage0 = batchStageFromList(input0).groupingKey(keyFn);
    BatchStageWithKey<Integer, Integer> stage1 = batchStageFromList(input1).groupingKey(keyFn);
    BatchStageWithKey<Integer, Integer> stage2Rebalanced = batchStageFromList(input2).rebalance().groupingKey(keyFn);

    // When
    GroupAggregateBuilder1<Integer, Integer> b = stage0.aggregateBuilder();
    Tag<Integer> tag0_in = b.tag0();
    Tag<Integer> tag1_in = b.add(stage1);
    Tag<Integer> tag2_in = b.add(stage2Rebalanced);
    CoAggregateOperationBuilder agb = coAggregateOperationBuilder();
    Tag<Long> tag0 = agb.add(tag0_in, SUMMING);
    Tag<Long> tag1 = agb.add(tag1_in, SUMMING);
    Tag<Long> tag2 = agb.add(tag2_in, SUMMING);
    AggregateOperation<Object[], ItemsByTag> aggrOp = agb.build();
    BatchStage<Entry<Integer, ItemsByTag>> aggregated = b.build(aggrOp);

    // Then
    aggregated.writeTo(sink);
    assertSingleStageAggregation();
    execute();
    Collector<Integer, ?, Map<Integer, Long>> groupAndSum = groupingBy(keyFn, summingLong(i -> i));
    Map<Integer, Long> expectedMap0 = input0.stream().collect(groupAndSum);
    Map<Integer, Long> expectedMap1 = input1.stream().collect(groupAndSum);
    Map<Integer, Long> expectedMap2 = input2.stream().collect(groupAndSum);
    assertEquals(
            streamToString(expectedMap0.entrySet().stream(),
                    e -> FORMAT_FN_3.apply(e.getKey(),
                            tuple3(e.getValue(), expectedMap1.get(e.getKey()), expectedMap2.get(e.getKey())))),
            streamToString(this.<Integer, ItemsByTag>sinkStreamOfEntry(),
                    e -> FORMAT_FN_3.apply(e.getKey(),
                            tuple3(e.getValue().get(tag0), e.getValue().get(tag1), e.getValue().get(tag2)))));
}
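In this test, rebalance() on the third input forces a distributed edge into the aggregation, so Jet cannot use the usual two-stage (accumulate then combine) setup and falls back to a single-stage aggregation, which assertSingleStageAggregation() verifies. Outside the test harness, the same co-aggregation pattern looks roughly like the sketch below; the sources and key function are illustrative and the pipeline-submission step is omitted.

import static com.hazelcast.jet.aggregate.AggregateOperations.coAggregateOperationBuilder;
import static com.hazelcast.jet.aggregate.AggregateOperations.summingLong;

import com.hazelcast.jet.accumulator.LongAccumulator;
import com.hazelcast.jet.aggregate.AggregateOperation1;
import com.hazelcast.jet.aggregate.CoAggregateOperationBuilder;
import com.hazelcast.jet.datamodel.Tag;
import com.hazelcast.jet.pipeline.BatchStageWithKey;
import com.hazelcast.jet.pipeline.GroupAggregateBuilder1;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.test.TestSources;

public class CoAggregationSketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create();
        BatchStageWithKey<Integer, Integer> evens = p.readFrom(TestSources.items(0, 2, 4, 6)).groupingKey(i -> i % 4);
        BatchStageWithKey<Integer, Integer> odds = p.readFrom(TestSources.items(1, 3, 5, 7)).groupingKey(i -> i % 4);

        GroupAggregateBuilder1<Integer, Integer> b = evens.aggregateBuilder();
        Tag<Integer> evensIn = b.tag0();
        Tag<Integer> oddsIn = b.add(odds);

        AggregateOperation1<Integer, LongAccumulator, Long> summing = summingLong(Integer::longValue);
        CoAggregateOperationBuilder agb = coAggregateOperationBuilder();
        Tag<Long> evenSum = agb.add(evensIn, summing);
        Tag<Long> oddSum = agb.add(oddsIn, summing);

        // The combined AggregateOperation produces one ItemsByTag per distinct key.
        b.build(agb.build())
         .map(e -> e.getKey() + " -> even sum " + e.getValue().get(evenSum) + ", odd sum " + e.getValue().get(oddSum))
         .writeTo(Sinks.logger());
        // Submitting the pipeline is environment-specific and omitted here.
    }
}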
Use of com.hazelcast.jet.aggregate.AggregateOperation in project hazelcast by hazelcast.
The class BatchAggregateTest, method groupAggregateBuilder_withComplexAggrOp.
@Test
public void groupAggregateBuilder_withComplexAggrOp() {
    // Given
    GroupAggregateFixture fx = new GroupAggregateFixture();
    BatchStageWithKey<Integer, Integer> stage0 = fx.srcStage0.groupingKey(fx.keyFn);
    BatchStageWithKey<Integer, Integer> stage1 = fx.srcStage1().groupingKey(fx.keyFn);
    BatchStageWithKey<Integer, Integer> stage2 = fx.srcStage2().groupingKey(fx.keyFn);

    // When
    GroupAggregateBuilder1<Integer, Integer> b = stage0.aggregateBuilder();
    Tag<Integer> tag0_in = b.tag0();
    Tag<Integer> tag1_in = b.add(stage1);
    Tag<Integer> tag2_in = b.add(stage2);
    CoAggregateOperationBuilder agb = coAggregateOperationBuilder();
    Tag<Long> tag0 = agb.add(tag0_in, SUMMING);
    Tag<Long> tag1 = agb.add(tag1_in, SUMMING);
    Tag<Long> tag2 = agb.add(tag2_in, SUMMING);
    AggregateOperation<Object[], ItemsByTag> aggrOp = agb.build();
    BatchStage<Entry<Integer, ItemsByTag>> aggregated = b.build(aggrOp);

    // Then
    aggregated.writeTo(sink);
    execute();
    Map<Integer, Long> expectedMap0 = input.stream().collect(groupingBy(fx.keyFn, fx.collectOp));
    Map<Integer, Long> expectedMap1 = input.stream().map(fx.mapFn1).collect(groupingBy(fx.keyFn, fx.collectOp));
    Map<Integer, Long> expectedMap2 = input.stream().map(fx.mapFn2).collect(groupingBy(fx.keyFn, fx.collectOp));
    assertEquals(
            streamToString(expectedMap0.entrySet().stream(),
                    e -> FORMAT_FN_3.apply(e.getKey(),
                            tuple3(e.getValue(), expectedMap1.get(e.getKey()), expectedMap2.get(e.getKey())))),
            streamToString(this.<Integer, ItemsByTag>sinkStreamOfEntry(),
                    e -> FORMAT_FN_3.apply(e.getKey(),
                            tuple3(e.getValue().get(tag0), e.getValue().get(tag1), e.getValue().get(tag2)))));
}
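coAggregateOperationBuilder() composes several existing operations into one AggregateOperation whose result is an ItemsByTag. When no ready-made operation fits, an AggregateOperation can also be assembled directly from its primitives; the sketch below computes the average of long values and is purely illustrative.

import com.hazelcast.jet.accumulator.LongAccumulator;
import com.hazelcast.jet.aggregate.AggregateOperation;
import com.hazelcast.jet.aggregate.AggregateOperation1;

public class CustomAggregateOperationSketch {

    // Illustrative custom operation: average of long values, built from the
    // create / accumulate / combine / exportFinish primitives.
    static AggregateOperation1<Long, LongAccumulator[], Double> averagingLongs() {
        return AggregateOperation
                .withCreate(() -> new LongAccumulator[] {new LongAccumulator(), new LongAccumulator()})
                .<Long>andAccumulate((acc, value) -> {
                    acc[0].add(value);   // running sum
                    acc[1].add(1);       // running count
                })
                .andCombine((left, right) -> {
                    left[0].add(right[0]);
                    left[1].add(right[1]);
                })
                .andExportFinish(acc -> (double) acc[0].get() / acc[1].get());
    }
}

Such an operation can then be passed to groupingKey(...).aggregate(...) or registered under a tag with a CoAggregateOperationBuilder, exactly like the built-in operations used in the tests above.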
Use of com.hazelcast.jet.aggregate.AggregateOperation in project hazelcast-jet by hazelcast.
The class GrAggBuilder, method buildBatch.
@SuppressWarnings("unchecked")
public <A, R, OUT> BatchStage<OUT> buildBatch(
        @Nonnull AggregateOperation<A, ? extends R> aggrOp,
        @Nonnull DistributedBiFunction<? super K, ? super R, OUT> mapToOutputFn
) {
    List<Transform> upstreamTransforms = upstreamStages.stream().map(s -> s.transform).collect(toList());
    Transform transform = new GroupTransform<>(upstreamTransforms, keyFns, aggrOp, mapToOutputFn);
    pipelineImpl.connect(upstreamTransforms, transform);
    return new BatchStageImpl<>(transform, pipelineImpl);
}
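buildBatch is the non-windowed counterpart of buildStream above. For a fixed, small number of co-grouped inputs the builder is usually unnecessary; the aggregate2/aggregate3 convenience methods cover those cases. A minimal sketch, again with illustrative sources and the current Pipeline API rather than the older hazelcast-jet names:

import static com.hazelcast.jet.aggregate.AggregateOperations.counting;

import com.hazelcast.jet.pipeline.BatchStageWithKey;
import com.hazelcast.jet.pipeline.Pipeline;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.jet.pipeline.test.TestSources;

public class Aggregate2Sketch {
    public static void main(String[] args) {
        Pipeline p = Pipeline.create();
        BatchStageWithKey<Integer, Integer> left = p.readFrom(TestSources.items(0, 2, 4, 6)).groupingKey(i -> i % 4);
        BatchStageWithKey<Integer, Integer> right = p.readFrom(TestSources.items(1, 3, 5, 7)).groupingKey(i -> i % 4);

        // Emits one Entry<key, Tuple2<leftCount, rightCount>> per distinct key.
        left.aggregate2(counting(), right, counting())
            .writeTo(Sinks.logger());
        // Submitting the pipeline is environment-specific and omitted here.
    }
}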