Use of org.apache.samza.operators.OutputStream in the Apache Samza project.
From class TumblingWindowApp, method describe.
/**
 * Describes a pipeline that reads page views from Kafka, drops those whose user id equals
 * {@code FILTER_KEY}, groups the remainder by user id into 3-second tumbling windows, and
 * sends the number of messages collected in each window, keyed by user id, to the output topic.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

  // Input carries JSON-encoded page views; output carries (userId, count) pairs.
  KafkaInputDescriptor<PageView> pageViewInput =
      kafka.getInputDescriptor(INPUT_TOPIC, new JsonSerdeV2<>(PageView.class));
  KafkaOutputDescriptor<KV<String, Integer>> countOutput =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, KVSerde.of(new StringSerde(), new IntegerSerde()));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(countOutput);

  pageViews
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(
          Windows.keyedTumblingWindow(
              PageView::getUserId,
              Duration.ofSeconds(3),
              new StringSerde(),
              new JsonSerdeV2<>(PageView.class)),
          "tumblingWindow")
      // Each pane holds the messages collected for one key; emit (key, number of messages).
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(outputStream);
}
Use of org.apache.samza.operators.OutputStream in the Apache Samza project.
From class WindowExample, method describe.
/**
 * Describes a pipeline that counts page view events in 10-minute tumbling windows and
 * sends each emitted count to the "pageViewEventPerMember" stream of the "tracking" system.
 *
 * <p>In addition to the default emission at the end of each window, an early trigger emits
 * intermediate results whenever 30000 messages have been collected or no new message has
 * arrived for 1 minute.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<Integer> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());

  // The fold starts from 0 and adds one per message; the null guard covers a missing accumulator.
  SupplierFunction<Integer> initialValue = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1;

  MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor);

  // Create a tumbling window that outputs the number of messages collected every 10 minutes.
  // Also emit early results if either the number of messages collected reaches 30000, or if
  // no new messages arrive for 1 minute. This must be registered as an EARLY trigger: the
  // previous code used setLateTrigger, which contradicted the intent stated here.
  inputStream
      .window(
          Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde())
              .setEarlyTrigger(
                  Triggers.any(
                      Triggers.count(30000),
                      Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))),
          "window")
      .map(WindowPane::getMessage)
      .sendTo(outputStream);
}
Use of org.apache.samza.operators.OutputStream in the Apache Samza project.
From class PageViewCounterExample, method describe.
/**
 * Describes a pipeline that counts page view events per member id in 10-second tumbling
 * windows and sends the result, keyed by member id, to the "pageViewEventPerMember" stream.
 *
 * <p>The window is configured with a repeating early trigger that fires every 5 messages
 * and with the {@code DISCARDING} accumulation mode.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> eventsIn =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countsOut =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(eventsIn);
  OutputStream<KV<String, PageViewCount>> pageViewEventPerMemberStream =
      appDescriptor.getOutputStream(countsOut);

  // Per-window counter: start at zero, add one for every event folded in.
  SupplierFunction<Integer> zero = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> increment = (event, count) -> count + 1;

  pageViewEvents
      .window(
          // The two trailing nulls are the key and window-value serde arguments, passed as-is.
          Windows.keyedTumblingWindow(
                  PageViewEvent::getMemberId, Duration.ofSeconds(10), zero, increment, null, null)
              .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
              .setAccumulationMode(AccumulationMode.DISCARDING),
          "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(pageViewEventPerMemberStream);
}
Use of org.apache.samza.operators.OutputStream in the Apache Samza project.
From class KeyValueStoreExample, method describe.
/**
 * Describes a pipeline that repartitions page view events by member id, runs them through
 * {@code MyStatsCounter}, and sends the resulting stats, keyed by member id, to the
 * "pageViewEventPerMember" stream of the "tracking" system.
 *
 * @param appDescriptor descriptor used to set the default system and obtain the streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> eventsIn =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsOut =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  // Registered before any stream is requested, as in the original ordering.
  appDescriptor.withDefaultSystem(tracking);

  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(eventsIn);
  OutputStream<KV<String, StatsOutput>> pageViewEventPerMember =
      appDescriptor.getOutputStream(statsOut);

  pageViewEvents
      .partitionBy(
          event -> event.getMemberId(),
          event -> event,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)),
          "partitionBy")
      // Only the event itself is needed downstream; drop the partitioning key.
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(pageViewEventPerMember);
}
Use of org.apache.samza.operators.OutputStream in the Apache Samza project.
From class TestExecutionPlanner, method createStreamGraphWithJoin.
/**
 * Builds a stream graph with three inputs and two outputs connected through two joins,
 * using mocked message builders, key/message extractors and join functions.
 *
 * @return the populated stream graph
 */
private StreamGraphImpl createStreamGraphWithJoin() {
  /*
   * the graph looks like the following. number of partitions in parentheses. quotes indicate expected value.
   *
   * input1 (64) -> map -> join -> output1 (8)
   * |
   * input2 (16) -> partitionBy ("64") -> filter -|
   * |
   * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
   *
   */
  StreamGraphImpl graph = new StreamGraphImpl(runner, config);

  // Raw types are intentional here: the mocks stand in for builders of any message type.
  BiFunction inputMsgBuilder = mock(BiFunction.class);

  MessageStream mappedInput1 = graph.getInputStream("input1", inputMsgBuilder)
      .map(m -> m);
  MessageStream repartitionedInput2 = graph.getInputStream("input2", inputMsgBuilder)
      .partitionBy(m -> "haha")
      .filter(m -> true);
  MessageStream repartitionedInput3 = graph.getInputStream("input3", inputMsgBuilder)
      .filter(m -> true)
      .partitionBy(m -> "hehe")
      .map(m -> m);

  // The same mocked function is passed for both extractor arguments of each output stream.
  Function extractor = mock(Function.class);
  OutputStream<Object, Object, Object> output1 =
      graph.getOutputStream("output1", extractor, extractor);
  OutputStream<Object, Object, Object> output2 =
      graph.getOutputStream("output2", extractor, extractor);

  // input2's repartitioned stream feeds both joins.
  mappedInput1
      .join(repartitionedInput2, mock(JoinFunction.class), Duration.ofHours(2))
      .sendTo(output1);
  repartitionedInput3
      .join(repartitionedInput2, mock(JoinFunction.class), Duration.ofHours(1))
      .sendTo(output2);

  return graph;
}
Aggregations