Search in sources:

Example 1 with KafkaOutputDescriptor

Use of org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor in the Apache Samza project.

From the describe method of the class SessionWindowApp.

/**
 * Wires up the session-window pipeline.
 *
 * <p>Reads {@code PageView} messages from {@code INPUT_TOPIC}, drops every message whose user id
 * equals {@code FILTER_KEY}, windows the remaining messages per user id via
 * {@code Windows.keyedSessionWindow} (called with a 3-second duration), and sends one
 * {@code KV} of (window key, size of the pane's message) per emitted pane to {@code OUTPUT_TOPIC}.
 *
 * @param appDescriptor the application descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> viewsIn =
        kafka.getInputDescriptor(INPUT_TOPIC, new JsonSerdeV2<>(PageView.class));
    KafkaOutputDescriptor<KV<String, Integer>> countsOut =
        kafka.getOutputDescriptor(OUTPUT_TOPIC, KVSerde.of(new StringSerde(), new IntegerSerde()));

    MessageStream<PageView> views = appDescriptor.getInputStream(viewsIn);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(countsOut);

    views
        // Skip page views that belong to the filtered-out user.
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedSessionWindow(
                PageView::getUserId,
                Duration.ofSeconds(3),
                new StringSerde(),
                new JsonSerdeV2<>(PageView.class)),
            "sessionWindow")
        // Emit (user id, number of elements in the pane's message).
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 2 with KafkaOutputDescriptor

Use of org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor in the Apache Samza project.

From the describe method of the class TumblingWindowApp.

/**
 * Wires up the tumbling-window pipeline.
 *
 * <p>Reads {@code PageView} messages from {@code INPUT_TOPIC}, drops every message whose user id
 * equals {@code FILTER_KEY}, windows the remaining messages per user id via
 * {@code Windows.keyedTumblingWindow} (called with a 3-second duration), and sends one
 * {@code KV} of (window key, size of the pane's message) per emitted pane to {@code OUTPUT_TOPIC}.
 *
 * @param appDescriptor the application descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> viewsIn =
        kafka.getInputDescriptor(INPUT_TOPIC, new JsonSerdeV2<>(PageView.class));
    KafkaOutputDescriptor<KV<String, Integer>> countsOut =
        kafka.getOutputDescriptor(OUTPUT_TOPIC, KVSerde.of(new StringSerde(), new IntegerSerde()));

    MessageStream<PageView> views = appDescriptor.getInputStream(viewsIn);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(countsOut);

    views
        // Skip page views that belong to the filtered-out user.
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedTumblingWindow(
                PageView::getUserId,
                Duration.ofSeconds(3),
                new StringSerde(),
                new JsonSerdeV2<>(PageView.class)),
            "tumblingWindow")
        // Emit (user id, number of elements in the pane's message).
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 3 with KafkaOutputDescriptor

Use of org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor in the Apache Samza project.

From the describe method of the class WindowExample.

/**
 * Wires up a message-counting tumbling window on the "tracking" Kafka system.
 *
 * <p>Reads {@code PageViewEvent} messages from the "pageViewEvent" stream, counts them in a
 * 10-minute tumbling window, and sends each emitted count to the "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the application descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
    KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor = trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<Integer> outputStreamDescriptor = trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());
    SupplierFunction<Integer> initialValue = () -> 0;
    // Treats a null accumulator as "no messages yet", so the first message yields 1.
    FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1;
    MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor);
    OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor);
    // Create a tumbling window that outputs the number of messages collected every 10 minutes.
    // Also emit early results if either the number of messages collected reaches 30000, or if no new messages arrive
    // for 1 minute. These are early results, so the trigger is registered as the early trigger, not the late one.
    inputStream
        .window(
            Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde())
                .setEarlyTrigger(
                    Triggers.any(
                        Triggers.count(30000),
                        Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))),
            "window")
        .map(WindowPane::getMessage)
        .sendTo(outputStream);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) Triggers(org.apache.samza.operators.triggers.Triggers) WindowPane(org.apache.samza.operators.windows.WindowPane) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) StreamApplication(org.apache.samza.application.StreamApplication) OutputStream(org.apache.samza.operators.OutputStream) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) IntegerSerde(org.apache.samza.serializers.IntegerSerde) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 4 with KafkaOutputDescriptor

Use of org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor in the Apache Samza project.

From the describe method of the class PageViewCounterExample.

/**
 * Wires up a per-member page-view counter on the "tracking" Kafka system.
 *
 * <p>Reads {@code PageViewEvent} messages from the "pageViewEvent" stream and folds them into a
 * count per member id using {@code Windows.keyedTumblingWindow} with a 10-second duration. The
 * window is configured with a repeating count-of-5 early trigger and
 * {@code AccumulationMode.DISCARDING}. Each emitted pane is converted with
 * {@code buildPageViewCount} and sent, keyed by the window key, to the
 * "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the application descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

    KafkaInputDescriptor<PageViewEvent> eventsIn =
        tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<KV<String, PageViewCount>> countsOut =
        tracking.getOutputDescriptor(
            "pageViewEventPerMember",
            KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

    MessageStream<PageViewEvent> events = appDescriptor.getInputStream(eventsIn);
    OutputStream<KV<String, PageViewCount>> countsPerMember = appDescriptor.getOutputStream(countsOut);

    // Counting fold: start at zero and add one per incoming event.
    SupplierFunction<Integer> zero = () -> 0;
    FoldLeftFunction<PageViewEvent, Integer> increment = (event, count) -> count + 1;

    events
        .window(
            Windows.keyedTumblingWindow(
                    PageViewEvent::getMemberId,
                    Duration.ofSeconds(10),
                    zero,
                    increment,
                    null,
                    null)
                .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
                .setAccumulationMode(AccumulationMode.DISCARDING),
            "tumblingWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
        .sendTo(countsPerMember);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) Triggers(org.apache.samza.operators.triggers.Triggers) WindowPane(org.apache.samza.operators.windows.WindowPane) StringSerde(org.apache.samza.serializers.StringSerde) PageViewCount(org.apache.samza.example.models.PageViewCount) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Example 5 with KafkaOutputDescriptor

Use of org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor in the Apache Samza project.

From the describe method of the class KeyValueStoreExample.

/**
 * Wires up the per-member statistics pipeline on the "tracking" Kafka system.
 *
 * <p>Registers the tracking system as the application's default system, reads
 * {@code PageViewEvent} messages from the "pageViewEvent" stream, repartitions them by member id,
 * runs them through {@code MyStatsCounter}, and sends each resulting stats object, keyed by its
 * {@code memberId}, to the "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the application descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

    KafkaInputDescriptor<PageViewEvent> eventsIn =
        tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<KV<String, StatsOutput>> statsOut =
        tracking.getOutputDescriptor(
            "pageViewEventPerMember",
            KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

    appDescriptor.withDefaultSystem(tracking);

    MessageStream<PageViewEvent> events = appDescriptor.getInputStream(eventsIn);
    OutputStream<KV<String, StatsOutput>> statsPerMember = appDescriptor.getOutputStream(statsOut);

    events
        // Re-key by member id; the value is the event itself.
        .partitionBy(
            event -> event.getMemberId(),
            event -> event,
            KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)),
            "partitionBy")
        .map(KV::getValue)
        .flatMap(new MyStatsCounter())
        .map(memberStats -> KV.of(memberStats.memberId, memberStats))
        .sendTo(statsPerMember);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) Collection(java.util.Collection) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) TimeUnit(java.util.concurrent.TimeUnit) Context(org.apache.samza.context.Context) List(java.util.List) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) MessageStream(org.apache.samza.operators.MessageStream) StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Aggregations

StreamApplication (org.apache.samza.application.StreamApplication)10 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)10 Config (org.apache.samza.config.Config)10 ApplicationRunner (org.apache.samza.runtime.ApplicationRunner)10 ApplicationRunners (org.apache.samza.runtime.ApplicationRunners)10 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)10 KafkaInputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor)10 KafkaOutputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor)10 KafkaSystemDescriptor (org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)10 CommandLine (org.apache.samza.util.CommandLine)10 KV (org.apache.samza.operators.KV)9 KVSerde (org.apache.samza.serializers.KVSerde)9 StringSerde (org.apache.samza.serializers.StringSerde)9 MessageStream (org.apache.samza.operators.MessageStream)8 Duration (java.time.Duration)7 OutputStream (org.apache.samza.operators.OutputStream)7 PageViewEvent (org.apache.samza.example.models.PageViewEvent)6 Windows (org.apache.samza.operators.windows.Windows)6 WindowPane (org.apache.samza.operators.windows.WindowPane)4 Triggers (org.apache.samza.operators.triggers.Triggers)3