Search in sources :

Example 1 with UserPageAdClick

use of org.apache.samza.test.operator.data.UserPageAdClick in project samza by apache.

The method describe of the class RepartitionJoinWindowApp.

/**
 * Builds the stream topology for this application: two Kafka inputs are repartitioned by view id,
 * joined, repartitioned again by user id, windowed per user, and the size of each window pane is
 * sent to the configured output topic.
 *
 * <p>Test note: {@code offset.default = oldest} is required, because the checkpoint topic is empty
 * on start and the input messages are published before the application is run.
 *
 * <p>The stream ids of the three repartition (intermediate) streams are appended to
 * {@code intermediateStreamIds}.
 *
 * @param appDescriptor descriptor used to read the config and to obtain the input streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    // Topic names come from the application config.
    Config cfg = appDescriptor.getConfig();
    String pageViewTopic = cfg.get(INPUT_TOPIC_1_CONFIG_KEY);
    String adClickTopic = cfg.get(INPUT_TOPIC_2_CONFIG_KEY);
    String sinkTopic = cfg.get(OUTPUT_TOPIC_CONFIG_KEY);

    // Input descriptors and the raw input streams.
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> pageViewInput =
        kafka.getInputDescriptor(pageViewTopic, new JsonSerdeV2<>(PageView.class));
    KafkaInputDescriptor<AdClick> adClickInput =
        kafka.getInputDescriptor(adClickTopic, new JsonSerdeV2<>(AdClick.class));
    MessageStream<PageView> rawPageViews = appDescriptor.getInputStream(pageViewInput);
    MessageStream<AdClick> rawAdClicks = appDescriptor.getInputStream(adClickInput);

    // Repartition both inputs by view id, then drop the key again so the join sees plain values.
    MessageStream<KV<String, PageView>> keyedPageViews = rawPageViews.partitionBy(
        PageView::getViewId,
        pv -> pv,
        new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)),
        "pageViewsByViewId");
    MessageStream<PageView> pageViewValues = keyedPageViews.map(KV::getValue);

    MessageStream<KV<String, AdClick>> keyedAdClicks = rawAdClicks.partitionBy(
        AdClick::getViewId,
        ac -> ac,
        new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)),
        "adClicksByViewId");
    MessageStream<AdClick> adClickValues = keyedAdClicks.map(KV::getValue);

    // Join page views with ad clicks (the one-minute Duration is presumably the join TTL; confirm
    // against the MessageStream#join contract).
    MessageStream<UserPageAdClick> joined = pageViewValues.join(
        adClickValues,
        new UserPageViewAdClicksJoiner(),
        new StringSerde(),
        new JsonSerdeV2<>(PageView.class),
        new JsonSerdeV2<>(AdClick.class),
        Duration.ofMinutes(1),
        "pageViewAdClickJoin");

    // Repartition the joined records by user id before windowing.
    MessageStream<KV<String, UserPageAdClick>> joinedByUser = joined.partitionBy(
        UserPageAdClick::getUserId,
        upac -> upac,
        KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
        "userPageAdClicksByUserId");
    MessageStream<UserPageAdClick> joinedValues = joinedByUser.map(KV::getValue);

    // Window per user id and emit (user id, number of messages in the pane) as strings.
    MessageStream<KV<String, String>> paneSizesByUser = joinedValues
        .window(
            Windows.keyedSessionWindow(
                UserPageAdClick::getUserId,
                Duration.ofSeconds(3),
                new StringSerde(),
                new JsonSerdeV2<>(UserPageAdClick.class)),
            "userAdClickWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), String.valueOf(pane.getMessage().size())));

    // Request a commit for every result, then forward the result to the output topic.
    paneSizesByUser.sink((result, collector, coordinator) -> {
        coordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        SystemStream destination = new SystemStream("kafka", sinkTopic);
        collector.send(new OutgoingMessageEnvelope(destination, null, result.getKey(), result.getValue()));
    });

    // Record the ids of the three repartition streams, in creation order.
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) keyedPageViews).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) keyedAdClicks).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) joinedByUser).getStreamId());
}
Also used : Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) PageView(org.apache.samza.test.operator.data.PageView) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) Config(org.apache.samza.config.Config) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope)

Aggregations

Duration (java.time.Duration)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 StreamApplication (org.apache.samza.application.StreamApplication)1 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)1 Config (org.apache.samza.config.Config)1 KV (org.apache.samza.operators.KV)1 MessageStream (org.apache.samza.operators.MessageStream)1 JoinFunction (org.apache.samza.operators.functions.JoinFunction)1 IntermediateMessageStreamImpl (org.apache.samza.operators.stream.IntermediateMessageStreamImpl)1 Windows (org.apache.samza.operators.windows.Windows)1 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)1 KVSerde (org.apache.samza.serializers.KVSerde)1 StringSerde (org.apache.samza.serializers.StringSerde)1 OutgoingMessageEnvelope (org.apache.samza.system.OutgoingMessageEnvelope)1 SystemStream (org.apache.samza.system.SystemStream)1 KafkaInputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor)1 KafkaSystemDescriptor (org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)1 TaskCoordinator (org.apache.samza.task.TaskCoordinator)1 AdClick (org.apache.samza.test.operator.data.AdClick)1