use of org.apache.samza.test.operator.data.PageView in project samza by apache.
the class TestSchedulingApp method describe.
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
final JsonSerdeV2<PageView> serde = new JsonSerdeV2<>(PageView.class);
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("kafka");
KafkaInputDescriptor<PageView> isd = ksd.getInputDescriptor(PAGE_VIEWS, serde);
final MessageStream<PageView> pageViews = appDescriptor.getInputStream(isd);
final MessageStream<PageView> output = pageViews.flatMap(new FlatmapScheduledFn());
MessageStreamAssert.that("Output from scheduling function should container all complete messages", output, serde).containsInAnyOrder(Arrays.asList(new PageView("v1-complete", "p1", "u1"), new PageView("v2-complete", "p2", "u1"), new PageView("v3-complete", "p1", "u2"), new PageView("v4-complete", "p3", "u2")));
}
use of org.apache.samza.test.operator.data.PageView in project samza by apache.
the class RepartitionJoinWindowApp method describe.
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
// offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
// before the application is run
Config config = appDescriptor.getConfig();
String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));
MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);
MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews.partitionBy(PageView::getViewId, pv -> pv, new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");
MessageStream<PageView> pageViewsRepartitionedByViewIdValueONly = pageViewsRepartitionedByViewId.map(KV::getValue);
MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks.partitionBy(AdClick::getViewId, ac -> ac, new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);
MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueONly.join(adClicksRepartitionedByViewIdValueOnly, new UserPageViewAdClicksJoiner(), new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class), Duration.ofMinutes(1), "pageViewAdClickJoin");
MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks.partitionBy(UserPageAdClick::getUserId, upac -> upac, KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");
userPageAdClicksByUserId.map(KV::getValue).window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow").map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size()))).sink((message, messageCollector, taskCoordinator) -> {
taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
messageCollector.send(new OutgoingMessageEnvelope(new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
});
intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}
use of org.apache.samza.test.operator.data.PageView in project samza by apache.
the class TestRepartitionWindowApp method testRepartitionedSessionWindowCounter.
@Test
public void testRepartitionedSessionWindowCounter() throws Exception {
Map<Integer, List<KV<String, PageView>>> pageViews = new HashMap<>();
pageViews.put(0, ImmutableList.of(KV.of("userId1", new PageView("india", "5.com", "userId1")), KV.of("userId1", new PageView("india", "2.com", "userId1"))));
pageViews.put(1, ImmutableList.of(KV.of("userId2", new PageView("china", "4.com", "userId2")), KV.of("userId1", new PageView("india", "3.com", "userId1"))));
pageViews.put(2, ImmutableList.of(KV.of("userId1", new PageView("india", "1.com", "userId1"))));
InMemorySystemDescriptor sd = new InMemorySystemDescriptor(SYSTEM);
InMemoryInputDescriptor<KV<String, PageView>> inputDescriptor = sd.getInputDescriptor(INPUT_TOPIC, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
/*
* Technically, this should have a message type of KV, because a KV is passed to sendTo, but
* StreamAssert.containsInAnyOrder requires the type to match the output type of the actual messages. In
* high-level, sendTo splits up the KV, so the actual messages are just the "V" part of the KV.
* TestRunner only uses NoOpSerde anyways, so it doesn't matter if the typing isn't KV.
*/
InMemoryOutputDescriptor<String> outputDescriptor = sd.getOutputDescriptor(OUTPUT_TOPIC, new NoOpSerde<>());
TestRunner.of(new RepartitionWindowApp()).addInputStream(inputDescriptor, pageViews).addOutputStream(outputDescriptor, 1).addConfig("task.window.ms", "1000").run(Duration.ofSeconds(10));
StreamAssert.containsInAnyOrder(Arrays.asList("userId1 4", "userId2 1"), outputDescriptor, Duration.ofSeconds(1));
}
use of org.apache.samza.test.operator.data.PageView in project samza by apache.
the class SessionWindowApp method describe.
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);
MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);
pageViews.filter(m -> !FILTER_KEY.equals(m.getUserId())).window(Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)), "sessionWindow").map(m -> KV.of(m.getKey().getKey(), m.getMessage().size())).sendTo(outputStream);
}
use of org.apache.samza.test.operator.data.PageView in project samza by apache.
the class TumblingWindowApp method describe.
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);
MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);
pageViews.filter(m -> !FILTER_KEY.equals(m.getUserId())).window(Windows.keyedTumblingWindow(PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)), "tumblingWindow").map(m -> KV.of(m.getKey().getKey(), m.getMessage().size())).sendTo(outputStream);
}
Aggregations