Use of org.apache.kafka.streams.kstream.KTable in project kafka by apache.
From the class KGroupedTableImplTest, method shouldReduce.
@Test
public void shouldReduce() {
    // project the Double-valued table to Integer values before grouping
    final KeyValueMapper<String, Number, KeyValue<String, Integer>> intProjection =
        (key, value) -> KeyValue.pair(key, value.intValue());
    final KTable<String, Integer> reduced = builder
        .table(topic, Consumed.with(Serdes.String(), Serdes.Double()),
            Materialized.<String, Double, KeyValueStore<Bytes, byte[]>>as("store")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Double()))
        .groupBy(intProjection)
        .reduce(MockReducer.INTEGER_ADDER, MockReducer.INTEGER_SUBTRACTOR, Materialized.as("reduced"));
    final MockApiProcessorSupplier<String, Integer, Void, Void> supplier = getReducedResults(reduced);
    try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
        assertReduced(supplier.theCapturedProcessor().lastValueAndTimestampPerKey(), topic, driver);
        assertEquals("reduced", reduced.queryableStoreName());
    }
}
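A minimal sketch, not part of the original test, of how the same topology could be fed by hand through the kafka-streams-test-utils TestInputTopic API; the sample keys and values are assumptions:

try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
    final TestInputTopic<String, Double> input =
        driver.createInputTopic(topic, new StringSerializer(), new DoubleSerializer());
    input.pipeInput("A", 1.1);  // intProjection truncates to 1; reduced["A"] = 1
    input.pipeInput("A", 2.2);  // subtractor removes the old 1, adder adds 2; reduced["A"] = 2
    input.pipeInput("B", 3.3);  // reduced["B"] = 3
}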
Use of org.apache.kafka.streams.kstream.KTable in project kafka by apache.
From the class SmokeTestClient, method getTopology.
public Topology getTopology() {
    final StreamsBuilder builder = new StreamsBuilder();
    final Consumed<String, Integer> stringIntConsumed = Consumed.with(stringSerde, intSerde);
    final KStream<String, Integer> source = builder.stream("data", stringIntConsumed);
    // echo every record except the "flush" marker
    source.filterNot((k, v) -> k.equals("flush")).to("echo", Produced.with(stringSerde, intSerde));
    // drop the END sentinel before aggregating
    final KStream<String, Integer> data = source.filter((key, value) -> value == null || value != END);
    data.process(SmokeTestUtil.printProcessorSupplier("data", name));
    // min
    final KGroupedStream<String, Integer> groupedData =
        data.groupByKey(Grouped.with(stringSerde, intSerde));
    final KTable<Windowed<String>, Integer> minAggregation = groupedData
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(1), Duration.ofMinutes(1)))
        .aggregate(
            () -> Integer.MAX_VALUE,
            (aggKey, value, aggregate) -> (value < aggregate) ? value : aggregate,
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-min")
                .withValueSerde(intSerde)
                .withRetention(Duration.ofHours(25)));
    streamify(minAggregation, "min-raw");
    streamify(minAggregation.suppress(untilWindowCloses(BufferConfig.unbounded())), "min-suppressed");
    minAggregation.toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("min", Produced.with(stringSerde, intSerde));
    // short hopping-window sums (sws), emitted both raw and suppressed
    final KTable<Windowed<String>, Integer> smallWindowSum = groupedData
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofSeconds(2), Duration.ofSeconds(30))
            .advanceBy(Duration.ofSeconds(1)))
        .reduce(Integer::sum);
    streamify(smallWindowSum, "sws-raw");
    streamify(smallWindowSum.suppress(untilWindowCloses(BufferConfig.unbounded())), "sws-suppressed");
    final KTable<String, Integer> minTable = builder.table(
        "min",
        Consumed.with(stringSerde, intSerde),
        Materialized.as("minStoreName"));
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min", name));
    // max
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> Integer.MIN_VALUE,
            (aggKey, value, aggregate) -> (value > aggregate) ? value : aggregate,
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-max").withValueSerde(intSerde))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("max", Produced.with(stringSerde, intSerde));
    final KTable<String, Integer> maxTable = builder.table(
        "max",
        Consumed.with(stringSerde, intSerde),
        Materialized.as("maxStoreName"));
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max", name));
    // sum
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> 0L,
            (aggKey, value, aggregate) -> (long) value + aggregate,
            Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("win-sum").withValueSerde(longSerde))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("sum", Produced.with(stringSerde, longSerde));
    final Consumed<String, Long> stringLongConsumed = Consumed.with(stringSerde, longSerde);
    final KTable<String, Long> sumTable = builder.table("sum", stringLongConsumed);
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum", name));
    // cnt
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .count(Materialized.as("uwin-cnt"))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("cnt", Produced.with(stringSerde, longSerde));
    final KTable<String, Long> cntTable = builder.table(
        "cnt",
        Consumed.with(stringSerde, longSerde),
        Materialized.as("cntStoreName"));
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt", name));
    // dif
    maxTable.join(minTable, (value1, value2) -> value1 - value2)
        .toStream()
        .filterNot((k, v) -> k.equals("flush"))
        .to("dif", Produced.with(stringSerde, intSerde));
    // avg
    sumTable.join(cntTable, (value1, value2) -> (double) value1 / (double) value2)
        .toStream()
        .filterNot((k, v) -> k.equals("flush"))
        .to("avg", Produced.with(stringSerde, doubleSerde));
    // test repartition
    final Agg agg = new Agg();
    cntTable.groupBy(agg.selector(), Grouped.with(stringSerde, longSerde))
        .aggregate(
            agg.init(),
            agg.adder(),
            agg.remover(),
            Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("cntByCnt"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        .toStream()
        .to("tagg", Produced.with(stringSerde, longSerde));
    return builder.build();
}
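The topology above relies on helpers (streamify, Unwindow, SmokeTestUtil, Agg) defined elsewhere in the smoke test client. As a rough sketch of what Unwindow does, assuming it simply strips the window from the key so a windowed aggregate can be re-keyed for an un-windowed output topic:

// Hypothetical reconstruction of the Unwindow helper: maps a Windowed<K> key
// back to its underlying plain key; the value is left untouched by toStream().
public static class Unwindow<K, V> implements KeyValueMapper<Windowed<K>, V, K> {
    @Override
    public K apply(final Windowed<K> windowedKey, final V value) {
        return windowedKey.key();
    }
}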
Use of org.apache.kafka.streams.kstream.KTable in project kafka by apache.
From the class PageViewTypedDemo, method main.
public static void main(final String[] args) {
    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-typed");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, JSONSerde.class);
    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, JSONSerde.class);
    props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, PageView> views =
        builder.stream("streams-pageview-input", Consumed.with(Serdes.String(), new JSONSerde<>()));
    final KTable<String, UserProfile> users =
        builder.table("streams-userprofile-input", Consumed.with(Serdes.String(), new JSONSerde<>()));
    final Duration duration24Hours = Duration.ofHours(24);
    final KStream<WindowedPageViewByRegion, RegionCount> regionCount = views
        // enrich each view with the viewer's region, defaulting to "UNKNOWN"
        .leftJoin(users, (view, profile) -> {
            final PageViewByRegion viewByRegion = new PageViewByRegion();
            viewByRegion.user = view.user;
            viewByRegion.page = view.page;
            if (profile != null) {
                viewByRegion.region = profile.region;
            } else {
                viewByRegion.region = "UNKNOWN";
            }
            return viewByRegion;
        })
        // re-key by region and count views per region in 7-day hopping windows
        .map((user, viewRegion) -> new KeyValue<>(viewRegion.region, viewRegion))
        .groupByKey(Grouped.with(Serdes.String(), new JSONSerde<>()))
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(7), duration24Hours).advanceBy(Duration.ofSeconds(1)))
        .count()
        .toStream()
        .map((key, value) -> {
            final WindowedPageViewByRegion wViewByRegion = new WindowedPageViewByRegion();
            wViewByRegion.windowStart = key.window().start();
            wViewByRegion.region = key.key();
            final RegionCount rCount = new RegionCount();
            rCount.region = key.key();
            rCount.count = value;
            return new KeyValue<>(wViewByRegion, rCount);
        });
    // write to the result topic
    regionCount.to("streams-pageviewstats-typed-output", Produced.with(new JSONSerde<>(), new JSONSerde<>()));
    final KafkaStreams streams = new KafkaStreams(builder.build(), props);
    final CountDownLatch latch = new CountDownLatch(1);
    // attach shutdown handler to catch control-c
    Runtime.getRuntime().addShutdownHook(new Thread("streams-pipe-shutdown-hook") {
        @Override
        public void run() {
            streams.close();
            latch.countDown();
        }
    });
    try {
        streams.start();
        latch.await();
    } catch (final Throwable e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.exit(0);
}
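PageView, UserProfile, PageViewByRegion, and the other value types are plain POJOs, and JSONSerde is the demo's own Jackson-backed serde. A simplified sketch of such a serde, assuming Jackson; note the real demo's zero-argument JSONSerde resolves concrete types through polymorphic type annotations on the POJOs, whereas this sketch takes the target class explicitly:

import java.io.IOException;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;

// Illustrative Jackson-backed serde; not the demo's actual implementation.
public class SimpleJsonSerde<T> implements Serializer<T>, Deserializer<T>, Serde<T> {
    private static final ObjectMapper MAPPER = new ObjectMapper();
    private final Class<T> type;

    public SimpleJsonSerde(final Class<T> type) {
        this.type = type;
    }

    @Override
    public byte[] serialize(final String topic, final T data) {
        try {
            return data == null ? null : MAPPER.writeValueAsBytes(data);
        } catch (final JsonProcessingException e) {
            throw new SerializationException("failed to serialize JSON", e);
        }
    }

    @Override
    public T deserialize(final String topic, final byte[] bytes) {
        try {
            return bytes == null ? null : MAPPER.readValue(bytes, type);
        } catch (final IOException e) {
            throw new SerializationException("failed to deserialize JSON", e);
        }
    }

    @Override
    public Serializer<T> serializer() {
        return this;
    }

    @Override
    public Deserializer<T> deserializer() {
        return this;
    }
}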
Use of org.apache.kafka.streams.kstream.KTable in project kafka by apache.
From the class StreamsPartitionAssignorTest, method shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount.
@Test
public void shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount() {
    builder = new CorruptedInternalTopologyBuilder();
    topologyMetadata = new TopologyMetadata(builder, new StreamsConfig(configProps()));
    final InternalStreamsBuilder streamsBuilder = new InternalStreamsBuilder(builder);
    final KStream<String, String> inputTopic = streamsBuilder.stream(singleton("topic1"), new ConsumedInternal<>());
    final KTable<String, String> inputTable = streamsBuilder.table(
        "topic2",
        new ConsumedInternal<>(),
        new MaterializedInternal<>(Materialized.as("store")));
    inputTopic
        .groupBy((k, v) -> k, Grouped.with("GroupName", Serdes.String(), Serdes.String()))
        .windowedBy(TimeWindows.of(Duration.ofMinutes(10)))
        .aggregate(() -> "", (k, v, a) -> a + k)
        .leftJoin(inputTable, v -> v, (x, y) -> x + y);
    streamsBuilder.buildAndOptimizeTopology();
    configureDefault();
    subscriptions.put("consumer", new Subscription(singletonList("topic"), defaultSubscriptionInfo.encode()));
    final Map<String, Assignment> assignments =
        partitionAssignor.assign(metadata, new GroupSubscription(subscriptions)).groupAssignment();
    assertThat(
        AssignmentInfo.decode(assignments.get("consumer").userData()).errCode(),
        equalTo(AssignorError.ASSIGNMENT_ERROR.code()));
}
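For context (not part of the test): Grouped.with("GroupName", ...) names an internal repartition topic, roughly <application.id>-GroupName-repartition, whose partition count the assignor normally derives from the upstream source topics; the CorruptedInternalTopologyBuilder breaks that derivation, so the assignor reports ASSIGNMENT_ERROR instead of an assignment. A quick sketch of inspecting that repartition topic with a regular builder (topology details here are assumptions):

// Build the same shape of topology with a normal StreamsBuilder and print it;
// the description lists the internal repartition topic the assignor must size.
final StreamsBuilder normalBuilder = new StreamsBuilder();
normalBuilder.stream("topic1", Consumed.with(Serdes.String(), Serdes.String()))
    .groupBy((k, v) -> k, Grouped.with("GroupName", Serdes.String(), Serdes.String()))
    .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofMinutes(10)))
    .aggregate(() -> "", (k, v, a) -> a + k);
System.out.println(normalBuilder.build().describe());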
Use of org.apache.kafka.streams.kstream.KTable in project kafka by apache.
From the class PageViewUntypedDemo, method main.
public static void main(final String[] args) throws Exception {
    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-untyped");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
    props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    final StreamsBuilder builder = new StreamsBuilder();
    final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
    final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();
    final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);
    final Consumed<String, JsonNode> consumed = Consumed.with(Serdes.String(), jsonSerde);
    final KStream<String, JsonNode> views = builder.stream("streams-pageview-input", consumed);
    final KTable<String, JsonNode> users = builder.table("streams-userprofile-input", consumed);
    final KTable<String, String> userRegions = users.mapValues(record -> record.get("region").textValue());
    final Duration duration24Hours = Duration.ofHours(24);
    final KStream<JsonNode, JsonNode> regionCount = views
        // enrich each view with the viewer's region, defaulting to "UNKNOWN"
        .leftJoin(userRegions, (view, region) -> {
            final ObjectNode jNode = JsonNodeFactory.instance.objectNode();
            return (JsonNode) jNode
                .put("user", view.get("user").textValue())
                .put("page", view.get("page").textValue())
                .put("region", region == null ? "UNKNOWN" : region);
        })
        // re-key by region and count views per region in 7-day hopping windows
        .map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").textValue(), viewRegion))
        .groupByKey(Grouped.with(Serdes.String(), jsonSerde))
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(7), duration24Hours).advanceBy(Duration.ofSeconds(1)))
        .count()
        .toStream()
        .map((key, value) -> {
            final ObjectNode keyNode = JsonNodeFactory.instance.objectNode();
            keyNode.put("window-start", key.window().start()).put("region", key.key());
            final ObjectNode valueNode = JsonNodeFactory.instance.objectNode();
            valueNode.put("count", value);
            return new KeyValue<>((JsonNode) keyNode, (JsonNode) valueNode);
        });
    // write to the result topic
    regionCount.to("streams-pageviewstats-untyped-output", Produced.with(jsonSerde, jsonSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), props);
    streams.start();
    // usually a streams application would run forever, but the input data here
    // is finite, so we let it run for a while and then stop
    Thread.sleep(5000L);
    streams.close();
}
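For reference (an illustration, not part of the demo): the joins above expect page-view records keyed by user with "user" and "page" fields, and user-profile records keyed by user with a "region" field; the JsonTimestampExtractor configured earlier presumably reads a "timestamp" field. One assumed way to seed the two input topics:

// Illustrative seeding of the demo's input topics; field names come from the
// joins above, while the sample values and "timestamp" field are assumptions.
final Properties producerProps = new Properties();
producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
try (final Producer<String, String> producer = new KafkaProducer<>(producerProps)) {
    // user profile: key = user id, value = JSON carrying the user's region
    producer.send(new ProducerRecord<>("streams-userprofile-input", "alice",
        "{\"region\":\"europe\",\"timestamp\":1690000000000}"));
    // page view: key = user id (the join key), value = JSON with user and page
    producer.send(new ProducerRecord<>("streams-pageview-input", "alice",
        "{\"user\":\"alice\",\"page\":\"index.html\",\"timestamp\":1690000000000}"));
}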