use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.
the class CogroupedKStreamImplTest method shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroups.
/**
 * Checks the topology description produced when a grouped stream whose key was
 * changed upstream (via {@code map}) is reused in two separate cogroup operations.
 * <p>
 * The expected description lists one repartition filter/sink pair per cogroup
 * (state-store suffixes 0000000004 and 0000000011), both fed by the single
 * {@code KSTREAM-MAP-0000000003} processor.
 */
@Test
public void shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroups() {
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> stream1 = builder.stream("one", stringConsumed);
final KStream<String, String> stream2 = builder.stream("two", stringConsumed);
final KStream<String, String> stream3 = builder.stream("three", stringConsumed);
// Swap key and value before grouping, so groupedOne sits downstream of a key-changing operation.
final KGroupedStream<String, String> groupedOne = stream1.map((k, v) -> new KeyValue<>(v, k)).groupByKey();
final KGroupedStream<String, String> groupedTwo = stream2.groupByKey();
final KGroupedStream<String, String> groupedThree = stream3.groupByKey();
// groupedOne takes part in two independent cogroups: first with groupedThree, then with groupedTwo.
groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedThree, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedTwo, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
// Built without extra properties; the full textual description is compared against the expected layout.
final String topologyDescription = builder.build().describe().toString();
assertThat(topologyDescription, equalTo("Topologies:\n" + " Sub-topology: 0\n" + " Source: KSTREAM-SOURCE-0000000000 (topics: [one])\n" + " --> KSTREAM-MAP-0000000003\n" + " Processor: KSTREAM-MAP-0000000003 (stores: [])\n" + " --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter, COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-filter\n" + " <-- KSTREAM-SOURCE-0000000000\n" + " Processor: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter (stores: [])\n" + " --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink\n" + " <-- KSTREAM-MAP-0000000003\n" + " Processor: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-filter (stores: [])\n" + " --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-sink\n" + " <-- KSTREAM-MAP-0000000003\n" + " Sink: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink (topic: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition)\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter\n" + " Sink: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-sink (topic: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition)\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-filter\n\n" + " Sub-topology: 1\n" + " Source: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-source (topics: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000015\n" + " Source: KSTREAM-SOURCE-0000000001 (topics: [two])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000016\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000015 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" + " --> COGROUPKSTREAM-MERGE-0000000017\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-source\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000016 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" + " --> COGROUPKSTREAM-MERGE-0000000017\n" + " 
<-- KSTREAM-SOURCE-0000000001\n" + " Processor: COGROUPKSTREAM-MERGE-0000000017 (stores: [])\n" + " --> none\n" + " <-- COGROUPKSTREAM-AGGREGATE-0000000015, COGROUPKSTREAM-AGGREGATE-0000000016\n\n" + " Sub-topology: 2\n" + " Source: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source (topics: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000008\n" + " Source: KSTREAM-SOURCE-0000000002 (topics: [three])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000009\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000008 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" + " --> COGROUPKSTREAM-MERGE-0000000010\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000009 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" + " --> COGROUPKSTREAM-MERGE-0000000010\n" + " <-- KSTREAM-SOURCE-0000000002\n" + " Processor: COGROUPKSTREAM-MERGE-0000000010 (stores: [])\n" + " --> none\n" + " <-- COGROUPKSTREAM-AGGREGATE-0000000008, COGROUPKSTREAM-AGGREGATE-0000000009\n\n"));
}
use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.
the class CogroupedKStreamImplTest method shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroupsWithOptimization.
/**
 * Checks the topology description produced when a grouped stream whose key was
 * changed upstream (via {@code map}) is reused in two separate cogroup operations
 * and the topology is built with {@code StreamsConfig.OPTIMIZE}.
 * <p>
 * The expected description contains a single repartition topic (state-store
 * suffix 0000000004) whose source feeds both {@code COGROUPKSTREAM-AGGREGATE-0000000008}
 * and {@code COGROUPKSTREAM-AGGREGATE-0000000015}.
 */
@Test
public void shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroupsWithOptimization() {
final StreamsBuilder builder = new StreamsBuilder();
// Turn on topology optimization; these properties are handed to builder.build(...) below.
final Properties properties = new Properties();
properties.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
final KStream<String, String> stream1 = builder.stream("one", stringConsumed);
final KStream<String, String> stream2 = builder.stream("two", stringConsumed);
final KStream<String, String> stream3 = builder.stream("three", stringConsumed);
// Swap key and value before grouping, so groupedOne sits downstream of a key-changing operation.
final KGroupedStream<String, String> groupedOne = stream1.map((k, v) -> new KeyValue<>(v, k)).groupByKey();
final KGroupedStream<String, String> groupedTwo = stream2.groupByKey();
final KGroupedStream<String, String> groupedThree = stream3.groupByKey();
// groupedOne takes part in two independent cogroups: first with groupedThree, then with groupedTwo.
groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedThree, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedTwo, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
// The full textual description is compared against the expected layout.
final String topologyDescription = builder.build(properties).describe().toString();
assertThat(topologyDescription, equalTo("Topologies:\n" + " Sub-topology: 0\n" + " Source: KSTREAM-SOURCE-0000000000 (topics: [one])\n" + " --> KSTREAM-MAP-0000000003\n" + " Processor: KSTREAM-MAP-0000000003 (stores: [])\n" + " --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter\n" + " <-- KSTREAM-SOURCE-0000000000\n" + " Processor: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter (stores: [])\n" + " --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink\n" + " <-- KSTREAM-MAP-0000000003\n" + " Sink: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink (topic: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition)\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter\n\n" + " Sub-topology: 1\n" + " Source: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source (topics: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000008, COGROUPKSTREAM-AGGREGATE-0000000015\n" + " Source: KSTREAM-SOURCE-0000000001 (topics: [two])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000016\n" + " Source: KSTREAM-SOURCE-0000000002 (topics: [three])\n" + " --> COGROUPKSTREAM-AGGREGATE-0000000009\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000008 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" + " --> COGROUPKSTREAM-MERGE-0000000010\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000009 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" + " --> COGROUPKSTREAM-MERGE-0000000010\n" + " <-- KSTREAM-SOURCE-0000000002\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000015 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" + " --> COGROUPKSTREAM-MERGE-0000000017\n" + " <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source\n" + " Processor: COGROUPKSTREAM-AGGREGATE-0000000016 (stores: 
[COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" + " --> COGROUPKSTREAM-MERGE-0000000017\n" + " <-- KSTREAM-SOURCE-0000000001\n" + " Processor: COGROUPKSTREAM-MERGE-0000000010 (stores: [])\n" + " --> none\n" + " <-- COGROUPKSTREAM-AGGREGATE-0000000008, COGROUPKSTREAM-AGGREGATE-0000000009\n" + " Processor: COGROUPKSTREAM-MERGE-0000000017 (stores: [])\n" + " --> none\n" + " <-- COGROUPKSTREAM-AGGREGATE-0000000015, COGROUPKSTREAM-AGGREGATE-0000000016\n\n"));
}
use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.
the class SmokeTestClient method getTopology.
/**
 * Builds the smoke-test topology.
 * <p>
 * Records read from the "data" topic are echoed to "echo" and aggregated per key into
 * the "min", "max", "sum" and "cnt" topics. Those topics are read back as tables to
 * derive "dif" (max - min) and "avg" (sum / cnt), and the count table is re-grouped
 * and aggregated into "tagg".
 *
 * @return the topology assembled from the steps above
 */
public Topology getTopology() {
    final StreamsBuilder topologyBuilder = new StreamsBuilder();

    final KStream<String, Integer> source =
        topologyBuilder.stream("data", Consumed.with(stringSerde, intSerde));

    // echo everything except records keyed "flush"
    source
        .filterNot((key, value) -> key.equals("flush"))
        .to("echo", Produced.with(stringSerde, intSerde));

    // drop records whose value equals END; null values are kept
    final KStream<String, Integer> data =
        source.filter((recordKey, recordValue) -> recordValue == null || recordValue != END);
    data.process(SmokeTestUtil.printProcessorSupplier("data", name));

    // min
    final KGroupedStream<String, Integer> groupedData =
        data.groupByKey(Grouped.with(stringSerde, intSerde));

    final TimeWindows oneDayWindows =
        TimeWindows.ofSizeAndGrace(Duration.ofDays(1), Duration.ofMinutes(1));
    final Materialized<String, Integer, WindowStore<Bytes, byte[]>> minStore =
        Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-min")
            .withValueSerde(intSerde)
            .withRetention(Duration.ofHours(25));
    final KTable<Windowed<String>, Integer> minAggregation = groupedData
        .windowedBy(oneDayWindows)
        .aggregate(
            () -> Integer.MAX_VALUE,
            (aggKey, value, aggregate) -> Math.min(value, aggregate),
            minStore);

    streamify(minAggregation, "min-raw");
    streamify(minAggregation.suppress(untilWindowCloses(BufferConfig.unbounded())), "min-suppressed");

    minAggregation
        .toStream(new Unwindow<>())
        .filterNot((key, value) -> key.equals("flush"))
        .to("min", Produced.with(stringSerde, intSerde));

    // sum over 2-second windows advancing by 1 second
    final TimeWindows twoSecondWindows = TimeWindows
        .ofSizeAndGrace(Duration.ofSeconds(2), Duration.ofSeconds(30))
        .advanceBy(Duration.ofSeconds(1));
    final KTable<Windowed<String>, Integer> smallWindowSum = groupedData
        .windowedBy(twoSecondWindows)
        .reduce(Integer::sum);

    streamify(smallWindowSum, "sws-raw");
    streamify(smallWindowSum.suppress(untilWindowCloses(BufferConfig.unbounded())), "sws-suppressed");

    final KTable<String, Integer> minTable = topologyBuilder.table(
        "min",
        Consumed.with(stringSerde, intSerde),
        Materialized.as("minStoreName"));
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min", name));

    // max
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> Integer.MIN_VALUE,
            (aggKey, value, aggregate) -> Math.max(value, aggregate),
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-max").withValueSerde(intSerde))
        .toStream(new Unwindow<>())
        .filterNot((key, value) -> key.equals("flush"))
        .to("max", Produced.with(stringSerde, intSerde));

    final KTable<String, Integer> maxTable = topologyBuilder.table(
        "max",
        Consumed.with(stringSerde, intSerde),
        Materialized.as("maxStoreName"));
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max", name));

    // sum
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> 0L,
            (aggKey, value, aggregate) -> (long) value + aggregate,
            Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("win-sum").withValueSerde(longSerde))
        .toStream(new Unwindow<>())
        .filterNot((key, value) -> key.equals("flush"))
        .to("sum", Produced.with(stringSerde, longSerde));

    final KTable<String, Long> sumTable =
        topologyBuilder.table("sum", Consumed.with(stringSerde, longSerde));
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum", name));

    // cnt
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .count(Materialized.as("uwin-cnt"))
        .toStream(new Unwindow<>())
        .filterNot((key, value) -> key.equals("flush"))
        .to("cnt", Produced.with(stringSerde, longSerde));

    final KTable<String, Long> cntTable = topologyBuilder.table(
        "cnt",
        Consumed.with(stringSerde, longSerde),
        Materialized.as("cntStoreName"));
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt", name));

    // dif
    maxTable
        .join(minTable, (maxValue, minValue) -> maxValue - minValue)
        .toStream()
        .filterNot((key, value) -> key.equals("flush"))
        .to("dif", Produced.with(stringSerde, intSerde));

    // avg
    sumTable
        .join(cntTable, (sumValue, cntValue) -> (double) sumValue / (double) cntValue)
        .toStream()
        .filterNot((key, value) -> key.equals("flush"))
        .to("avg", Produced.with(stringSerde, doubleSerde));

    // test repartition
    final Agg agg = new Agg();
    cntTable
        .groupBy(agg.selector(), Grouped.with(stringSerde, longSerde))
        .aggregate(
            agg.init(),
            agg.adder(),
            agg.remover(),
            Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("cntByCnt"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        .toStream()
        .to("tagg", Produced.with(stringSerde, longSerde));

    return topologyBuilder.build();
}
use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.
the class QueryableStateIntegrationTest method createCountStream.
/**
 * Builds a word-count application over {@code inputTopic}.
 * <p>
 * Each input line is split on non-word characters and grouped by the resulting word.
 * Two counts are kept per word: an all-time count, sent to {@code outputTopic}, and a
 * windowed count (window size {@code WINDOW_SIZE} milliseconds), sent to
 * {@code windowOutputTopic} keyed by the plain word. Both state stores are named by
 * appending {@code "-" + inputTopic} to the given store name.
 *
 * @param inputTopic           topic to read text lines from
 * @param outputTopic          topic receiving the all-time counts
 * @param windowOutputTopic    topic receiving the windowed counts
 * @param storeName            name prefix of the all-time count store
 * @param windowStoreName      name prefix of the windowed count store
 * @param streamsConfiguration configuration passed to the {@code KafkaStreams} constructor
 * @return a new {@code KafkaStreams} instance for the topology
 */
private KafkaStreams createCountStream(final String inputTopic, final String outputTopic, final String windowOutputTopic, final String storeName, final String windowStoreName, final Properties streamsConfiguration) {
    final StreamsBuilder builder = new StreamsBuilder();
    final Serde<String> stringSerde = Serdes.String();

    final KStream<String, String> textLines =
        builder.stream(inputTopic, Consumed.with(stringSerde, stringSerde));

    // One record per word, then regroup so that the word becomes the key.
    final KStream<String, String> words = textLines.flatMapValues(
        (ValueMapper<String, Iterable<String>>) line -> Arrays.asList(line.split("\\W+")));
    final KGroupedStream<String, String> groupedByWord =
        words.groupBy(MockMapper.selectValueMapper());

    // All-time word count.
    final String countStoreName = storeName + "-" + inputTopic;
    groupedByWord
        .count(Materialized.as(countStoreName))
        .toStream()
        .to(outputTopic, Produced.with(Serdes.String(), Serdes.Long()));

    // Windowed word count; the window wrapper is dropped from the key before writing.
    final String windowedCountStoreName = windowStoreName + "-" + inputTopic;
    final TimeWindows countWindows = TimeWindows.of(ofMillis(WINDOW_SIZE));
    groupedByWord
        .windowedBy(countWindows)
        .count(Materialized.as(windowedCountStoreName))
        .toStream((windowedWord, count) -> windowedWord.key())
        .to(windowOutputTopic, Produced.with(Serdes.String(), Serdes.Long()));

    return new KafkaStreams(builder.build(), streamsConfiguration);
}
use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.
the class SuppressScenarioTest method shouldWorkWithCogrouped.
/**
 * Builds a windowed, suppressed cogroup of two grouped streams.
 * <p>
 * The method contains no assertions; it only exercises construction of the
 * cogroup / windowedBy / aggregate / suppress / toStream chain.
 */
@Test
public void shouldWorkWithCogrouped() {
    final StreamsBuilder builder = new StreamsBuilder();

    final KGroupedStream<String, String> groupedOne = builder
        .stream("one", Consumed.with(Serdes.String(), Serdes.String()))
        .groupByKey(Grouped.with(Serdes.String(), Serdes.String()));
    final KGroupedStream<String, String> groupedTwo = builder
        .stream("two", Consumed.with(Serdes.String(), Serdes.String()))
        .groupByKey(Grouped.with(Serdes.String(), Serdes.String()));

    final TimeWindows fifteenMinuteWindows = TimeWindows.of(Duration.ofMinutes(15));

    // The result is deliberately left unused; its declared type fixes the aggregate's value type.
    final KStream<Windowed<String>, Object> cogrouped = groupedOne
        .cogroup((key, value, aggregate) -> aggregate + value)
        .cogroup(groupedTwo, (key, value, aggregate) -> aggregate + value)
        .windowedBy(fifteenMinuteWindows)
        .aggregate(() -> "", Named.as("test"), Materialized.as("store"))
        .suppress(Suppressed.untilWindowCloses(unbounded()))
        .toStream();
}
Aggregations