Example 11 with KGroupedStream

Use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.

From class CogroupedKStreamImplTest, method shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroups.

@Test
public void shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroups() {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> stream1 = builder.stream("one", stringConsumed);
    final KStream<String, String> stream2 = builder.stream("two", stringConsumed);
    final KStream<String, String> stream3 = builder.stream("three", stringConsumed);
    final KGroupedStream<String, String> groupedOne = stream1.map((k, v) -> new KeyValue<>(v, k)).groupByKey();
    final KGroupedStream<String, String> groupedTwo = stream2.groupByKey();
    final KGroupedStream<String, String> groupedThree = stream3.groupByKey();
    groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedThree, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
    groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedTwo, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
    final String topologyDescription = builder.build().describe().toString();
    assertThat(topologyDescription, equalTo(
        "Topologies:\n" +
        "   Sub-topology: 0\n" +
        "    Source: KSTREAM-SOURCE-0000000000 (topics: [one])\n" +
        "      --> KSTREAM-MAP-0000000003\n" +
        "    Processor: KSTREAM-MAP-0000000003 (stores: [])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter, COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-filter\n" +
        "      <-- KSTREAM-SOURCE-0000000000\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter (stores: [])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink\n" +
        "      <-- KSTREAM-MAP-0000000003\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-filter (stores: [])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-sink\n" +
        "      <-- KSTREAM-MAP-0000000003\n" +
        "    Sink: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink (topic: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition)\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter\n" +
        "    Sink: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-sink (topic: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition)\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-filter\n\n" +
        "  Sub-topology: 1\n" +
        "    Source: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-source (topics: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000015\n" +
        "    Source: KSTREAM-SOURCE-0000000001 (topics: [two])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000016\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000015 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000017\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011-repartition-source\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000016 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000017\n" +
        "      <-- KSTREAM-SOURCE-0000000001\n" +
        "    Processor: COGROUPKSTREAM-MERGE-0000000017 (stores: [])\n" +
        "      --> none\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-0000000015, COGROUPKSTREAM-AGGREGATE-0000000016\n\n" +
        "  Sub-topology: 2\n" +
        "    Source: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source (topics: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000008\n" +
        "    Source: KSTREAM-SOURCE-0000000002 (topics: [three])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000009\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000008 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000010\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000009 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000010\n" +
        "      <-- KSTREAM-SOURCE-0000000002\n" +
        "    Processor: COGROUPKSTREAM-MERGE-0000000010 (stores: [])\n" +
        "      --> none\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-0000000008, COGROUPKSTREAM-AGGREGATE-0000000009\n\n"));
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) MockInitializer(org.apache.kafka.test.MockInitializer) SessionWindows(org.apache.kafka.streams.kstream.SessionWindows) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Assert.assertThrows(org.junit.Assert.assertThrows) Window(org.apache.kafka.streams.kstream.Window) KStream(org.apache.kafka.streams.kstream.KStream) Initializer(org.apache.kafka.streams.kstream.Initializer) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Named(org.apache.kafka.streams.kstream.Named) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) IntegerSerializer(org.apache.kafka.common.serialization.IntegerSerializer) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Aggregator(org.apache.kafka.streams.kstream.Aggregator) TestRecord(org.apache.kafka.streams.test.TestRecord) Before(org.junit.Before) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) MockValueJoiner(org.apache.kafka.test.MockValueJoiner) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) TestOutputTopic(org.apache.kafka.streams.TestOutputTopic) Consumed(org.apache.kafka.streams.kstream.Consumed) KeyValue(org.apache.kafka.streams.KeyValue) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Grouped(org.apache.kafka.streams.kstream.Grouped) MockAggregator(org.apache.kafka.test.MockAggregator) Bytes(org.apache.kafka.common.utils.Bytes) SlidingWindows(org.apache.kafka.streams.kstream.SlidingWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) IntegerDeserializer(org.apache.kafka.common.serialization.IntegerDeserializer) TestInputTopic(org.apache.kafka.streams.TestInputTopic) CogroupedKStream(org.apache.kafka.streams.kstream.CogroupedKStream) Windows(org.apache.kafka.streams.kstream.Windows) StreamsTestUtils(org.apache.kafka.test.StreamsTestUtils) KeyValue(org.apache.kafka.streams.KeyValue) Test(org.junit.Test)
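
Why the two repartition topics appear: map may change the record key, so Kafka Streams marks the stream as needing repartitioning before any stateful operation, and each cogroup aggregation built from groupedOne gets its own repartition topic (the ...0000000004 and ...0000000011 topics in the assertion). A minimal sketch of the contrast, assuming a hypothetical input topic named "input"; mapValues preserves the key and avoids the repartition step entirely:

import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;

public class RepartitionSketch {
    public static void main(final String[] args) {
        // mapValues cannot change the key, so groupByKey().count() needs no repartition topic.
        final StreamsBuilder valuesOnly = new StreamsBuilder();
        valuesOnly.<String, String>stream("input")
            .mapValues(v -> v.toUpperCase())
            .groupByKey()
            .count();
        System.out.println(valuesOnly.build().describe());

        // map may change the key, so a repartition topic is inserted ahead of the count.
        final StreamsBuilder keyChanging = new StreamsBuilder();
        keyChanging.<String, String>stream("input")
            .map((k, v) -> new KeyValue<>(v, k))
            .groupByKey()
            .count();
        System.out.println(keyChanging.build().describe());
    }
}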

Example 12 with KGroupedStream

Use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.

From class CogroupedKStreamImplTest, method shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroupsWithOptimization.

@Test
public void shouldInsertRepartitionsTopicForUpstreamKeyModificationWithGroupedReusedInDifferentCogroupsWithOptimization() {
    final StreamsBuilder builder = new StreamsBuilder();
    final Properties properties = new Properties();
    properties.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
    final KStream<String, String> stream1 = builder.stream("one", stringConsumed);
    final KStream<String, String> stream2 = builder.stream("two", stringConsumed);
    final KStream<String, String> stream3 = builder.stream("three", stringConsumed);
    final KGroupedStream<String, String> groupedOne = stream1.map((k, v) -> new KeyValue<>(v, k)).groupByKey();
    final KGroupedStream<String, String> groupedTwo = stream2.groupByKey();
    final KGroupedStream<String, String> groupedThree = stream3.groupByKey();
    groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedThree, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
    groupedOne.cogroup(STRING_AGGREGATOR).cogroup(groupedTwo, STRING_AGGREGATOR).aggregate(STRING_INITIALIZER);
    final String topologyDescription = builder.build(properties).describe().toString();
    assertThat(topologyDescription, equalTo(
        "Topologies:\n" +
        "   Sub-topology: 0\n" +
        "    Source: KSTREAM-SOURCE-0000000000 (topics: [one])\n" +
        "      --> KSTREAM-MAP-0000000003\n" +
        "    Processor: KSTREAM-MAP-0000000003 (stores: [])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter\n" +
        "      <-- KSTREAM-SOURCE-0000000000\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter (stores: [])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink\n" +
        "      <-- KSTREAM-MAP-0000000003\n" +
        "    Sink: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-sink (topic: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition)\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-filter\n\n" +
        "  Sub-topology: 1\n" +
        "    Source: COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source (topics: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000008, COGROUPKSTREAM-AGGREGATE-0000000015\n" +
        "    Source: KSTREAM-SOURCE-0000000001 (topics: [two])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000016\n" +
        "    Source: KSTREAM-SOURCE-0000000002 (topics: [three])\n" +
        "      --> COGROUPKSTREAM-AGGREGATE-0000000009\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000008 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000010\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000009 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000010\n" +
        "      <-- KSTREAM-SOURCE-0000000002\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000015 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000017\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000004-repartition-source\n" +
        "    Processor: COGROUPKSTREAM-AGGREGATE-0000000016 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000011])\n" +
        "      --> COGROUPKSTREAM-MERGE-0000000017\n" +
        "      <-- KSTREAM-SOURCE-0000000001\n" +
        "    Processor: COGROUPKSTREAM-MERGE-0000000010 (stores: [])\n" +
        "      --> none\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-0000000008, COGROUPKSTREAM-AGGREGATE-0000000009\n" +
        "    Processor: COGROUPKSTREAM-MERGE-0000000017 (stores: [])\n" +
        "      --> none\n" +
        "      <-- COGROUPKSTREAM-AGGREGATE-0000000015, COGROUPKSTREAM-AGGREGATE-0000000016\n\n"));
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) MockInitializer(org.apache.kafka.test.MockInitializer) SessionWindows(org.apache.kafka.streams.kstream.SessionWindows) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Assert.assertThrows(org.junit.Assert.assertThrows) Window(org.apache.kafka.streams.kstream.Window) KStream(org.apache.kafka.streams.kstream.KStream) Initializer(org.apache.kafka.streams.kstream.Initializer) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Named(org.apache.kafka.streams.kstream.Named) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) IntegerSerializer(org.apache.kafka.common.serialization.IntegerSerializer) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Aggregator(org.apache.kafka.streams.kstream.Aggregator) TestRecord(org.apache.kafka.streams.test.TestRecord) Before(org.junit.Before) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) MockValueJoiner(org.apache.kafka.test.MockValueJoiner) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) TestOutputTopic(org.apache.kafka.streams.TestOutputTopic) Consumed(org.apache.kafka.streams.kstream.Consumed) KeyValue(org.apache.kafka.streams.KeyValue) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Grouped(org.apache.kafka.streams.kstream.Grouped) MockAggregator(org.apache.kafka.test.MockAggregator) Bytes(org.apache.kafka.common.utils.Bytes) SlidingWindows(org.apache.kafka.streams.kstream.SlidingWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) IntegerDeserializer(org.apache.kafka.common.serialization.IntegerDeserializer) TestInputTopic(org.apache.kafka.streams.TestInputTopic) CogroupedKStream(org.apache.kafka.streams.kstream.CogroupedKStream) Windows(org.apache.kafka.streams.kstream.Windows) StreamsTestUtils(org.apache.kafka.test.StreamsTestUtils) KeyValue(org.apache.kafka.streams.KeyValue) Properties(java.util.Properties) Test(org.junit.Test)
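
With optimization enabled, the two repartition topics of Example 11 collapse into the single ...0000000004-repartition topic above, which feeds both aggregations. Note that builder.build(properties) is what applies the optimization; a plain builder.build() returns the unoptimized topology. A minimal sketch of wiring this into an application, with placeholder application id and bootstrap servers:

import java.util.Properties;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;

public class OptimizedBuildSketch {
    public static KafkaStreams start(final StreamsBuilder builder) {
        final Properties props = new Properties();
        props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "cogroup-demo");      // placeholder
        props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);

        // Pass the same properties to build() and to KafkaStreams so the
        // optimized topology and the runtime configuration stay consistent.
        final Topology topology = builder.build(props);
        return new KafkaStreams(topology, props);
    }
}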

Example 13 with KGroupedStream

Use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.

From class SmokeTestClient, method getTopology.

public Topology getTopology() {
    final StreamsBuilder builder = new StreamsBuilder();
    final Consumed<String, Integer> stringIntConsumed = Consumed.with(stringSerde, intSerde);
    final KStream<String, Integer> source = builder.stream("data", stringIntConsumed);
    source.filterNot((k, v) -> k.equals("flush")).to("echo", Produced.with(stringSerde, intSerde));
    final KStream<String, Integer> data = source.filter((key, value) -> value == null || value != END);
    data.process(SmokeTestUtil.printProcessorSupplier("data", name));
    // min
    final KGroupedStream<String, Integer> groupedData = data.groupByKey(Grouped.with(stringSerde, intSerde));
    final KTable<Windowed<String>, Integer> minAggregation = groupedData
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(1), Duration.ofMinutes(1)))
        .aggregate(
            () -> Integer.MAX_VALUE,
            (aggKey, value, aggregate) -> (value < aggregate) ? value : aggregate,
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-min")
                .withValueSerde(intSerde)
                .withRetention(Duration.ofHours(25)));
    streamify(minAggregation, "min-raw");
    streamify(minAggregation.suppress(untilWindowCloses(BufferConfig.unbounded())), "min-suppressed");
    minAggregation.toStream(new Unwindow<>()).filterNot((k, v) -> k.equals("flush")).to("min", Produced.with(stringSerde, intSerde));
    final KTable<Windowed<String>, Integer> smallWindowSum = groupedData
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofSeconds(2), Duration.ofSeconds(30))
            .advanceBy(Duration.ofSeconds(1)))
        .reduce(Integer::sum);
    streamify(smallWindowSum, "sws-raw");
    streamify(smallWindowSum.suppress(untilWindowCloses(BufferConfig.unbounded())), "sws-suppressed");
    final KTable<String, Integer> minTable = builder.table("min", Consumed.with(stringSerde, intSerde), Materialized.as("minStoreName"));
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min", name));
    // max
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> Integer.MIN_VALUE,
            (aggKey, value, aggregate) -> (value > aggregate) ? value : aggregate,
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-max").withValueSerde(intSerde))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("max", Produced.with(stringSerde, intSerde));
    final KTable<String, Integer> maxTable = builder.table("max", Consumed.with(stringSerde, intSerde), Materialized.as("maxStoreName"));
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max", name));
    // sum
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> 0L,
            (aggKey, value, aggregate) -> (long) value + aggregate,
            Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("win-sum").withValueSerde(longSerde))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("sum", Produced.with(stringSerde, longSerde));
    final Consumed<String, Long> stringLongConsumed = Consumed.with(stringSerde, longSerde);
    final KTable<String, Long> sumTable = builder.table("sum", stringLongConsumed);
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum", name));
    // cnt
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .count(Materialized.as("uwin-cnt"))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("cnt", Produced.with(stringSerde, longSerde));
    final KTable<String, Long> cntTable = builder.table("cnt", Consumed.with(stringSerde, longSerde), Materialized.as("cntStoreName"));
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt", name));
    // dif
    maxTable.join(minTable, (value1, value2) -> value1 - value2).toStream().filterNot((k, v) -> k.equals("flush")).to("dif", Produced.with(stringSerde, intSerde));
    // avg
    sumTable.join(cntTable, (value1, value2) -> (double) value1 / (double) value2).toStream().filterNot((k, v) -> k.equals("flush")).to("avg", Produced.with(stringSerde, doubleSerde));
    // test repartition
    final Agg agg = new Agg();
    cntTable.groupBy(agg.selector(), Grouped.with(stringSerde, longSerde))
        .aggregate(agg.init(), agg.adder(), agg.remover(),
            Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("cntByCnt"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        .toStream()
        .to("tagg", Produced.with(stringSerde, longSerde));
    return builder.build();
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Produced(org.apache.kafka.streams.kstream.Produced) Stores(org.apache.kafka.streams.state.Stores) KStream(org.apache.kafka.streams.kstream.KStream) WindowStore(org.apache.kafka.streams.state.WindowStore) Suppressed.untilWindowCloses(org.apache.kafka.streams.kstream.Suppressed.untilWindowCloses) Windowed(org.apache.kafka.streams.kstream.Windowed) Duration(java.time.Duration) Serdes(org.apache.kafka.common.serialization.Serdes) BufferConfig(org.apache.kafka.streams.kstream.Suppressed.BufferConfig) Utils(org.apache.kafka.common.utils.Utils) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) StreamsUncaughtExceptionHandler(org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler) Files(java.nio.file.Files) Consumed(org.apache.kafka.streams.kstream.Consumed) KeyValue(org.apache.kafka.streams.KeyValue) IOException(java.io.IOException) Instant(java.time.Instant) Grouped(org.apache.kafka.streams.kstream.Grouped) File(java.io.File) Bytes(org.apache.kafka.common.utils.Bytes) KafkaThread(org.apache.kafka.common.utils.KafkaThread) TimeUnit(java.util.concurrent.TimeUnit) CountDownLatch(java.util.concurrent.CountDownLatch) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Topology(org.apache.kafka.streams.Topology) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Windowed(org.apache.kafka.streams.kstream.Windowed) Bytes(org.apache.kafka.common.utils.Bytes)
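
The helpers streamify and Unwindow used above are defined elsewhere in SmokeTestClient and SmokeTestUtil and are not reproduced on this page. A plausible sketch of their shape, for readability only and not the verbatim Kafka source (stringSerde and intSerde are assumed to be the serde fields of the enclosing class):

import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.KeyValueMapper;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.Windowed;

// Sketch: strips the window from a windowed key, keeping only the plain key.
public static class Unwindow<K, V> implements KeyValueMapper<Windowed<K>, V, KeyValue<K, V>> {
    @Override
    public KeyValue<K, V> apply(final Windowed<K> winKey, final V value) {
        return new KeyValue<>(winKey.key(), value);
    }
}

// Sketch: publishes a windowed aggregate to a topic keyed by the window's
// string form, so raw and suppressed results can be compared downstream.
private static void streamify(final KTable<Windowed<String>, Integer> windowedTable, final String topic) {
    windowedTable.toStream()
        .filterNot((k, v) -> k.key().equals("flush"))
        .map((key, value) -> new KeyValue<>(key.toString(), value))
        .to(topic, Produced.with(stringSerde, intSerde));
}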

Example 14 with KGroupedStream

Use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.

From class QueryableStateIntegrationTest, method createCountStream.

/**
 * Creates a typical word count topology.
 */
private KafkaStreams createCountStream(final String inputTopic,
                                       final String outputTopic,
                                       final String windowOutputTopic,
                                       final String storeName,
                                       final String windowStoreName,
                                       final Properties streamsConfiguration) {
    final StreamsBuilder builder = new StreamsBuilder();
    final Serde<String> stringSerde = Serdes.String();
    final KStream<String, String> textLines = builder.stream(inputTopic, Consumed.with(stringSerde, stringSerde));
    final KGroupedStream<String, String> groupedByWord = textLines
        .flatMapValues((ValueMapper<String, Iterable<String>>) value -> Arrays.asList(value.split("\\W+")))
        .groupBy(MockMapper.selectValueMapper());
    // Create a state store for the all-time word count
    groupedByWord.count(Materialized.as(storeName + "-" + inputTopic))
        .toStream()
        .to(outputTopic, Produced.with(Serdes.String(), Serdes.Long()));
    // Create a windowed state store holding the word count for each one-minute window
    groupedByWord.windowedBy(TimeWindows.of(ofMillis(WINDOW_SIZE)))
        .count(Materialized.as(windowStoreName + "-" + inputTopic))
        .toStream((key, value) -> key.key())
        .to(windowOutputTopic, Produced.with(Serdes.String(), Serdes.Long()));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Arrays(java.util.Arrays) Utils.mkProperties(org.apache.kafka.common.utils.Utils.mkProperties) MockTime(kafka.utils.MockTime) Instant.ofEpochMilli(java.time.Instant.ofEpochMilli) Utils.mkMap(org.apache.kafka.common.utils.Utils.mkMap) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) IntegrationTestUtils.safeUniqueTestName(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Duration(java.time.Duration) Map(java.util.Map) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) StoreQueryParameters.fromNameAndType(org.apache.kafka.streams.StoreQueryParameters.fromNameAndType) AfterClass(org.junit.AfterClass) TestUtils(org.apache.kafka.test.TestUtils) StreamsTestUtils.startKafkaStreamsAndWaitForRunningState(org.apache.kafka.test.StreamsTestUtils.startKafkaStreamsAndWaitForRunningState) Utils.mkSet(org.apache.kafka.common.utils.Utils.mkSet) Set(java.util.Set) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) State(org.apache.kafka.streams.KafkaStreams.State) Category(org.junit.experimental.categories.Category) KafkaStreamsTest(org.apache.kafka.streams.KafkaStreamsTest) QueryableStoreTypes(org.apache.kafka.streams.state.QueryableStoreTypes) Predicate(org.apache.kafka.streams.kstream.Predicate) Utils.mkEntry(org.apache.kafka.common.utils.Utils.mkEntry) Matchers.is(org.hamcrest.Matchers.is) ReadOnlyKeyValueStore(org.apache.kafka.streams.state.ReadOnlyKeyValueStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TreeSet(java.util.TreeSet) UnknownStateStoreException(org.apache.kafka.streams.errors.UnknownStateStoreException) ArrayList(java.util.ArrayList) EmbeddedKafkaCluster(org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster) TestName(org.junit.rules.TestName) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) KTable(org.apache.kafka.streams.kstream.KTable) IntegrationTestUtils.waitForApplicationState(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForApplicationState) Properties(java.util.Properties) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Assert.assertNull(org.junit.Assert.assertNull) KeyQueryMetadata(org.apache.kafka.streams.KeyQueryMetadata) StringReader(java.io.StringReader) TreeMap(java.util.TreeMap) IntegrationTestUtils.getRunningStreams(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.getRunningStreams) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) KafkaStreams(org.apache.kafka.streams.KafkaStreams) BufferedReader(java.io.BufferedReader) ReadOnlySessionStore(org.apache.kafka.streams.state.ReadOnlySessionStore) Assert.assertEquals(org.junit.Assert.assertEquals) QueryableStoreTypes.sessionStore(org.apache.kafka.streams.state.QueryableStoreTypes.sessionStore) QueryableStoreTypes.keyValueStore(org.apache.kafka.streams.state.QueryableStoreTypes.keyValueStore) Produced(org.apache.kafka.streams.kstream.Produced) LoggerFactory(org.slf4j.LoggerFactory) IsEqual.equalTo(org.hamcrest.core.IsEqual.equalTo) Serde(org.apache.kafka.common.serialization.Serde) After(org.junit.After) Serdes(org.apache.kafka.common.serialization.Serdes) MockMapper(org.apache.kafka.test.MockMapper) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) Bytes(org.apache.kafka.common.utils.Bytes) Objects(java.util.Objects) IntegrationTestUtils(org.apache.kafka.streams.integration.utils.IntegrationTestUtils) List(java.util.List) Materialized(org.apache.kafka.streams.kstream.Materialized) Entry(java.util.Map.Entry) Duration.ofMillis(java.time.Duration.ofMillis) InvalidStateStoreException(org.apache.kafka.streams.errors.InvalidStateStoreException) StreamsConfig(org.apache.kafka.streams.StreamsConfig) ReadOnlyWindowStore(org.apache.kafka.streams.state.ReadOnlyWindowStore) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) IntegrationTestUtils.startApplicationAndWaitUntilRunning(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.startApplicationAndWaitUntilRunning) BeforeClass(org.junit.BeforeClass) Assert.assertThrows(org.junit.Assert.assertThrows) IntegrationTest(org.apache.kafka.test.IntegrationTest) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) KStream(org.apache.kafka.streams.kstream.KStream) Duration.ofSeconds(java.time.Duration.ofSeconds) NoRetryException(org.apache.kafka.test.NoRetryException) HashSet(java.util.HashSet) TestUtils.retryOnExceptionWithTimeout(org.apache.kafka.test.TestUtils.retryOnExceptionWithTimeout) KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) ValueMapper(org.apache.kafka.streams.kstream.ValueMapper) PrintStream(java.io.PrintStream) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Logger(org.slf4j.Logger) Consumed(org.apache.kafka.streams.kstream.Consumed) Matchers(org.hamcrest.Matchers) TimeUnit(java.util.concurrent.TimeUnit) KeyValueIterator(org.apache.kafka.streams.state.KeyValueIterator) Rule(org.junit.Rule) LagInfo(org.apache.kafka.streams.LagInfo) WindowStoreIterator(org.apache.kafka.streams.state.WindowStoreIterator) FileReader(java.io.FileReader) Comparator(java.util.Comparator) Collections(java.util.Collections) KafkaStreams(org.apache.kafka.streams.KafkaStreams) ValueMapper(org.apache.kafka.streams.kstream.ValueMapper)
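
The store names assigned above (storeName + "-" + inputTopic and the windowed variant) are what make the counts reachable through interactive queries. A minimal sketch of reading the key-value store once the returned KafkaStreams instance is in the RUNNING state (the method name and error handling here are illustrative, not part of the test):

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

public static Long lookupCount(final KafkaStreams streams, final String storeName,
                               final String inputTopic, final String word) {
    // The store was materialized as storeName + "-" + inputTopic in createCountStream.
    final ReadOnlyKeyValueStore<String, Long> counts = streams.store(
        StoreQueryParameters.fromNameAndType(
            storeName + "-" + inputTopic, QueryableStoreTypes.keyValueStore()));
    return counts.get(word); // null if the word has not been counted yet
}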

Example 15 with KGroupedStream

Use of org.apache.kafka.streams.kstream.KGroupedStream in project kafka by apache.

From class SuppressScenarioTest, method shouldWorkWithCogrouped.

@Test
public void shouldWorkWithCogrouped() {
    final StreamsBuilder builder = new StreamsBuilder();
    final KGroupedStream<String, String> stream1 = builder
        .stream("one", Consumed.with(Serdes.String(), Serdes.String()))
        .groupByKey(Grouped.with(Serdes.String(), Serdes.String()));
    final KGroupedStream<String, String> stream2 = builder
        .stream("two", Consumed.with(Serdes.String(), Serdes.String()))
        .groupByKey(Grouped.with(Serdes.String(), Serdes.String()));
    final KStream<Windowed<String>, Object> cogrouped = stream1
        .cogroup((key, value, aggregate) -> aggregate + value)
        .cogroup(stream2, (key, value, aggregate) -> aggregate + value)
        .windowedBy(TimeWindows.of(Duration.ofMinutes(15)))
        .aggregate(() -> "", Named.as("test"), Materialized.as("store"))
        .suppress(Suppressed.untilWindowCloses(unbounded()))
        .toStream();
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Windowed(org.apache.kafka.streams.kstream.Windowed) Produced(org.apache.kafka.streams.kstream.Produced) Collections.singletonList(java.util.Collections.singletonList) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Serde(org.apache.kafka.common.serialization.Serde) Arrays.asList(java.util.Arrays.asList) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Duration(java.time.Duration) BufferConfig.maxRecords(org.apache.kafka.streams.kstream.Suppressed.BufferConfig.maxRecords) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) TestRecord(org.apache.kafka.streams.test.TestRecord) TestUtils(org.apache.kafka.test.TestUtils) Collections.emptyList(java.util.Collections.emptyList) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) Bytes(org.apache.kafka.common.utils.Bytes) List(java.util.List) Materialized(org.apache.kafka.streams.kstream.Materialized) ZERO(java.time.Duration.ZERO) Duration.ofMillis(java.time.Duration.ofMillis) Topology(org.apache.kafka.streams.Topology) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) SessionWindows(org.apache.kafka.streams.kstream.SessionWindows) BufferConfig.unbounded(org.apache.kafka.streams.kstream.Suppressed.BufferConfig.unbounded) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) KStream(org.apache.kafka.streams.kstream.KStream) BufferConfig.maxBytes(org.apache.kafka.streams.kstream.Suppressed.BufferConfig.maxBytes) WindowStore(org.apache.kafka.streams.state.WindowStore) Suppressed.untilWindowCloses(org.apache.kafka.streams.kstream.Suppressed.untilWindowCloses) Windowed(org.apache.kafka.streams.kstream.Windowed) Named(org.apache.kafka.streams.kstream.Named) Deserializer(org.apache.kafka.common.serialization.Deserializer) SessionStore(org.apache.kafka.streams.state.SessionStore) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Utils(org.apache.kafka.common.utils.Utils) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) Iterator(java.util.Iterator) Consumed(org.apache.kafka.streams.kstream.Consumed) Suppressed(org.apache.kafka.streams.kstream.Suppressed) Test(org.junit.Test) KeyValueTimestamp(org.apache.kafka.streams.KeyValueTimestamp) Grouped(org.apache.kafka.streams.kstream.Grouped) SlidingWindows(org.apache.kafka.streams.kstream.SlidingWindows) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) TestInputTopic(org.apache.kafka.streams.TestInputTopic) Comparator(java.util.Comparator) Suppressed.untilTimeLimit(org.apache.kafka.streams.kstream.Suppressed.untilTimeLimit) Test(org.junit.Test)
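
This test only verifies that a cogrouped, windowed, suppressed topology builds; the resulting stream is never written anywhere. Since suppress(untilWindowCloses(...)) holds records until stream time passes the window end plus grace, a sink is needed to observe the single final result per key and window. A hedged continuation of the test body, with an assumed output topic name:

    // Continues the test body above; the output topic name is an assumption.
    cogrouped
        .map((windowedKey, agg) -> KeyValue.pair(windowedKey.toString(), String.valueOf(agg)))
        .to("cogrouped-final-results", Produced.with(Serdes.String(), Serdes.String()));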

Aggregations

KGroupedStream (org.apache.kafka.streams.kstream.KGroupedStream) 17
Serdes (org.apache.kafka.common.serialization.Serdes) 15
KStream (org.apache.kafka.streams.kstream.KStream) 15
Properties (java.util.Properties) 14
Bytes (org.apache.kafka.common.utils.Bytes) 14
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder) 14
StreamsConfig (org.apache.kafka.streams.StreamsConfig) 14
KeyValue (org.apache.kafka.streams.KeyValue) 13
Materialized (org.apache.kafka.streams.kstream.Materialized) 13
Test (org.junit.Test) 13
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer) 12
StringSerializer (org.apache.kafka.common.serialization.StringSerializer) 12
Consumed (org.apache.kafka.streams.kstream.Consumed) 12
KTable (org.apache.kafka.streams.kstream.KTable) 12
KeyValueStore (org.apache.kafka.streams.state.KeyValueStore) 12
TestInputTopic (org.apache.kafka.streams.TestInputTopic) 11
TopologyTestDriver (org.apache.kafka.streams.TopologyTestDriver) 11
Grouped (org.apache.kafka.streams.kstream.Grouped) 11
Initializer (org.apache.kafka.streams.kstream.Initializer) 11
MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat) 11