Example 36 with KStream

Use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.

From class NamedTopologyIntegrationTest, method shouldAddToEmptyInitialTopologyRemoveResetOffsetsThenAddSameNamedTopologyWithRepartitioning:

@Test
public void shouldAddToEmptyInitialTopologyRemoveResetOffsetsThenAddSameNamedTopologyWithRepartitioning() throws Exception {
    CLUSTER.createTopics(SUM_OUTPUT, COUNT_OUTPUT);
    // Build up named topology with two stateful subtopologies
    final KStream<String, Long> inputStream1 = topology1Builder.stream(INPUT_STREAM_1);
    inputStream1.map(KeyValue::new).groupByKey().count().toStream().to(COUNT_OUTPUT);
    inputStream1.map(KeyValue::new).groupByKey().reduce(Long::sum).toStream().to(SUM_OUTPUT);
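    // Start the app with no topologies, then add the named topology dynamically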
    streams.start();
    final NamedTopology namedTopology = topology1Builder.build();
    streams.addNamedTopology(namedTopology).all().get();
    assertThat(waitUntilMinKeyValueRecordsReceived(consumerConfig, COUNT_OUTPUT, 3), equalTo(COUNT_OUTPUT_DATA));
    assertThat(waitUntilMinKeyValueRecordsReceived(consumerConfig, SUM_OUTPUT, 3), equalTo(SUM_OUTPUT_DATA));
    streams.removeNamedTopology(TOPOLOGY_1, true).all().get();
    streams.cleanUpNamedTopology(TOPOLOGY_1);
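    // Delete the topology's changelog and repartition topics so the re-added topology starts from a clean state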
    CLUSTER.getAllTopicsInCluster().stream().filter(t -> t.contains("-changelog") || t.contains("-repartition")).forEach(t -> {
        try {
            CLUSTER.deleteTopicsAndWait(t);
        } catch (final InterruptedException e) {
            e.printStackTrace();
        }
    });
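    // Rebuild an identical named topology from a second builder and add it back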
    final KStream<String, Long> inputStream = topology1BuilderDup.stream(INPUT_STREAM_1);
    inputStream.map(KeyValue::new).groupByKey().count().toStream().to(COUNT_OUTPUT);
    inputStream.map(KeyValue::new).groupByKey().reduce(Long::sum).toStream().to(SUM_OUTPUT);
    final NamedTopology namedTopologyDup = topology1BuilderDup.build();
    streams.addNamedTopology(namedTopologyDup).all().get();
    assertThat(waitUntilMinKeyValueRecordsReceived(consumerConfig, COUNT_OUTPUT, 3), equalTo(COUNT_OUTPUT_DATA));
    assertThat(waitUntilMinKeyValueRecordsReceived(consumerConfig, SUM_OUTPUT, 3), equalTo(SUM_OUTPUT_DATA));
    CLUSTER.deleteTopicsAndWait(SUM_OUTPUT, COUNT_OUTPUT);
}
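
The named-topology wrapper used above is internal API, but the data flow itself, one input stream feeding a count subtopology and a sum subtopology, can be sketched with the public Streams API alone. A minimal, hedged sketch with placeholder topic names ("input", "count-output", "sum-output" are not the test's constants):

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;

public class CountAndSumSketch {
    public static Topology build() {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> input =
            builder.stream("input", Consumed.with(Serdes.String(), Serdes.Long()));
        // map(KeyValue::new) re-keys the stream (to the same key here), which
        // marks it for repartitioning ahead of each stateful operation.
        input.map(KeyValue::new)
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
            .count()
            .toStream()
            .to("count-output", Produced.with(Serdes.String(), Serdes.Long()));
        input.map(KeyValue::new)
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
            .reduce(Long::sum)
            .toStream()
            .to("sum-output", Produced.with(Serdes.String(), Serdes.Long()));
        return builder.build();
    }
}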

Example 37 with KStream

Use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.

From class InternalTopicIntegrationTest, method shouldGetToRunningWithWindowedTableInFKJ:

/*
     * This test just ensures that the assignor does not get stuck during
     * partition number resolution for internal repartition topics. See KAFKA-10689.
     */
@Test
public void shouldGetToRunningWithWindowedTableInFKJ() throws Exception {
    final String appID = APP_ID + "-windowed-FKJ";
    streamsProp.put(StreamsConfig.APPLICATION_ID_CONFIG, appID);
    final StreamsBuilder streamsBuilder = new StreamsBuilder();
    final KStream<String, String> inputTopic = streamsBuilder.stream(DEFAULT_INPUT_TOPIC);
    final KTable<String, String> inputTable = streamsBuilder.table(DEFAULT_INPUT_TABLE_TOPIC);
    inputTopic.groupBy((k, v) -> k, Grouped.with("GroupName", Serdes.String(), Serdes.String()))
        .windowedBy(TimeWindows.of(Duration.ofMinutes(10)))
        .aggregate(() -> "", (k, v, a) -> a + k)
        .leftJoin(inputTable, v -> v, (x, y) -> x + y);
    final KafkaStreams streams = new KafkaStreams(streamsBuilder.build(), streamsProp);
    startApplicationAndWaitUntilRunning(singletonList(streams), Duration.ofSeconds(60));
}
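
The leftJoin at the end is a foreign-key table join (KIP-213), which is what triggers the repartition-topic resolution this test guards against. A minimal, hedged sketch of that join shape on its own, with plain (non-windowed) tables and hypothetical topic names:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;

public class ForeignKeyJoinSketch {
    public static void build(final StreamsBuilder builder) {
        // "orders" is keyed by orderId; its value holds the customerId.
        final KTable<String, String> orders =
            builder.table("orders", Consumed.with(Serdes.String(), Serdes.String()));
        final KTable<String, String> customers =
            builder.table("customers", Consumed.with(Serdes.String(), Serdes.String()));
        // Foreign-key left join: the extractor derives the other table's key
        // from this table's value; Streams creates the internal subscription
        // and response repartition topics behind the scenes.
        orders.leftJoin(customers,
                customerId -> customerId, // the whole value is the foreign key here
                (order, customer) -> order + "/" + customer)
            .toStream()
            .to("enriched-orders", Produced.with(Serdes.String(), Serdes.String()));
    }
}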

Example 38 with KStream

Use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.

From class KStreamRepartitionIntegrationTest, method shouldDeductNumberOfPartitionsFromRepartitionOperation:

@Test
public void shouldDeductNumberOfPartitionsFromRepartitionOperation() throws Exception {
    final String topicBMapperName = "topic-b-mapper";
    final int topicBNumberOfPartitions = 6;
    final String inputTopicRepartitionName = "join-repartition-test";
    final int inputTopicRepartitionedNumOfPartitions = 3;
    final long timestamp = System.currentTimeMillis();
    CLUSTER.createTopic(topicB, topicBNumberOfPartitions, 1);
    final List<KeyValue<Integer, String>> expectedRecords = Arrays.asList(new KeyValue<>(1, "A"), new KeyValue<>(2, "B"));
    sendEvents(timestamp, expectedRecords);
    sendEvents(topicB, timestamp, expectedRecords);
    final StreamsBuilder builder = new StreamsBuilder();
    final Repartitioned<Integer, String> inputTopicRepartitioned = Repartitioned.<Integer, String>as(inputTopicRepartitionName)
        .withNumberOfPartitions(inputTopicRepartitionedNumOfPartitions);
    final KStream<Integer, String> topicBStream = builder.stream(topicB, Consumed.with(Serdes.Integer(), Serdes.String()))
        .map(KeyValue::new, Named.as(topicBMapperName));
    builder.stream(inputTopic, Consumed.with(Serdes.Integer(), Serdes.String()))
        .repartition(inputTopicRepartitioned)
        .join(topicBStream, (value1, value2) -> value2, JoinWindows.of(Duration.ofSeconds(10)))
        .to(outputTopic);
    builder.build(streamsConfiguration);
    startStreams(builder);
    assertEquals(inputTopicRepartitionedNumOfPartitions, getNumberOfPartitionsForTopic(toRepartitionTopicName(inputTopicRepartitionName)));
    assertEquals(inputTopicRepartitionedNumOfPartitions, getNumberOfPartitionsForTopic(toRepartitionTopicName(topicBMapperName)));
    validateReceivedMessages(new IntegerDeserializer(), new StringDeserializer(), expectedRecords);
}
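
Repartitioned (KIP-221) is the public handle for naming and sizing a manual repartition. A minimal, hedged sketch of it in isolation, with placeholder topic names; the internal-topic name in the comment assumes the usual "<application.id>-<name>-repartition" convention:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.Repartitioned;

public class RepartitionSketch {
    public static void build(final StreamsBuilder builder) {
        // Creates an internal topic ("<application.id>-my-repartition-repartition")
        // with exactly three partitions; any join partner downstream must be
        // co-partitioned, which is why the test's topic-b side is deduced to
        // the same partition count.
        builder.stream("input", Consumed.with(Serdes.Integer(), Serdes.String()))
            .repartition(Repartitioned.<Integer, String>as("my-repartition")
                .withNumberOfPartitions(3))
            .to("output", Produced.with(Serdes.Integer(), Serdes.String()));
    }
}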

Example 39 with KStream

Use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.

From class RepartitionOptimizingTest, method runTest:

private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceStream = builder.stream(INPUT_TOPIC,
        Consumed.with(Serdes.String(), Serdes.String()).withName("sourceStream"));
    final KStream<String, String> mappedStream = sourceStream.map(
        (k, v) -> KeyValue.pair(k.toUpperCase(Locale.getDefault()), v), Named.as("source-map"));
    mappedStream.filter((k, v) -> k.equals("B"), Named.as("process-filter"))
        .mapValues(v -> v.toUpperCase(Locale.getDefault()), Named.as("process-mapValues"))
        .process(() -> new SimpleProcessor(processorValueCollector), Named.as("process"));
    final KStream<String, Long> countStream = mappedStream
        .groupByKey(Grouped.as("count-groupByKey"))
        .count(Named.as("count"), Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("count-store"))
            .withKeySerde(Serdes.String())
            .withValueSerde(Serdes.Long()))
        .toStream(Named.as("count-toStream"));
    countStream.to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("count-to"));
    mappedStream.groupByKey(Grouped.as("aggregate-groupByKey"))
        .aggregate(initializer, aggregator, Named.as("aggregate"),
            Materialized.<String, Integer>as(Stores.inMemoryKeyValueStore("aggregate-store"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Integer()))
        .toStream(Named.as("aggregate-toStream"))
        .to(AGGREGATION_TOPIC, Produced.with(Serdes.String(), Serdes.Integer()).withName("reduce-to"));
    // adding operators for case where the repartition node is further downstream
    mappedStream.filter((k, v) -> true, Named.as("reduce-filter"))
        .peek((k, v) -> System.out.println(k + ":" + v), Named.as("reduce-peek"))
        .groupByKey(Grouped.as("reduce-groupByKey"))
        .reduce(reducer, Named.as("reducer"), Materialized.as(Stores.inMemoryKeyValueStore("reduce-store")))
        .toStream(Named.as("reduce-toStream"))
        .to(REDUCE_TOPIC, Produced.with(Serdes.String(), Serdes.String()));
    mappedStream.filter((k, v) -> k.equals("A"), Named.as("join-filter"))
        .join(countStream, (v1, v2) -> v1 + ":" + v2.toString(), JoinWindows.of(ofMillis(5000)),
            StreamJoined.<String, String, Long>with(
                    Stores.inMemoryWindowStore("join-store", ofDays(1), ofMillis(10000), true),
                    Stores.inMemoryWindowStore("other-join-store", ofDays(1), ofMillis(10000), true))
                .withName("join")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.String())
                .withOtherValueSerde(Serdes.Long()))
        .to(JOINED_TOPIC, Produced.as("join-to"));
    streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
    final Topology topology = builder.build(streamsConfiguration);
    topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
    final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_TOPIC, stringSerializer, stringSerializer);
    final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
    final TestOutputTopic<String, Integer> aggregationOutputTopic = topologyTestDriver.createOutputTopic(AGGREGATION_TOPIC, stringDeserializer, new IntegerDeserializer());
    final TestOutputTopic<String, String> reduceOutputTopic = topologyTestDriver.createOutputTopic(REDUCE_TOPIC, stringDeserializer, stringDeserializer);
    final TestOutputTopic<String, String> joinedOutputTopic = topologyTestDriver.createOutputTopic(JOINED_TOPIC, stringDeserializer, stringDeserializer);
    inputTopicA.pipeKeyValueList(getKeyValues());
    // Verify the topology
    final String topologyString = topology.describe().toString();
    if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
        assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
    } else {
        assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
    }
    // Verify the number of repartition topics
    assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
    // Verify the values collected by the processor
    assertThat(3, equalTo(processorValueCollector.size()));
    assertThat(processorValueCollector, equalTo(expectedCollectedProcessorValues));
    // Verify the expected output
    assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
    assertThat(aggregationOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedAggKeyValues)));
    assertThat(reduceOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedReduceKeyValues)));
    assertThat(joinedOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedJoinKeyValues)));
}
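
What the OPTIMIZE flag buys here is the repartition-topic merge. A minimal, hedged sketch of opting in, with placeholder names; the key point is that the Properties must be passed to StreamsBuilder#build for the optimizer to rewrite the topology:

import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;

public class OptimizationSketch {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "optimization-sketch");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);

        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> rekeyed = builder
            .stream("input", Consumed.with(Serdes.String(), Serdes.String()))
            .selectKey((k, v) -> v); // the key change marks the stream for repartitioning
        // Two stateful operations downstream of one key change: unoptimized,
        // each gets its own repartition topic; with OPTIMIZE they share one.
        rekeyed.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .count().toStream()
            .to("counts", Produced.with(Serdes.String(), Serdes.Long()));
        rekeyed.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .reduce((v1, v2) -> v1 + v2).toStream()
            .to("concat", Produced.with(Serdes.String(), Serdes.String()));

        // The config must reach build() for the optimizer to run.
        final Topology topology = builder.build(props);
        System.out.println(topology.describe());
    }
}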

Example 40 with KStream

Use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.

From class RepartitionWithMergeOptimizingTest, method runTest:

private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
    streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceAStream = builder.stream(INPUT_A_TOPIC,
        Consumed.with(Serdes.String(), Serdes.String()).withName("sourceAStream"));
    final KStream<String, String> sourceBStream = builder.stream(INPUT_B_TOPIC,
        Consumed.with(Serdes.String(), Serdes.String()).withName("sourceBStream"));
    final KStream<String, String> mappedAStream = sourceAStream.map(
        (k, v) -> KeyValue.pair(v.split(":")[0], v), Named.as("mappedAStream"));
    final KStream<String, String> mappedBStream = sourceBStream.map(
        (k, v) -> KeyValue.pair(v.split(":")[0], v), Named.as("mappedBStream"));
    final KStream<String, String> mergedStream = mappedAStream.merge(mappedBStream, Named.as("mergedStream"));
    mergedStream.groupByKey(Grouped.as("long-groupByKey"))
        .count(Named.as("long-count"), Materialized.as(Stores.inMemoryKeyValueStore("long-store")))
        .toStream(Named.as("long-toStream"))
        .to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("long-to"));
    mergedStream.groupByKey(Grouped.as("string-groupByKey"))
        .count(Named.as("string-count"), Materialized.as(Stores.inMemoryKeyValueStore("string-store")))
        .toStream(Named.as("string-toStream"))
        .mapValues(v -> v.toString(), Named.as("string-mapValues"))
        .to(STRING_COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.String()).withName("string-to"));
    final Topology topology = builder.build(streamsConfiguration);
    topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
    final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_A_TOPIC, stringSerializer, stringSerializer);
    final TestInputTopic<String, String> inputTopicB = topologyTestDriver.createInputTopic(INPUT_B_TOPIC, stringSerializer, stringSerializer);
    final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
    final TestOutputTopic<String, String> stringCountOutputTopic = topologyTestDriver.createOutputTopic(STRING_COUNT_TOPIC, stringDeserializer, stringDeserializer);
    inputTopicA.pipeKeyValueList(getKeyValues());
    inputTopicB.pipeKeyValueList(getKeyValues());
    final String topologyString = topology.describe().toString();
    // Verify the topology
    if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
        assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
    } else {
        assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
    }
    // Verify the number of repartition topics
    assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
    // Verify the expected output
    assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
    assertThat(stringCountOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedStringCountKeyValues)));
}
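
A minimal, hedged sketch of driving a merged-stream topology through TopologyTestDriver, the same harness this test uses; all topic names below are placeholders:

import java.util.Properties;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;

public class MergeSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> a =
            builder.stream("topic-a", Consumed.with(Serdes.String(), Serdes.String()));
        final KStream<String, String> b =
            builder.stream("topic-b", Consumed.with(Serdes.String(), Serdes.String()));
        // merge() interleaves both streams; there is no ordering guarantee
        // between records coming from the two sources.
        a.merge(b)
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .count().toStream()
            .to("counts", Produced.with(Serdes.String(), Serdes.Long()));

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "merge-sketch");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");

        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
            final TestInputTopic<String, String> inA =
                driver.createInputTopic("topic-a", new StringSerializer(), new StringSerializer());
            final TestInputTopic<String, String> inB =
                driver.createInputTopic("topic-b", new StringSerializer(), new StringSerializer());
            final TestOutputTopic<String, Long> out =
                driver.createOutputTopic("counts", new StringDeserializer(), new LongDeserializer());
            inA.pipeInput("k", "v1");
            inB.pipeInput("k", "v2");
            // Both records share the key, so the final count per key is 2.
            System.out.println(out.readKeyValuesToMap()); // {k=2}
        }
    }
}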

Aggregations

KStream (org.apache.kafka.streams.kstream.KStream): 91
Serdes (org.apache.kafka.common.serialization.Serdes): 83
Properties (java.util.Properties): 82
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 80
Test (org.junit.Test): 69
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 65
KeyValue (org.apache.kafka.streams.KeyValue): 61
Consumed (org.apache.kafka.streams.kstream.Consumed): 55
KTable (org.apache.kafka.streams.kstream.KTable): 54
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 51
Materialized (org.apache.kafka.streams.kstream.Materialized): 45
Duration (java.time.Duration): 44
List (java.util.List): 42
MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 40
KafkaStreams (org.apache.kafka.streams.KafkaStreams): 38
Arrays (java.util.Arrays): 37
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer): 37
Assert.assertEquals (org.junit.Assert.assertEquals): 37
Produced (org.apache.kafka.streams.kstream.Produced): 36
Grouped (org.apache.kafka.streams.kstream.Grouped): 35