
Example 1 with StreamJoined

Use of org.apache.kafka.streams.kstream.StreamJoined in the apache/kafka project.

From the class KStreamKStreamJoinTest, method shouldCreateRepartitionTopicsWithUserProvidedName:

@Test
public void shouldCreateRepartitionTopicsWithUserProvidedName() {
    final StreamsBuilder builder = new StreamsBuilder();
    final Properties props = new Properties();
    props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.NO_OPTIMIZATION);
    final KStream<String, String> stream1 = builder.stream("topic", Consumed.with(Serdes.String(), Serdes.String()));
    final KStream<String, String> stream2 = builder.stream("topic2", Consumed.with(Serdes.String(), Serdes.String()));
    final KStream<String, String> stream3 = builder.stream("topic3", Consumed.with(Serdes.String(), Serdes.String()));
    final KStream<String, String> newStream = stream1.map((k, v) -> new KeyValue<>(v, k));
    final StreamJoined<String, String, String> streamJoined = StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.String());
    newStream.join(stream2, (value1, value2) -> value1 + value2, JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(100)), streamJoined.withName("first-join")).to("out-one");
    newStream.join(stream3, (value1, value2) -> value1 + value2, JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(100)), streamJoined.withName("second-join")).to("out-two");
    final Topology topology = builder.build(props);
    System.out.println(topology.describe().toString());
    assertEquals(expectedTopologyWithUserNamedRepartitionTopics, topology.describe().toString());
}
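For orientation: StreamJoined carries the key and value serdes plus the state-store configuration for a stream-stream join, and withName sets the name Kafka Streams uses for the join processors and, because the map call above re-keys the stream, for the repartition topic created before the join, which is exactly what this test asserts. Below is a minimal sketch of the same pattern outside the test harness; the topic names, "user-join", and "user-join-store" are placeholders rather than anything from the Kafka test, and ofMillis is the same static import used above.

final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> left = builder.stream("left-topic", Consumed.with(Serdes.String(), Serdes.String()));
final KStream<String, String> right = builder.stream("right-topic", Consumed.with(Serdes.String(), Serdes.String()));

// Re-keying forces a repartition before the join; withName controls the repartition topic's prefix.
left.map((k, v) -> new KeyValue<>(v, k))
    .join(right,
        (leftValue, rightValue) -> leftValue + rightValue,
        JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(100)),
        StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.String())
            .withName("user-join")              // name for the join processors and repartition topic
            .withStoreName("user-join-store"))  // base name for the join's window stores
    .to("joined-output");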

Example 2 with StreamJoined

Use of org.apache.kafka.streams.kstream.StreamJoined in the apache/kafka project.

From the class RepartitionOptimizingTest, method runTest:

private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceStream = builder.stream(INPUT_TOPIC, Consumed.with(Serdes.String(), Serdes.String()).withName("sourceStream"));
    final KStream<String, String> mappedStream = sourceStream.map((k, v) -> KeyValue.pair(k.toUpperCase(Locale.getDefault()), v), Named.as("source-map"));
    mappedStream.filter((k, v) -> k.equals("B"), Named.as("process-filter"))
        .mapValues(v -> v.toUpperCase(Locale.getDefault()), Named.as("process-mapValues"))
        .process(() -> new SimpleProcessor(processorValueCollector), Named.as("process"));
    final KStream<String, Long> countStream = mappedStream.groupByKey(Grouped.as("count-groupByKey"))
        .count(Named.as("count"), Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("count-store"))
            .withKeySerde(Serdes.String())
            .withValueSerde(Serdes.Long()))
        .toStream(Named.as("count-toStream"));
    countStream.to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("count-to"));
    mappedStream.groupByKey(Grouped.as("aggregate-groupByKey"))
        .aggregate(initializer, aggregator, Named.as("aggregate"), Materialized.<String, Integer>as(Stores.inMemoryKeyValueStore("aggregate-store"))
            .withKeySerde(Serdes.String())
            .withValueSerde(Serdes.Integer()))
        .toStream(Named.as("aggregate-toStream"))
        .to(AGGREGATION_TOPIC, Produced.with(Serdes.String(), Serdes.Integer()).withName("reduce-to"));
    // adding operators for case where the repartition node is further downstream
    mappedStream.filter((k, v) -> true, Named.as("reduce-filter"))
        .peek((k, v) -> System.out.println(k + ":" + v), Named.as("reduce-peek"))
        .groupByKey(Grouped.as("reduce-groupByKey"))
        .reduce(reducer, Named.as("reducer"), Materialized.as(Stores.inMemoryKeyValueStore("reduce-store")))
        .toStream(Named.as("reduce-toStream"))
        .to(REDUCE_TOPIC, Produced.with(Serdes.String(), Serdes.String()));
    mappedStream.filter((k, v) -> k.equals("A"), Named.as("join-filter"))
        .join(countStream,
            (v1, v2) -> v1 + ":" + v2.toString(),
            JoinWindows.of(ofMillis(5000)),
            StreamJoined.<String, String, Long>with(
                    Stores.inMemoryWindowStore("join-store", ofDays(1), ofMillis(10000), true),
                    Stores.inMemoryWindowStore("other-join-store", ofDays(1), ofMillis(10000), true))
                .withName("join")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.String())
                .withOtherValueSerde(Serdes.Long()))
        .to(JOINED_TOPIC, Produced.as("join-to"));
    streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
    final Topology topology = builder.build(streamsConfiguration);
    topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
    final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_TOPIC, stringSerializer, stringSerializer);
    final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
    final TestOutputTopic<String, Integer> aggregationOutputTopic = topologyTestDriver.createOutputTopic(AGGREGATION_TOPIC, stringDeserializer, new IntegerDeserializer());
    final TestOutputTopic<String, String> reduceOutputTopic = topologyTestDriver.createOutputTopic(REDUCE_TOPIC, stringDeserializer, stringDeserializer);
    final TestOutputTopic<String, String> joinedOutputTopic = topologyTestDriver.createOutputTopic(JOINED_TOPIC, stringDeserializer, stringDeserializer);
    inputTopicA.pipeKeyValueList(getKeyValues());
    // Verify the topology
    final String topologyString = topology.describe().toString();
    if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
        assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
    } else {
        assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
    }
    // Verify the number of repartition topics
    assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
    // Verify the values collected by the processor
    assertThat(3, equalTo(processorValueCollector.size()));
    assertThat(processorValueCollector, equalTo(expectedCollectedProcessorValues));
    // Verify the expected output
    assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
    assertThat(aggregationOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedAggKeyValues)));
    assertThat(reduceOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedReduceKeyValues)));
    assertThat(joinedOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedJoinKeyValues)));
}
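The helper getCountOfRepartitionTopicsFound is called above but not shown in this excerpt. A plausible sketch, assuming it simply counts repartition-topic sinks in the Topology#describe() output, is below; the pattern, names, and method body are illustrative rather than the project's actual implementation.

private static final Pattern REPARTITION_TOPIC_PATTERN = Pattern.compile("Sink: .*-repartition");

private static int getCountOfRepartitionTopicsFound(final String topologyString) {
    // Each repartition topic appears in describe() output as both a sink and a source node;
    // matching only the "Sink: ... -repartition" lines counts each topic once.
    final Matcher matcher = REPARTITION_TOPIC_PATTERN.matcher(topologyString);
    int count = 0;
    while (matcher.find()) {
        count++;
    }
    return count;
}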

Example 3 with StreamJoined

Use of org.apache.kafka.streams.kstream.StreamJoined in the apache/kafka project.

From the class KStreamImplTest, method testNumProcesses:

// specifically testing the deprecated variant
@SuppressWarnings({ "unchecked", "deprecation" })
@Test
public void testNumProcesses() {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> source1 = builder.stream(Arrays.asList("topic-1", "topic-2"), stringConsumed);
    final KStream<String, String> source2 = builder.stream(Arrays.asList("topic-3", "topic-4"), stringConsumed);
    final KStream<String, String> stream1 = source1.filter((key, value) -> true).filterNot((key, value) -> false);
    final KStream<String, Integer> stream2 = stream1.mapValues((ValueMapper<String, Integer>) Integer::valueOf);
    final KStream<String, Integer> stream3 = source2.flatMapValues((ValueMapper<String, Iterable<Integer>>) value -> Collections.singletonList(Integer.valueOf(value)));
    final KStream<String, Integer>[] streams2 = stream2.branch((key, value) -> (value % 2) == 0, (key, value) -> true);
    final KStream<String, Integer>[] streams3 = stream3.branch((key, value) -> (value % 2) == 0, (key, value) -> true);
    final int anyWindowSize = 1;
    final StreamJoined<String, Integer, Integer> joined = StreamJoined.with(Serdes.String(), Serdes.Integer(), Serdes.Integer());
    final KStream<String, Integer> stream4 = streams2[0].join(streams3[0], Integer::sum, JoinWindows.of(ofMillis(anyWindowSize)), joined);
    streams2[1].join(streams3[1], Integer::sum, JoinWindows.of(ofMillis(anyWindowSize)), joined);
    stream4.to("topic-5");
    streams2[1].through("topic-6").process(new MockProcessorSupplier<>());
    streams2[1].repartition().process(new MockProcessorSupplier<>());
    assertEquals(2 + // sources
        2 + // stream1
        1 + // stream2
        1 + // stream3
        1 + 2 + // streams2
        1 + 2 + // streams3
        5 * 2 + // stream2-stream3 joins
        1 + // to
        2 + // through
        1 + // process
        3 + // repartition
        1, // process
        TopologyWrapper.getInternalTopologyBuilder(builder.build()).setApplicationId("X").buildTopology().processors().size());
}
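As the annotation and comment note, branch and through in this test are deprecated variants. For comparison, here is a sketch of the current replacements, KStream#split with Branched (KIP-418) and KStream#repartition (KIP-221), reusing stream2 and anyWindowSize from the test above; the branch prefix, branch names, and output topic are illustrative, and Branched (org.apache.kafka.streams.kstream.Branched) would need to be imported.

// split() returns a map of branches keyed by "<prefix><branch name>".
final Map<String, KStream<String, Integer>> branches = stream2
    .split(Named.as("split-"))
    .branch((key, value) -> value % 2 == 0, Branched.as("even"))
    .defaultBranch(Branched.as("odd"));

final KStream<String, Integer> evens = branches.get("split-even");
final KStream<String, Integer> odds = branches.get("split-odd");

// repartition() replaces through(): Kafka Streams creates and manages the intermediate topic itself.
evens.repartition(Repartitioned.as("even-repartition"))
    .join(odds,
        Integer::sum,
        JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(anyWindowSize)),
        StreamJoined.with(Serdes.String(), Serdes.Integer(), Serdes.Integer()))
    .to("rejoined-output");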

Aggregations

Duration.ofMillis (java.time.Duration.ofMillis): 3
Arrays (java.util.Arrays): 3
Properties (java.util.Properties): 3
Serdes (org.apache.kafka.common.serialization.Serdes): 3
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 3
KeyValue (org.apache.kafka.streams.KeyValue): 3
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 3
TestInputTopic (org.apache.kafka.streams.TestInputTopic): 3
Topology (org.apache.kafka.streams.Topology): 3
TopologyTestDriver (org.apache.kafka.streams.TopologyTestDriver): 3
Consumed (org.apache.kafka.streams.kstream.Consumed): 3
JoinWindows (org.apache.kafka.streams.kstream.JoinWindows): 3
KStream (org.apache.kafka.streams.kstream.KStream): 3
Duration (java.time.Duration): 2
Instant (java.time.Instant): 2
ArrayList (java.util.ArrayList): 2
Collections (java.util.Collections): 2
HashMap (java.util.HashMap): 2
List (java.util.List): 2
Map (java.util.Map): 2