
Example 1 with Processor

Use of org.apache.kafka.streams.processor.api.Processor in project kafka by apache.

From the class RepartitionOptimizingTest, method runTest:

private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceStream = builder.stream(INPUT_TOPIC, Consumed.with(Serdes.String(), Serdes.String()).withName("sourceStream"));
    final KStream<String, String> mappedStream = sourceStream.map((k, v) -> KeyValue.pair(k.toUpperCase(Locale.getDefault()), v), Named.as("source-map"));
    mappedStream.filter((k, v) -> k.equals("B"), Named.as("process-filter"))
        .mapValues(v -> v.toUpperCase(Locale.getDefault()), Named.as("process-mapValues"))
        .process(() -> new SimpleProcessor(processorValueCollector), Named.as("process"));
    final KStream<String, Long> countStream = mappedStream
        .groupByKey(Grouped.as("count-groupByKey"))
        .count(Named.as("count"),
            Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("count-store"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        .toStream(Named.as("count-toStream"));
    countStream.to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("count-to"));
    mappedStream
        .groupByKey(Grouped.as("aggregate-groupByKey"))
        .aggregate(initializer, aggregator, Named.as("aggregate"),
            Materialized.<String, Integer>as(Stores.inMemoryKeyValueStore("aggregate-store"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Integer()))
        .toStream(Named.as("aggregate-toStream"))
        .to(AGGREGATION_TOPIC, Produced.with(Serdes.String(), Serdes.Integer()).withName("reduce-to"));
    // add operators for the case where the repartition node is further downstream
    mappedStream
        .filter((k, v) -> true, Named.as("reduce-filter"))
        .peek((k, v) -> System.out.println(k + ":" + v), Named.as("reduce-peek"))
        .groupByKey(Grouped.as("reduce-groupByKey"))
        .reduce(reducer, Named.as("reducer"), Materialized.as(Stores.inMemoryKeyValueStore("reduce-store")))
        .toStream(Named.as("reduce-toStream"))
        .to(REDUCE_TOPIC, Produced.with(Serdes.String(), Serdes.String()));
    mappedStream
        .filter((k, v) -> k.equals("A"), Named.as("join-filter"))
        .join(countStream, (v1, v2) -> v1 + ":" + v2.toString(),
            JoinWindows.of(ofMillis(5000)),
            StreamJoined.<String, String, Long>with(
                    Stores.inMemoryWindowStore("join-store", ofDays(1), ofMillis(10000), true),
                    Stores.inMemoryWindowStore("other-join-store", ofDays(1), ofMillis(10000), true))
                .withName("join")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.String())
                .withOtherValueSerde(Serdes.Long()))
        .to(JOINED_TOPIC, Produced.as("join-to"));
    streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
    final Topology topology = builder.build(streamsConfiguration);
    topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
    final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_TOPIC, stringSerializer, stringSerializer);
    final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
    final TestOutputTopic<String, Integer> aggregationOutputTopic = topologyTestDriver.createOutputTopic(AGGREGATION_TOPIC, stringDeserializer, new IntegerDeserializer());
    final TestOutputTopic<String, String> reduceOutputTopic = topologyTestDriver.createOutputTopic(REDUCE_TOPIC, stringDeserializer, stringDeserializer);
    final TestOutputTopic<String, String> joinedOutputTopic = topologyTestDriver.createOutputTopic(JOINED_TOPIC, stringDeserializer, stringDeserializer);
    inputTopicA.pipeKeyValueList(getKeyValues());
    // Verify the topology
    final String topologyString = topology.describe().toString();
    if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
        assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
    } else {
        assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
    }
    // Verify the number of repartition topics
    assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
    // Verify the values collected by the processor
    assertThat(processorValueCollector.size(), equalTo(3));
    assertThat(processorValueCollector, equalTo(expectedCollectedProcessorValues));
    // Verify the expected output
    assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
    assertThat(aggregationOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedAggKeyValues)));
    assertThat(reduceOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedReduceKeyValues)));
    assertThat(joinedOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedJoinKeyValues)));
}
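
The SimpleProcessor used in the "process" step is not shown in this excerpt. Here is a minimal sketch of what it plausibly looks like, assuming it simply implements the new Processor API and appends each record's value to the shared collector list the test asserts on; the original implementation may differ in details:

import java.util.List;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.Record;

// Hypothetical reconstruction: the original SimpleProcessor is not part of this excerpt.
class SimpleProcessor implements Processor<String, String, Void, Void> {

    private final List<String> valueCollector;

    SimpleProcessor(final List<String> valueCollector) {
        this.valueCollector = valueCollector;
    }

    @Override
    public void process(final Record<String, String> record) {
        // Collect each value so the test can assert on what flowed through the topology.
        valueCollector.add(record.value());
    }
}

Because the Processor interface provides default init and close implementations, only process needs to be overridden here.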

Example 2 with Processor

Use of org.apache.kafka.streams.processor.api.Processor in project kafka by apache.

From the class GraphGraceSearchUtilTest, method shouldExtractGraceFromSessionAncestorThroughStatefulParent:

@Test
public void shouldExtractGraceFromSessionAncestorThroughStatefulParent() {
    final SessionWindows windows = SessionWindows.ofInactivityGapAndGrace(ofMillis(10L), ofMillis(1234L));
    final StatefulProcessorNode<String, Long> graceGrandparent = new StatefulProcessorNode<>(
        "asdf",
        new ProcessorParameters<>(
            new KStreamSessionWindowAggregate<String, Long, Integer>(windows, "asdf", null, null, null),
            "asdf"),
        (StoreBuilder<?>) null);
    final StatefulProcessorNode<String, Long> statefulParent = new StatefulProcessorNode<>(
        "stateful",
        new ProcessorParameters<>(() -> new Processor<String, Long, String, Long>() {

            @Override
            public void init(final ProcessorContext<String, Long> context) {
            }

            @Override
            public void process(final Record<String, Long> record) {
            }

            @Override
            public void close() {
            }
        }, "dummy"),
        (StoreBuilder<?>) null);
    graceGrandparent.addChild(statefulParent);
    final ProcessorGraphNode<String, Long> node = new ProcessorGraphNode<>("stateless", null);
    statefulParent.addChild(node);
    final long extracted = GraphGraceSearchUtil.findAndVerifyWindowGrace(node);
    assertThat(extracted, is(windows.gracePeriodMs() + windows.inactivityGap()));
}
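
The final assertion expects gap + grace, not the grace period alone: a late record can extend a session window by up to the inactivity gap, so the grace search has to account for both durations. A minimal sketch of the arithmetic, using only public SessionWindows accessors:

import static java.time.Duration.ofMillis;

import org.apache.kafka.streams.kstream.SessionWindows;

public class SessionGraceSketch {
    public static void main(final String[] args) {
        final SessionWindows windows =
            SessionWindows.ofInactivityGapAndGrace(ofMillis(10L), ofMillis(1234L));
        // findAndVerifyWindowGrace is expected to report 10 + 1234 = 1244 ms.
        System.out.println(windows.inactivityGap() + windows.gracePeriodMs());
    }
}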

Example 3 with Processor

Use of org.apache.kafka.streams.processor.api.Processor in project kafka by apache.

From the class TopologyTestDriverTest, method shouldCleanUpPersistentStateStoresOnClose:

@Test
public void shouldCleanUpPersistentStateStoresOnClose() {
    final Topology topology = new Topology();
    topology.addSource("sourceProcessor", "input-topic");
    topology.addProcessor("storeProcessor", new ProcessorSupplier<String, Long, Void, Void>() {

        @Override
        public Processor<String, Long, Void, Void> get() {
            return new Processor<String, Long, Void, Void>() {

                private KeyValueStore<String, Long> store;

                @Override
                public void init(final ProcessorContext<Void, Void> context) {
                    this.store = context.getStateStore("storeProcessorStore");
                }

                @Override
                public void process(final Record<String, Long> record) {
                    store.put(record.key(), record.value());
                }
            };
        }
    }, "sourceProcessor");
    topology.addStateStore(
        Stores.keyValueStoreBuilder(
            Stores.persistentKeyValueStore("storeProcessorStore"), Serdes.String(), Serdes.Long()),
        "storeProcessor");
    final Properties config = new Properties();
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-TopologyTestDriver-cleanup");
    config.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
    try (final TopologyTestDriver testDriver = new TopologyTestDriver(topology, config)) {
        assertNull(testDriver.getKeyValueStore("storeProcessorStore").get("a"));
        testDriver.pipeRecord("input-topic", new TestRecord<>("a", 1L), new StringSerializer(), new LongSerializer(), Instant.now());
        assertEquals(1L, testDriver.getKeyValueStore("storeProcessorStore").get("a"));
    }
    try (final TopologyTestDriver testDriver = new TopologyTestDriver(topology, config)) {
        assertNull(testDriver.getKeyValueStore("storeProcessorStore").get("a"), "Closing the prior test driver should have cleaned up this store and value.");
    }
}
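
The second try-with-resources block only passes because TopologyTestDriver is AutoCloseable and close() flushes and deletes the driver's local state directory. A minimal sketch of the isolation pattern the test relies on (the helper name runIsolated is ours):

import java.util.Properties;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.TopologyTestDriver;

public class DriverCleanupSketch {

    // Each call starts from a clean state directory: close() at the end of
    // the try block removes the persistent stores created during the run.
    static void runIsolated(final Topology topology, final Properties config) {
        try (final TopologyTestDriver driver = new TopologyTestDriver(topology, config)) {
            // pipe records, read stores, assert...
        }
    }
}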

Example 4 with Processor

Use of org.apache.kafka.streams.processor.api.Processor in project kafka by apache.

From the class KTableKTableRightJoinTest, method shouldLogAndMeterSkippedRecordsDueToNullLeftKeyWithBuiltInMetricsVersionLatest:

@Test
public void shouldLogAndMeterSkippedRecordsDueToNullLeftKeyWithBuiltInMetricsVersionLatest() {
    final StreamsBuilder builder = new StreamsBuilder();
    @SuppressWarnings("unchecked")
    final Processor<String, Change<String>, String, Change<Object>> join = new KTableKTableRightJoin<>(
        (KTableImpl<String, String, String>) builder.table("left", Consumed.with(Serdes.String(), Serdes.String())),
        (KTableImpl<String, String, String>) builder.table("right", Consumed.with(Serdes.String(), Serdes.String())),
        null).get();
    props.setProperty(StreamsConfig.BUILT_IN_METRICS_VERSION_CONFIG, StreamsConfig.METRICS_LATEST);
    final MockProcessorContext<String, Change<Object>> context = new MockProcessorContext<>(props);
    context.setRecordMetadata("left", -1, -2);
    join.init(context);
    try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(KTableKTableRightJoin.class)) {
        join.process(new Record<>(null, new Change<>("new", "old"), 0));
        assertThat(
            appender.getEvents().stream()
                .filter(e -> e.getLevel().equals("WARN"))
                .map(Event::getMessage)
                .collect(Collectors.toList()),
            hasItem("Skipping record due to null key. topic=[left] partition=[-1] offset=[-2]"));
    }
}
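
The same MockProcessorContext can drive any Processor in isolation, with no broker or topology driver involved. A minimal sketch of the pattern, using a hypothetical upper-casing processor (upperCaser is our own example, not from the test above):

import java.util.Locale;
import org.apache.kafka.streams.processor.api.MockProcessorContext;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.api.Record;

public class ProcessorUnitTestSketch {
    public static void main(final String[] args) {
        // Hypothetical processor under test: upper-cases every value it sees.
        final Processor<String, String, String, String> upperCaser =
            new Processor<String, String, String, String>() {
                private ProcessorContext<String, String> context;

                @Override
                public void init(final ProcessorContext<String, String> context) {
                    this.context = context;
                }

                @Override
                public void process(final Record<String, String> record) {
                    context.forward(record.withValue(record.value().toUpperCase(Locale.ROOT)));
                }
            };

        // Drive the processor directly and inspect what it forwards downstream.
        final MockProcessorContext<String, String> context = new MockProcessorContext<>();
        upperCaser.init(context);
        upperCaser.process(new Record<>("a", "hello", 0L));
        System.out.println(context.forwarded().get(0).record().value()); // HELLO
    }
}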

Example 5 with Processor

Use of org.apache.kafka.streams.processor.api.Processor in project kafka by apache.

From the class StreamsBuilderTest, method shouldUseSpecifiedNameForSplitOperation:

@Test
public void shouldUseSpecifiedNameForSplitOperation() {
    builder.stream(STREAM_TOPIC)
        .split(Named.as("branch-processor"))
        .branch((k, v) -> true, Branched.as("-1"))
        .branch((k, v) -> false, Branched.as("-2"));
    builder.build();
    final ProcessorTopology topology = builder.internalTopologyBuilder.rewriteTopology(new StreamsConfig(props)).buildTopology();
    assertNamesForOperation(topology, "KSTREAM-SOURCE-0000000000", "branch-processor", "branch-processor-1", "branch-processor-2");
}
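
The asserted names follow the split() naming rule: each branch name is the Named prefix plus the Branched suffix. A minimal sketch of the same rule observed through the public API (the topic and predicates are our own placeholders):

import java.util.Map;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Branched;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Named;

public class BranchNamingSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> stream = builder.stream("input-topic");
        final Map<String, KStream<String, String>> branches = stream
            .split(Named.as("branch-processor"))
            .branch((k, v) -> v != null, Branched.as("-1"))
            .defaultBranch(Branched.as("-2"));
        // Map keys combine prefix and suffix:
        // branch-processor-1, branch-processor-2
        System.out.println(branches.keySet());
    }
}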

Aggregations

Processor (org.apache.kafka.streams.processor.api.Processor): 8
Record (org.apache.kafka.streams.processor.api.Record): 7
Properties (java.util.Properties): 5
Test (org.junit.Test): 5
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 4
ProcessorContext (org.apache.kafka.streams.processor.api.ProcessorContext): 4
List (java.util.List): 3
Serdes (org.apache.kafka.common.serialization.Serdes): 3
Consumed (org.apache.kafka.streams.kstream.Consumed): 3
MockProcessorContext (org.apache.kafka.streams.processor.api.MockProcessorContext): 3
Arrays (java.util.Arrays): 2
Arrays.asList (java.util.Arrays.asList): 2
HashMap (java.util.HashMap): 2
Map (java.util.Map): 2
Pattern (java.util.regex.Pattern): 2
LongSerializer (org.apache.kafka.common.serialization.LongSerializer): 2
Utils.mkProperties (org.apache.kafka.common.utils.Utils.mkProperties): 2
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 2
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 2
Grouped (org.apache.kafka.streams.kstream.Grouped): 2