
Example 6 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in project kafka by apache.

From the class StreamsGraphTest, the method shouldNotThrowNPEWithMergeNodes.
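Initializer is the functional interface that supplies the starting value of an aggregation; Kafka Streams invokes it once per key (or once per key and window) before the Aggregator folds records in. A minimal sketch of the contract used throughout these examples, assuming a StreamsBuilder named builder and an illustrative "words" topic:

final Initializer<Long> init = () -> 0L;                      // seed value for each new key
final Aggregator<String, String, Long> agg =
    (key, value, aggregate) -> aggregate + value.length();    // fold each record into the running total
builder.stream("words", Consumed.with(Serdes.String(), Serdes.String()))
    .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
    .aggregate(init, agg, Materialized.with(Serdes.String(), Serdes.Long()));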

@Test
// Topology in this test from https://issues.apache.org/jira/browse/KAFKA-9739
public void shouldNotThrowNPEWithMergeNodes() {
    final Properties properties = new Properties();
    properties.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "test-application");
    properties.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    properties.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
    final StreamsBuilder builder = new StreamsBuilder();
    final Initializer<String> initializer = () -> "";
    final Aggregator<String, String, String> aggregator = (aggKey, value, aggregate) -> aggregate + value.length();
    final TransformerSupplier<String, String, KeyValue<String, String>> transformSupplier = () -> new Transformer<String, String, KeyValue<String, String>>() {

        @Override
        public void init(final ProcessorContext context) {
        }

        @Override
        public KeyValue<String, String> transform(final String key, final String value) {
            return KeyValue.pair(key, value);
        }

        @Override
        public void close() {
        }
    };
    final KStream<String, String> retryStream = builder
        .stream("retryTopic", Consumed.with(Serdes.String(), Serdes.String()))
        .transform(transformSupplier)
        .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
        .aggregate(initializer, aggregator, Materialized.with(Serdes.String(), Serdes.String()))
        .suppress(Suppressed.untilTimeLimit(Duration.ofSeconds(500), Suppressed.BufferConfig.maxBytes(64_000_000)))
        .toStream()
        .flatMap((k, v) -> new ArrayList<>());
    final KTable<String, String> idTable = builder
        .stream("id-table-topic", Consumed.with(Serdes.String(), Serdes.String()))
        .flatMap((k, v) -> new ArrayList<KeyValue<String, String>>())
        .peek((subscriptionId, recipientId) -> System.out.println("data " + subscriptionId + " " + recipientId))
        .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
        .aggregate(initializer, aggregator, Materialized.with(Serdes.String(), Serdes.String()));
    final KStream<String, String> joinStream = builder
        .stream("internal-topic-command", Consumed.with(Serdes.String(), Serdes.String()))
        .peek((subscriptionId, command) -> System.out.println("stdoutput"))
        .mapValues((k, v) -> v)
        .merge(retryStream)
        .leftJoin(idTable, (v1, v2) -> v1 + v2, Joined.with(Serdes.String(), Serdes.String(), Serdes.String()));
    joinStream.split()
        .branch((k, v) -> v.equals("some-value"), Branched.withConsumer(ks -> ks
            .map(KeyValue::pair)
            .peek((recipientId, command) -> System.out.println("printing out"))
            .to("external-command", Produced.with(Serdes.String(), Serdes.String()))))
        .defaultBranch(Branched.withConsumer(ks -> {
            ks.filter((k, v) -> v != null)
                .peek((subscriptionId, wrapper) -> System.out.println("Printing output"))
                .mapValues((k, v) -> v)
                .to("dlq-topic", Produced.with(Serdes.String(), Serdes.String()));
            ks.map(KeyValue::pair).to("retryTopic", Produced.with(Serdes.String(), Serdes.String()));
        }));
    final Topology topology = builder.build(properties);
    assertEquals(expectedComplexMergeOptimizeTopology, topology.describe().toString());
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) Produced(org.apache.kafka.streams.kstream.Produced) KStream(org.apache.kafka.streams.kstream.KStream) Joined(org.apache.kafka.streams.kstream.Joined) ArrayList(java.util.ArrayList) Initializer(org.apache.kafka.streams.kstream.Initializer) JoinWindows(org.apache.kafka.streams.kstream.JoinWindows) Matcher(java.util.regex.Matcher) TransformerSupplier(org.apache.kafka.streams.kstream.TransformerSupplier) Locale(java.util.Locale) Duration(java.time.Duration) Serdes(org.apache.kafka.common.serialization.Serdes) Aggregator(org.apache.kafka.streams.kstream.Aggregator) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) Consumed(org.apache.kafka.streams.kstream.Consumed) Transformer(org.apache.kafka.streams.kstream.Transformer) KeyValue(org.apache.kafka.streams.KeyValue) Suppressed(org.apache.kafka.streams.kstream.Suppressed) Test(org.junit.Test) Branched(org.apache.kafka.streams.kstream.Branched) Grouped(org.apache.kafka.streams.kstream.Grouped) ProcessorContext(org.apache.kafka.streams.processor.ProcessorContext) List(java.util.List) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) ValueJoiner(org.apache.kafka.streams.kstream.ValueJoiner) Materialized(org.apache.kafka.streams.kstream.Materialized) Pattern(java.util.regex.Pattern) Duration.ofMillis(java.time.Duration.ofMillis) Topology(org.apache.kafka.streams.Topology) Assert.assertEquals(org.junit.Assert.assertEquals)
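The branching at the end of this topology uses the split()/Branched API (KIP-418, added in Kafka Streams 2.8), which replaced the deprecated KStream#branch. A minimal sketch of the pattern, with illustrative topic names:

stream.split()
    .branch((k, v) -> v.startsWith("a"),
        Branched.withConsumer(ks -> ks.to("a-topic")))
    .defaultBranch(
        Branched.withConsumer(ks -> ks.to("other-topic")));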

Example 7 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in project kafka by apache.

From the class StreamsOptimizedTest, the method main.

public static void main(final String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("StreamsOptimizedTest requires one argument (properties-file) but none provided: ");
    }
    final String propFileName = args[0];
    final Properties streamsProperties = Utils.loadProps(propFileName);
    System.out.println("StreamsTest instance started StreamsOptimizedTest");
    System.out.println("props=" + streamsProperties);
    final String inputTopic = (String) Objects.requireNonNull(streamsProperties.remove("input.topic"));
    final String aggregationTopic = (String) Objects.requireNonNull(streamsProperties.remove("aggregation.topic"));
    final String reduceTopic = (String) Objects.requireNonNull(streamsProperties.remove("reduce.topic"));
    final String joinTopic = (String) Objects.requireNonNull(streamsProperties.remove("join.topic"));
    final Pattern repartitionTopicPattern = Pattern.compile("Sink: .*-repartition");
    final Initializer<Integer> initializer = () -> 0;
    final Aggregator<String, String, Integer> aggregator = (k, v, agg) -> agg + v.length();
    final Reducer<String> reducer = (v1, v2) -> Integer.toString(Integer.parseInt(v1) + Integer.parseInt(v2));
    final Function<String, String> keyFunction = s -> Integer.toString(Integer.parseInt(s) % 9);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceStream = builder.stream(inputTopic, Consumed.with(Serdes.String(), Serdes.String()));
    final KStream<String, String> mappedStream = sourceStream.selectKey((k, v) -> keyFunction.apply(v));
    final KStream<String, Long> countStream = mappedStream.groupByKey()
        .count(Materialized.with(Serdes.String(), Serdes.Long()))
        .toStream();
    mappedStream.groupByKey()
        .aggregate(initializer, aggregator, Materialized.with(Serdes.String(), Serdes.Integer()))
        .toStream()
        .peek((k, v) -> System.out.println(String.format("AGGREGATED key=%s value=%s", k, v)))
        .to(aggregationTopic, Produced.with(Serdes.String(), Serdes.Integer()));
    mappedStream.groupByKey()
        .reduce(reducer, Materialized.with(Serdes.String(), Serdes.String()))
        .toStream()
        .peek((k, v) -> System.out.println(String.format("REDUCED key=%s value=%s", k, v)))
        .to(reduceTopic, Produced.with(Serdes.String(), Serdes.String()));
    mappedStream.join(countStream, (v1, v2) -> v1 + ":" + v2.toString(),
            JoinWindows.of(ofMillis(500)),
            StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.Long()))
        .peek((k, v) -> System.out.println(String.format("JOINED key=%s value=%s", k, v)))
        .to(joinTopic, Produced.with(Serdes.String(), Serdes.String()));
    final Properties config = new Properties();
    config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsOptimizedTest");
    config.setProperty(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");
    config.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    config.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    config.setProperty(StreamsConfig.adminClientPrefix(AdminClientConfig.RETRIES_CONFIG), "100");
    config.putAll(streamsProperties);
    final Topology topology = builder.build(config);
    final KafkaStreams streams = new KafkaStreams(topology, config);
    streams.setStateListener((newState, oldState) -> {
        if (oldState == State.REBALANCING && newState == State.RUNNING) {
            final int repartitionTopicCount = getCountOfRepartitionTopicsFound(topology.describe().toString(), repartitionTopicPattern);
            System.out.println(String.format("REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%d", repartitionTopicCount));
            System.out.flush();
        }
    });
    streams.cleanUp();
    streams.start();
    Exit.addShutdownHook("streams-shutdown-hook", () -> {
        System.out.println("closing Kafka Streams instance");
        System.out.flush();
        streams.close(Duration.ofMillis(5000));
        System.out.println("OPTIMIZE_TEST Streams Stopped");
        System.out.flush();
    });
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Exit(org.apache.kafka.common.utils.Exit) Produced(org.apache.kafka.streams.kstream.Produced) KStream(org.apache.kafka.streams.kstream.KStream) Function(java.util.function.Function) StreamJoined(org.apache.kafka.streams.kstream.StreamJoined) ArrayList(java.util.ArrayList) Initializer(org.apache.kafka.streams.kstream.Initializer) JoinWindows(org.apache.kafka.streams.kstream.JoinWindows) Matcher(java.util.regex.Matcher) Duration(java.time.Duration) Serdes(org.apache.kafka.common.serialization.Serdes) Aggregator(org.apache.kafka.streams.kstream.Aggregator) Utils(org.apache.kafka.common.utils.Utils) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Properties(java.util.Properties) Consumed(org.apache.kafka.streams.kstream.Consumed) AdminClientConfig(org.apache.kafka.clients.admin.AdminClientConfig) State(org.apache.kafka.streams.KafkaStreams.State) Objects(java.util.Objects) List(java.util.List) Reducer(org.apache.kafka.streams.kstream.Reducer) Materialized(org.apache.kafka.streams.kstream.Materialized) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Pattern(java.util.regex.Pattern) Duration.ofMillis(java.time.Duration.ofMillis) Topology(org.apache.kafka.streams.Topology)
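The helper getCountOfRepartitionTopicsFound is not shown in this excerpt. A plausible sketch, assuming it simply counts matches of the "Sink: .*-repartition" pattern in the printed topology (a reconstruction, not the verbatim source):

// Hypothetical reconstruction of the helper referenced above.
private static int getCountOfRepartitionTopicsFound(final String topologyString, final Pattern pattern) {
    final Matcher matcher = pattern.matcher(topologyString);
    final List<String> repartitionTopicsFound = new ArrayList<>();
    while (matcher.find()) {
        repartitionTopicsFound.add(matcher.group());
    }
    return repartitionTopicsFound.size();
}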

Example 8 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in project apache-kafka-on-k8s by banzaicloud.

From the class SmokeTestClient, the method createKafkaStreams.

private static KafkaStreams createKafkaStreams(final Properties props, final String kafka) {
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka);
    props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3);
    props.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 2);
    props.put(StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_CONFIG, 100);
    props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
    props.put(StreamsConfig.REPLICATION_FACTOR_CONFIG, 3);
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    props.put(ProducerConfig.RETRIES_CONFIG, Integer.MAX_VALUE);
    props.put(ProducerConfig.ACKS_CONFIG, "all");
    // TODO remove this config or set to smaller value when KIP-91 is merged
    props.put(StreamsConfig.producerPrefix(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG), 80000);
    StreamsBuilder builder = new StreamsBuilder();
    Consumed<String, Integer> stringIntConsumed = Consumed.with(stringSerde, intSerde);
    KStream<String, Integer> source = builder.stream("data", stringIntConsumed);
    source.to(stringSerde, intSerde, "echo");
    KStream<String, Integer> data = source.filter(new Predicate<String, Integer>() {

        @Override
        public boolean test(String key, Integer value) {
            return value == null || value != END;
        }
    });
    data.process(SmokeTestUtil.printProcessorSupplier("data"));
    // min
    KGroupedStream<String, Integer> groupedData = data.groupByKey(Serialized.with(stringSerde, intSerde));
    groupedData.aggregate(new Initializer<Integer>() {

        public Integer apply() {
            return Integer.MAX_VALUE;
        }
    }, new Aggregator<String, Integer, Integer>() {

        @Override
        public Integer apply(String aggKey, Integer value, Integer aggregate) {
            return (value < aggregate) ? value : aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(1)), intSerde, "uwin-min")
        .toStream()
        .map(new Unwindow<String, Integer>())
        .to(stringSerde, intSerde, "min");
    KTable<String, Integer> minTable = builder.table("min", stringIntConsumed);
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min"));
    // max
    groupedData.aggregate(new Initializer<Integer>() {

        public Integer apply() {
            return Integer.MIN_VALUE;
        }
    }, new Aggregator<String, Integer, Integer>() {

        @Override
        public Integer apply(String aggKey, Integer value, Integer aggregate) {
            return (value > aggregate) ? value : aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(2)), intSerde, "uwin-max")
        .toStream()
        .map(new Unwindow<String, Integer>())
        .to(stringSerde, intSerde, "max");
    KTable<String, Integer> maxTable = builder.table("max", stringIntConsumed);
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max"));
    // sum
    groupedData.aggregate(new Initializer<Long>() {

        public Long apply() {
            return 0L;
        }
    }, new Aggregator<String, Integer, Long>() {

        @Override
        public Long apply(String aggKey, Integer value, Long aggregate) {
            return (long) value + aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(2)), longSerde, "win-sum")
        .toStream()
        .map(new Unwindow<String, Long>())
        .to(stringSerde, longSerde, "sum");
    Consumed<String, Long> stringLongConsumed = Consumed.with(stringSerde, longSerde);
    KTable<String, Long> sumTable = builder.table("sum", stringLongConsumed);
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum"));
    // cnt
    groupedData.count(TimeWindows.of(TimeUnit.DAYS.toMillis(2)), "uwin-cnt")
        .toStream()
        .map(new Unwindow<String, Long>())
        .to(stringSerde, longSerde, "cnt");
    KTable<String, Long> cntTable = builder.table("cnt", stringLongConsumed);
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt"));
    // dif
    maxTable.join(minTable, new ValueJoiner<Integer, Integer, Integer>() {

        public Integer apply(Integer value1, Integer value2) {
            return value1 - value2;
        }
    }).to(stringSerde, intSerde, "dif");
    // avg
    sumTable.join(cntTable, new ValueJoiner<Long, Long, Double>() {

        public Double apply(Long value1, Long value2) {
            return (double) value1 / (double) value2;
        }
    }).to(stringSerde, doubleSerde, "avg");
    // test repartition
    Agg agg = new Agg();
    cntTable.groupBy(agg.selector(), Serialized.with(stringSerde, longSerde))
        .aggregate(agg.init(), agg.adder(), agg.remover(),
            Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("cntByCnt"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        .to(stringSerde, longSerde, "tagg");
    final KafkaStreams streamsClient = new KafkaStreams(builder.build(), props);
    streamsClient.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            System.out.println("FATAL: An unexpected exception is encountered on thread " + t + ": " + e);
            streamsClient.close(30, TimeUnit.SECONDS);
        }
    });
    return streamsClient;
}
Also used : KafkaStreams(org.apache.kafka.streams.KafkaStreams) Aggregator(org.apache.kafka.streams.kstream.Aggregator) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) ValueJoiner(org.apache.kafka.streams.kstream.ValueJoiner) Initializer(org.apache.kafka.streams.kstream.Initializer)
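This client is written against the pre-2.1 DSL: windowed aggregate overloads that take a Windows instance plus a store name, and KStream#to(Serde, Serde, String). On a current Kafka Streams release the same "min" aggregation would go through windowedBy and Materialized; a rough equivalent sketch, not taken from the original project:

groupedData
    .windowedBy(TimeWindows.of(Duration.ofDays(1)))
    .aggregate(
        () -> Integer.MAX_VALUE,                                // Initializer: seed each new window
        (key, value, aggregate) -> Math.min(value, aggregate),  // Aggregator: keep the minimum
        Materialized.with(stringSerde, intSerde))
    .toStream()
    .map((windowedKey, value) -> KeyValue.pair(windowedKey.key(), value))
    .to("min", Produced.with(stringSerde, intSerde));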

Example 9 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in project kafka by apache.

From the class StreamsNamedRepartitionTest, the method main.

public static void main(final String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("StreamsNamedRepartitionTest requires one argument (properties-file) but none provided: ");
    }
    final String propFileName = args[0];
    final Properties streamsProperties = Utils.loadProps(propFileName);
    System.out.println("StreamsTest instance started NAMED_REPARTITION_TEST");
    System.out.println("props=" + streamsProperties);
    final String inputTopic = (String) (Objects.requireNonNull(streamsProperties.remove("input.topic")));
    final String aggregationTopic = (String) (Objects.requireNonNull(streamsProperties.remove("aggregation.topic")));
    final boolean addOperators = Boolean.valueOf(Objects.requireNonNull((String) streamsProperties.remove("add.operations")));
    final Initializer<Integer> initializer = () -> 0;
    final Aggregator<String, String, Integer> aggregator = (k, v, agg) -> agg + Integer.parseInt(v);
    final Function<String, String> keyFunction = s -> Integer.toString(Integer.parseInt(s) % 9);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceStream = builder.stream(inputTopic, Consumed.with(Serdes.String(), Serdes.String()));
    sourceStream.peek((k, v) -> System.out.println(String.format("input data key=%s, value=%s", k, v)));
    final KStream<String, String> mappedStream = sourceStream.selectKey((k, v) -> keyFunction.apply(v));
    final KStream<String, String> maybeUpdatedStream;
    if (addOperators) {
        maybeUpdatedStream = mappedStream.filter((k, v) -> true).mapValues(v -> Integer.toString(Integer.parseInt(v) + 1));
    } else {
        maybeUpdatedStream = mappedStream;
    }
    maybeUpdatedStream
        .groupByKey(Grouped.with("grouped-stream", Serdes.String(), Serdes.String()))
        .aggregate(initializer, aggregator,
            Materialized.<String, Integer, KeyValueStore<Bytes, byte[]>>as("count-store")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Integer()))
        .toStream()
        .peek((k, v) -> System.out.println(String.format("AGGREGATED key=%s value=%s", k, v)))
        .to(aggregationTopic, Produced.with(Serdes.String(), Serdes.Integer()));
    final Properties config = new Properties();
    config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsNamedRepartitionTest");
    config.setProperty(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");
    config.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    config.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    config.putAll(streamsProperties);
    final Topology topology = builder.build(config);
    final KafkaStreams streams = new KafkaStreams(topology, config);
    streams.setStateListener((newState, oldState) -> {
        if (oldState == State.REBALANCING && newState == State.RUNNING) {
            if (addOperators) {
                System.out.println("UPDATED Topology");
            } else {
                System.out.println("REBALANCING -> RUNNING");
            }
            System.out.flush();
        }
    });
    streams.start();
    Exit.addShutdownHook("streams-shutdown-hook", () -> {
        System.out.println("closing Kafka Streams instance");
        System.out.flush();
        streams.close(Duration.ofMillis(5000));
        System.out.println("NAMED_REPARTITION_TEST Streams Stopped");
        System.out.flush();
    });
}
Also used : Utils(org.apache.kafka.common.utils.Utils) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Properties(java.util.Properties) Exit(org.apache.kafka.common.utils.Exit) Produced(org.apache.kafka.streams.kstream.Produced) Consumed(org.apache.kafka.streams.kstream.Consumed) KStream(org.apache.kafka.streams.kstream.KStream) State(org.apache.kafka.streams.KafkaStreams.State) Function(java.util.function.Function) Grouped(org.apache.kafka.streams.kstream.Grouped) Bytes(org.apache.kafka.common.utils.Bytes) Objects(java.util.Objects) Initializer(org.apache.kafka.streams.kstream.Initializer) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Duration(java.time.Duration) Materialized(org.apache.kafka.streams.kstream.Materialized) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Aggregator(org.apache.kafka.streams.kstream.Aggregator) Topology(org.apache.kafka.streams.Topology)
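The naming is the point of this test: Grouped.with("grouped-stream", ...) pins the internal repartition topic to <application.id>-grouped-stream-repartition, so toggling the optional filter/mapValues operators changes the topology but not the internal topic names, and the application can restart against its existing internal topics. A minimal sketch of the idea, with illustrative topic and key names:

// An unnamed grouping gets a generated, position-dependent repartition topic name
// (e.g. "<application.id>-KSTREAM-AGGREGATE-STATE-STORE-0000000002-repartition");
// naming the grouping keeps the topic stable as the topology evolves:
builder.stream("events", Consumed.with(Serdes.String(), Serdes.String()))
    .selectKey((k, v) -> v)  // key change upstream forces a repartition
    .groupByKey(Grouped.with("grouped-stream", Serdes.String(), Serdes.String()))
    .count();                // repartition topic: <application.id>-grouped-stream-repartition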

Aggregations

Initializer (org.apache.kafka.streams.kstream.Initializer): 9
Aggregator (org.apache.kafka.streams.kstream.Aggregator): 7
Properties (java.util.Properties): 5
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 5
Test (org.junit.Test): 5
Serdes (org.apache.kafka.common.serialization.Serdes): 4
KafkaStreams (org.apache.kafka.streams.KafkaStreams): 4
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 4
Topology (org.apache.kafka.streams.Topology): 4
Consumed (org.apache.kafka.streams.kstream.Consumed): 4
KStream (org.apache.kafka.streams.kstream.KStream): 4
Materialized (org.apache.kafka.streams.kstream.Materialized): 4
Produced (org.apache.kafka.streams.kstream.Produced): 4
Duration (java.time.Duration): 3
Duration.ofMillis (java.time.Duration.ofMillis): 3
ArrayList (java.util.ArrayList): 3
List (java.util.List): 3
Matcher (java.util.regex.Matcher): 3
Pattern (java.util.regex.Pattern): 3
Grouped (org.apache.kafka.streams.kstream.Grouped): 3