
Example 1 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in the apache/kafka project.

From the class KGroupedStreamImplTest, method shouldAggregateSessionWindows:

@Test
public void shouldAggregateSessionWindows() throws Exception {
    final Map<Windowed<String>, Integer> results = new HashMap<>();
    groupedStream.aggregate(new Initializer<Integer>() {

        @Override
        public Integer apply() {
            return 0;
        }
    }, new Aggregator<String, String, Integer>() {

        @Override
        public Integer apply(final String aggKey, final String value, final Integer aggregate) {
            return aggregate + 1;
        }
    }, new Merger<String, Integer>() {

        @Override
        public Integer apply(final String aggKey, final Integer aggOne, final Integer aggTwo) {
            return aggOne + aggTwo;
        }
    }, SessionWindows.with(30), Serdes.Integer(), "session-store").foreach(new ForeachAction<Windowed<String>, Integer>() {

        @Override
        public void apply(final Windowed<String> key, final Integer value) {
            results.put(key, value);
        }
    });
    driver = new KStreamTestDriver(builder, TestUtils.tempDirectory());
    // Key "1" at t=10 and t=30 fall within the 30ms inactivity gap and merge
    // into a single session [10, 30]; key "2" at t=15 forms its own session.
    driver.setTime(10);
    driver.process(TOPIC, "1", "1");
    driver.setTime(15);
    driver.process(TOPIC, "2", "2");
    driver.setTime(30);
    driver.process(TOPIC, "1", "1");
    // Key "1" at t=70, t=90, and t=100 are each within 30ms of the previous
    // event, so they merge into a second session [70, 100].
    driver.setTime(70);
    driver.process(TOPIC, "1", "1");
    driver.setTime(90);
    driver.process(TOPIC, "1", "1");
    driver.setTime(100);
    driver.process(TOPIC, "1", "1");
    driver.flushState();
    assertEquals(Integer.valueOf(2), results.get(new Windowed<>("1", new SessionWindow(10, 30))));
    assertEquals(Integer.valueOf(1), results.get(new Windowed<>("2", new SessionWindow(15, 15))));
    assertEquals(Integer.valueOf(3), results.get(new Windowed<>("1", new SessionWindow(70, 100))));
}
Also used: HashMap(java.util.HashMap) Aggregator(org.apache.kafka.streams.kstream.Aggregator) MockAggregator(org.apache.kafka.test.MockAggregator) Windowed(org.apache.kafka.streams.kstream.Windowed) KStreamTestDriver(org.apache.kafka.test.KStreamTestDriver) Merger(org.apache.kafka.streams.kstream.Merger) MockInitializer(org.apache.kafka.test.MockInitializer) Initializer(org.apache.kafka.streams.kstream.Initializer) Test(org.junit.Test)
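
Because Initializer, Aggregator, and Merger each declare a single method, the same aggregation can be written with lambdas. Below is a minimal sketch against the newer windowedBy/Materialized API (an assumption: Kafka Streams 2.x or later, which additionally needs java.time.Duration, org.apache.kafka.common.utils.Bytes, and org.apache.kafka.streams.state.SessionStore), not the exact code of the test above.

final Map<Windowed<String>, Integer> results = new HashMap<>();
groupedStream
    .windowedBy(SessionWindows.with(Duration.ofMillis(30)))
    .aggregate(
        () -> 0,                                      // Initializer: each new session starts at 0
        (aggKey, value, aggregate) -> aggregate + 1,  // Aggregator: count records in the session
        (aggKey, aggOne, aggTwo) -> aggOne + aggTwo,  // Merger: combine counts when sessions merge
        Materialized.<String, Integer, SessionStore<Bytes, byte[]>>as("session-store"))
    .toStream()
    .foreach(results::put);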

Example 2 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in the apache/kafka project.

From the class SmokeTestClient, method createKafkaStreams:

private static KafkaStreams createKafkaStreams(File stateDir, String kafka) {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest");
    props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir.toString());
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka);
    props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3);
    props.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 2);
    props.put(StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_CONFIG, 100);
    props.put(StreamsConfig.REPLICATION_FACTOR_CONFIG, 2);
    props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    KStreamBuilder builder = new KStreamBuilder();
    KStream<String, Integer> source = builder.stream(stringSerde, intSerde, "data");
    source.to(stringSerde, intSerde, "echo");
    // Drop END sentinel records; everything else (including nulls) passes through.
    KStream<String, Integer> data = source.filter(new Predicate<String, Integer>() {

        @Override
        public boolean test(String key, Integer value) {
            return value == null || value != END;
        }
    });
    data.process(SmokeTestUtil.printProcessorSupplier("data"));
    // min
    KGroupedStream<String, Integer> groupedData = data.groupByKey(stringSerde, intSerde);
    groupedData.aggregate(new Initializer<Integer>() {

        public Integer apply() {
            return Integer.MAX_VALUE;
        }
    }, new Aggregator<String, Integer, Integer>() {

        @Override
        public Integer apply(String aggKey, Integer value, Integer aggregate) {
            return (value < aggregate) ? value : aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(1)), intSerde, "uwin-min").toStream().map(new Unwindow<String, Integer>()).to(stringSerde, intSerde, "min");
    KTable<String, Integer> minTable = builder.table(stringSerde, intSerde, "min", "minStoreName");
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min"));
    // max
    groupedData.aggregate(new Initializer<Integer>() {

        public Integer apply() {
            return Integer.MIN_VALUE;
        }
    }, new Aggregator<String, Integer, Integer>() {

        @Override
        public Integer apply(String aggKey, Integer value, Integer aggregate) {
            return (value > aggregate) ? value : aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(2)), intSerde, "uwin-max").toStream().map(new Unwindow<String, Integer>()).to(stringSerde, intSerde, "max");
    KTable<String, Integer> maxTable = builder.table(stringSerde, intSerde, "max", "maxStoreName");
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max"));
    // sum
    groupedData.aggregate(new Initializer<Long>() {

        public Long apply() {
            return 0L;
        }
    }, new Aggregator<String, Integer, Long>() {

        @Override
        public Long apply(String aggKey, Integer value, Long aggregate) {
            return (long) value + aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(2)), longSerde, "win-sum").toStream().map(new Unwindow<String, Long>()).to(stringSerde, longSerde, "sum");
    KTable<String, Long> sumTable = builder.table(stringSerde, longSerde, "sum", "sumStoreName");
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum"));
    // cnt
    groupedData.count(TimeWindows.of(TimeUnit.DAYS.toMillis(2)), "uwin-cnt").toStream().map(new Unwindow<String, Long>()).to(stringSerde, longSerde, "cnt");
    KTable<String, Long> cntTable = builder.table(stringSerde, longSerde, "cnt", "cntStoreName");
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt"));
    // dif
    maxTable.join(minTable, new ValueJoiner<Integer, Integer, Integer>() {

        public Integer apply(Integer value1, Integer value2) {
            return value1 - value2;
        }
    }).to(stringSerde, intSerde, "dif");
    // avg
    sumTable.join(cntTable, new ValueJoiner<Long, Long, Double>() {

        public Double apply(Long value1, Long value2) {
            return (double) value1 / (double) value2;
        }
    }).to(stringSerde, doubleSerde, "avg");
    // test repartition
    Agg agg = new Agg();
    cntTable.groupBy(agg.selector(), stringSerde, longSerde).aggregate(agg.init(), agg.adder(), agg.remover(), longSerde, "cntByCnt").to(stringSerde, longSerde, "tagg");
    final KafkaStreams streamsClient = new KafkaStreams(builder, props);
    streamsClient.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            System.out.println("FATAL: An unexpected exception is encountered on thread " + t + ": " + e);
            streamsClient.close(30, TimeUnit.SECONDS);
        }
    });
    return streamsClient;
}
Also used: KStreamBuilder(org.apache.kafka.streams.kstream.KStreamBuilder) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Aggregator(org.apache.kafka.streams.kstream.Aggregator) Properties(java.util.Properties) ValueJoiner(org.apache.kafka.streams.kstream.ValueJoiner) Initializer(org.apache.kafka.streams.kstream.Initializer)
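
On Java 8+ the anonymous classes above collapse to lambdas. A minimal sketch of the min aggregation, assuming the same groupedData, serdes, Unwindow mapper, and the old KStreamBuilder-era aggregate overload used in this example:

groupedData.aggregate(
        () -> Integer.MAX_VALUE,                                   // Initializer: identity element for min
        (aggKey, value, aggregate) -> Math.min(value, aggregate),  // Aggregator: keep the smaller value
        TimeWindows.of(TimeUnit.DAYS.toMillis(1)), intSerde, "uwin-min")
    .toStream()
    .map(new Unwindow<String, Integer>())
    .to(stringSerde, intSerde, "min");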

Example 3 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in the confluentinc/ksql project.

From the class HoppingWindowExpressionTest, method shouldCreateHoppingWindowAggregate:

@Test
public void shouldCreateHoppingWindowAggregate() {
    final KGroupedStream stream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream windowedKStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator aggregator = EasyMock.createNiceMock(UdafAggregator.class);
    final HoppingWindowExpression windowExpression = new HoppingWindowExpression(10, TimeUnit.SECONDS, 4, TimeUnit.MILLISECONDS);
    final Initializer initializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> store = Materialized.as("store");
    EasyMock.expect(stream.windowedBy(TimeWindows.of(10000L).advanceBy(4L))).andReturn(windowedKStream);
    EasyMock.expect(windowedKStream.aggregate(same(initializer), same(aggregator), same(store))).andReturn(null);
    EasyMock.replay(stream, windowedKStream);
    windowExpression.applyAggregate(stream, initializer, aggregator, store);
    EasyMock.verify(stream, windowedKStream);
}
Also used: GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Initializer(org.apache.kafka.streams.kstream.Initializer) TimeWindowedKStream(org.apache.kafka.streams.kstream.TimeWindowedKStream) UdafAggregator(io.confluent.ksql.function.UdafAggregator) Test(org.junit.Test)
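
The lambda () -> 0 compiles because Initializer declares a single abstract method that takes no arguments and returns the initial aggregate value. Its shape in org.apache.kafka.streams.kstream is essentially:

public interface Initializer<VA> {
    // Returns the starting value for an aggregation; invoked once per key
    // (and, for windowed aggregations, once per window or session).
    VA apply();
}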

Example 4 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in the confluentinc/ksql project.

From the class TumblingWindowExpressionTest, method shouldCreateTumblingWindowAggregate:

@Test
public void shouldCreateTumblingWindowAggregate() {
    final KGroupedStream stream = EasyMock.createNiceMock(KGroupedStream.class);
    final TimeWindowedKStream windowedKStream = EasyMock.createNiceMock(TimeWindowedKStream.class);
    final UdafAggregator aggregator = EasyMock.createNiceMock(UdafAggregator.class);
    final TumblingWindowExpression windowExpression = new TumblingWindowExpression(10, TimeUnit.SECONDS);
    final Initializer initializer = () -> 0;
    final Materialized<String, GenericRow, WindowStore<Bytes, byte[]>> store = Materialized.as("store");
    EasyMock.expect(stream.windowedBy(TimeWindows.of(10000L))).andReturn(windowedKStream);
    EasyMock.expect(windowedKStream.aggregate(same(initializer), same(aggregator), same(store))).andReturn(null);
    EasyMock.replay(stream, windowedKStream);
    windowExpression.applyAggregate(stream, initializer, aggregator, store);
    EasyMock.verify(stream, windowedKStream);
}
Also used: GenericRow(io.confluent.ksql.GenericRow) WindowStore(org.apache.kafka.streams.state.WindowStore) KGroupedStream(org.apache.kafka.streams.kstream.KGroupedStream) Initializer(org.apache.kafka.streams.kstream.Initializer) TimeWindowedKStream(org.apache.kafka.streams.kstream.TimeWindowedKStream) UdafAggregator(io.confluent.ksql.function.UdafAggregator) Test(org.junit.Test)
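
The only substantive difference from the hopping-window test above is the window definition: a tumbling window is simply a hopping window whose advance equals its size, which is what TimeWindows.of produces when advanceBy is never called. For comparison:

// Hopping: 10-second windows that advance every 4 ms (Example 3).
TimeWindows hopping = TimeWindows.of(10000L).advanceBy(4L);
// Tumbling: the advance defaults to the window size, so windows never overlap (Example 4).
TimeWindows tumbling = TimeWindows.of(10000L);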

Example 5 with Initializer

Use of org.apache.kafka.streams.kstream.Initializer in the apache/kafka project.

From the class RepartitionOptimizingTest, method runTest:

private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> sourceStream = builder.stream(INPUT_TOPIC, Consumed.with(Serdes.String(), Serdes.String()).withName("sourceStream"));
    final KStream<String, String> mappedStream = sourceStream.map((k, v) -> KeyValue.pair(k.toUpperCase(Locale.getDefault()), v), Named.as("source-map"));
    mappedStream.filter((k, v) -> k.equals("B"), Named.as("process-filter"))
        .mapValues(v -> v.toUpperCase(Locale.getDefault()), Named.as("process-mapValues"))
        .process(() -> new SimpleProcessor(processorValueCollector), Named.as("process"));
    final KStream<String, Long> countStream = mappedStream
        .groupByKey(Grouped.as("count-groupByKey"))
        .count(Named.as("count"), Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("count-store"))
            .withKeySerde(Serdes.String())
            .withValueSerde(Serdes.Long()))
        .toStream(Named.as("count-toStream"));
    countStream.to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("count-to"));
    mappedStream.groupByKey(Grouped.as("aggregate-groupByKey"))
        .aggregate(initializer, aggregator, Named.as("aggregate"),
            Materialized.<String, Integer>as(Stores.inMemoryKeyValueStore("aggregate-store"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Integer()))
        .toStream(Named.as("aggregate-toStream"))
        .to(AGGREGATION_TOPIC, Produced.with(Serdes.String(), Serdes.Integer()).withName("reduce-to"));
    // adding operators for case where the repartition node is further downstream
    mappedStream.filter((k, v) -> true, Named.as("reduce-filter"))
        .peek((k, v) -> System.out.println(k + ":" + v), Named.as("reduce-peek"))
        .groupByKey(Grouped.as("reduce-groupByKey"))
        .reduce(reducer, Named.as("reducer"), Materialized.as(Stores.inMemoryKeyValueStore("reduce-store")))
        .toStream(Named.as("reduce-toStream"))
        .to(REDUCE_TOPIC, Produced.with(Serdes.String(), Serdes.String()));
    mappedStream.filter((k, v) -> k.equals("A"), Named.as("join-filter"))
        .join(countStream,
            (v1, v2) -> v1 + ":" + v2.toString(),
            JoinWindows.of(ofMillis(5000)),
            StreamJoined.<String, String, Long>with(
                    Stores.inMemoryWindowStore("join-store", ofDays(1), ofMillis(10000), true),
                    Stores.inMemoryWindowStore("other-join-store", ofDays(1), ofMillis(10000), true))
                .withName("join")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.String())
                .withOtherValueSerde(Serdes.Long()))
        .to(JOINED_TOPIC, Produced.as("join-to"));
    streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
    final Topology topology = builder.build(streamsConfiguration);
    topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
    final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_TOPIC, stringSerializer, stringSerializer);
    final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
    final TestOutputTopic<String, Integer> aggregationOutputTopic = topologyTestDriver.createOutputTopic(AGGREGATION_TOPIC, stringDeserializer, new IntegerDeserializer());
    final TestOutputTopic<String, String> reduceOutputTopic = topologyTestDriver.createOutputTopic(REDUCE_TOPIC, stringDeserializer, stringDeserializer);
    final TestOutputTopic<String, String> joinedOutputTopic = topologyTestDriver.createOutputTopic(JOINED_TOPIC, stringDeserializer, stringDeserializer);
    inputTopicA.pipeKeyValueList(getKeyValues());
    // Verify the topology
    final String topologyString = topology.describe().toString();
    if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
        assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
    } else {
        assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
    }
    // Verify the number of repartition topics
    assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
    // Verify the values collected by the processor
    assertThat(processorValueCollector.size(), equalTo(3));
    assertThat(processorValueCollector, equalTo(expectedCollectedProcessorValues));
    // Verify the expected output
    assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
    assertThat(aggregationOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedAggKeyValues)));
    assertThat(reduceOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedReduceKeyValues)));
    assertThat(joinedOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedJoinKeyValues)));
}
Also used: StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) Produced(org.apache.kafka.streams.kstream.Produced) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Stores(org.apache.kafka.streams.state.Stores) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) KStream(org.apache.kafka.streams.kstream.KStream) StreamJoined(org.apache.kafka.streams.kstream.StreamJoined) ArrayList(java.util.ArrayList) Initializer(org.apache.kafka.streams.kstream.Initializer) JoinWindows(org.apache.kafka.streams.kstream.JoinWindows) Matcher(java.util.regex.Matcher) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Locale(java.util.Locale) Map(java.util.Map) Named(org.apache.kafka.streams.kstream.Named) After(org.junit.After) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) Record(org.apache.kafka.streams.processor.api.Record) Deserializer(org.apache.kafka.common.serialization.Deserializer) Processor(org.apache.kafka.streams.processor.api.Processor) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Aggregator(org.apache.kafka.streams.kstream.Aggregator) Before(org.junit.Before) Duration.ofDays(java.time.Duration.ofDays) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) TestOutputTopic(org.apache.kafka.streams.TestOutputTopic) Properties(java.util.Properties) Logger(org.slf4j.Logger) Consumed(org.apache.kafka.streams.kstream.Consumed) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) Test(org.junit.Test) Grouped(org.apache.kafka.streams.kstream.Grouped) List(java.util.List) Serializer(org.apache.kafka.common.serialization.Serializer) Reducer(org.apache.kafka.streams.kstream.Reducer) Materialized(org.apache.kafka.streams.kstream.Materialized) IntegerDeserializer(org.apache.kafka.common.serialization.IntegerDeserializer) TestInputTopic(org.apache.kafka.streams.TestInputTopic) StreamsTestUtils(org.apache.kafka.test.StreamsTestUtils) Pattern(java.util.regex.Pattern) Duration.ofMillis(java.time.Duration.ofMillis) Topology(org.apache.kafka.streams.Topology) Assert.assertEquals(org.junit.Assert.assertEquals)
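
runTest relies on initializer, aggregator, and reducer fields declared elsewhere in RepartitionOptimizingTest. A plausible minimal sketch of those declarations follows; the field names match the method above, but the bodies are illustrative assumptions rather than the test's exact code.

// Illustrative assumption: field declarations compatible with the generics used in runTest().
private final Initializer<Integer> initializer = () -> 0;
private final Aggregator<String, String, Integer> aggregator =
    (key, value, aggregate) -> aggregate + value.length();    // e.g. sum up value lengths
private final Reducer<String> reducer =
    (value1, value2) -> value1 + ":" + value2;                 // e.g. concatenate values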

Aggregations

Initializer (org.apache.kafka.streams.kstream.Initializer): 9 usages
Aggregator (org.apache.kafka.streams.kstream.Aggregator): 7 usages
Properties (java.util.Properties): 5 usages
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 5 usages
Test (org.junit.Test): 5 usages
Serdes (org.apache.kafka.common.serialization.Serdes): 4 usages
KafkaStreams (org.apache.kafka.streams.KafkaStreams): 4 usages
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 4 usages
Topology (org.apache.kafka.streams.Topology): 4 usages
Consumed (org.apache.kafka.streams.kstream.Consumed): 4 usages
KStream (org.apache.kafka.streams.kstream.KStream): 4 usages
Materialized (org.apache.kafka.streams.kstream.Materialized): 4 usages
Produced (org.apache.kafka.streams.kstream.Produced): 4 usages
Duration (java.time.Duration): 3 usages
Duration.ofMillis (java.time.Duration.ofMillis): 3 usages
ArrayList (java.util.ArrayList): 3 usages
List (java.util.List): 3 usages
Matcher (java.util.regex.Matcher): 3 usages
Pattern (java.util.regex.Pattern): 3 usages
Grouped (org.apache.kafka.streams.kstream.Grouped): 3 usages