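ValueJoiner is the single-method callback that Kafka Streams invokes to combine the two values of a join (stream-stream, stream-table, or table-table) into one result value. For reference, the interface reduces to the following shape (javadoc omitted):

package org.apache.kafka.streams.kstream;

public interface ValueJoiner<V1, V2, VR> {

    // Returns a joined value computed from value1 and value2.
    VR apply(final V1 value1, final V2 value2);
}

Because it has a single abstract method, any ValueJoiner can be written as a lambda on Java 8+, as some of the examples below illustrate.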

Example 1 with ValueJoiner

Use of org.apache.kafka.streams.kstream.ValueJoiner in the apache/kafka project.

From the class KTableAggregateTest, method shouldForwardToCorrectProcessorNodeWhenMultiCacheEvictions.

@Test
public void shouldForwardToCorrectProcessorNodeWhenMultiCacheEvictions() throws Exception {
    final String tableOne = "tableOne";
    final String tableTwo = "tableTwo";
    final KStreamBuilder builder = new KStreamBuilder();
    final String reduceTopic = "TestDriver-reducer-store-repartition";
    final Map<String, Long> reduceResults = new HashMap<>();
    final KTable<String, String> one = builder.table(Serdes.String(), Serdes.String(), tableOne, tableOne);
    final KTable<Long, String> two = builder.table(Serdes.Long(), Serdes.String(), tableTwo, tableTwo);
    final KTable<String, Long> reduce = two.groupBy(new KeyValueMapper<Long, String, KeyValue<String, Long>>() {

        @Override
        public KeyValue<String, Long> apply(final Long key, final String value) {
            return new KeyValue<>(value, key);
        }
    }, Serdes.String(), Serdes.Long()).reduce(new Reducer<Long>() {

        @Override
        public Long apply(final Long value1, final Long value2) {
            return value1 + value2;
        }
    }, new Reducer<Long>() {

        @Override
        public Long apply(final Long value1, final Long value2) {
            return value1 - value2;
        }
    }, "reducer-store");
    reduce.foreach(new ForeachAction<String, Long>() {

        @Override
        public void apply(final String key, final Long value) {
            reduceResults.put(key, value);
        }
    });
    one.leftJoin(reduce, new ValueJoiner<String, Long, String>() {

        @Override
        public String apply(final String value1, final Long value2) {
            return value1 + ":" + value2;
        }
    }).mapValues(new ValueMapper<String, String>() {

        @Override
        public String apply(final String value) {
            return value;
        }
    });
    driver = new KStreamTestDriver(builder, stateDir, 111);
    driver.process(reduceTopic, "1", new Change<>(1L, null));
    driver.process("tableOne", "2", "2");
    // this should trigger eviction on the reducer-store topic
    driver.process(reduceTopic, "2", new Change<>(2L, null));
    // this won't, as it is the same value
    driver.process(reduceTopic, "2", new Change<>(2L, null));
    assertEquals(Long.valueOf(2L), reduceResults.get("2"));
    // this will trigger eviction on the tableOne topic
    // that in turn will cause an eviction on the reducer-store topic. It will flush
    // key 2 as it is the only dirty entry in the cache
    driver.process("tableOne", "1", "5");
    assertEquals(Long.valueOf(4L), reduceResults.get("2"));
}
Also used: KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder), KeyValue (org.apache.kafka.streams.KeyValue), HashMap (java.util.HashMap), MockKeyValueMapper (org.apache.kafka.test.MockKeyValueMapper), KeyValueMapper (org.apache.kafka.streams.kstream.KeyValueMapper), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), KStreamTestDriver (org.apache.kafka.test.KStreamTestDriver), Test (org.junit.Test)
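
On current Kafka Streams releases the KStreamBuilder API used above is gone, and ValueJoiner can be supplied as a lambda. A minimal sketch of the same left join against the modern StreamsBuilder API, with illustrative topic names (StreamsBuilder, KTable, and Consumed come from org.apache.kafka.streams, Serdes from org.apache.kafka.common.serialization):

private static void buildLeftJoin(final StreamsBuilder builder) {
    final KTable<String, String> one =
        builder.table("tableOne", Consumed.with(Serdes.String(), Serdes.String()));
    final KTable<String, Long> reduce =
        builder.table("reduced", Consumed.with(Serdes.String(), Serdes.Long()));
    // ValueJoiner<String, Long, String> expressed as a lambda
    one.leftJoin(reduce, (value1, value2) -> value1 + ":" + value2);
}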

Example 2 with ValueJoiner

Use of org.apache.kafka.streams.kstream.ValueJoiner in the apache/kafka project.

From the class SmokeTestClient, method createKafkaStreams.

private static KafkaStreams createKafkaStreams(File stateDir, String kafka) {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest");
    props.put(StreamsConfig.STATE_DIR_CONFIG, stateDir.toString());
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafka);
    props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3);
    props.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 2);
    props.put(StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_CONFIG, 100);
    props.put(StreamsConfig.REPLICATION_FACTOR_CONFIG, 2);
    props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    KStreamBuilder builder = new KStreamBuilder();
    KStream<String, Integer> source = builder.stream(stringSerde, intSerde, "data");
    source.to(stringSerde, intSerde, "echo");
    KStream<String, Integer> data = source.filter(new Predicate<String, Integer>() {

        @Override
        public boolean test(String key, Integer value) {
            return value == null || value != END;
        }
    });
    data.process(SmokeTestUtil.printProcessorSupplier("data"));
    // min
    KGroupedStream<String, Integer> groupedData = data.groupByKey(stringSerde, intSerde);
    groupedData.aggregate(new Initializer<Integer>() {

        public Integer apply() {
            return Integer.MAX_VALUE;
        }
    }, new Aggregator<String, Integer, Integer>() {

        @Override
        public Integer apply(String aggKey, Integer value, Integer aggregate) {
            return (value < aggregate) ? value : aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(1)), intSerde, "uwin-min").toStream().map(new Unwindow<String, Integer>()).to(stringSerde, intSerde, "min");
    KTable<String, Integer> minTable = builder.table(stringSerde, intSerde, "min", "minStoreName");
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min"));
    // max
    groupedData.aggregate(new Initializer<Integer>() {

        public Integer apply() {
            return Integer.MIN_VALUE;
        }
    }, new Aggregator<String, Integer, Integer>() {

        @Override
        public Integer apply(String aggKey, Integer value, Integer aggregate) {
            return (value > aggregate) ? value : aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(2)), intSerde, "uwin-max").toStream().map(new Unwindow<String, Integer>()).to(stringSerde, intSerde, "max");
    KTable<String, Integer> maxTable = builder.table(stringSerde, intSerde, "max", "maxStoreName");
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max"));
    // sum
    groupedData.aggregate(new Initializer<Long>() {

        public Long apply() {
            return 0L;
        }
    }, new Aggregator<String, Integer, Long>() {

        @Override
        public Long apply(String aggKey, Integer value, Long aggregate) {
            return (long) value + aggregate;
        }
    }, TimeWindows.of(TimeUnit.DAYS.toMillis(2)), longSerde, "win-sum").toStream().map(new Unwindow<String, Long>()).to(stringSerde, longSerde, "sum");
    KTable<String, Long> sumTable = builder.table(stringSerde, longSerde, "sum", "sumStoreName");
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum"));
    // cnt
    groupedData.count(TimeWindows.of(TimeUnit.DAYS.toMillis(2)), "uwin-cnt").toStream().map(new Unwindow<String, Long>()).to(stringSerde, longSerde, "cnt");
    KTable<String, Long> cntTable = builder.table(stringSerde, longSerde, "cnt", "cntStoreName");
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt"));
    // dif
    maxTable.join(minTable, new ValueJoiner<Integer, Integer, Integer>() {

        public Integer apply(Integer value1, Integer value2) {
            return value1 - value2;
        }
    }).to(stringSerde, intSerde, "dif");
    // avg
    sumTable.join(cntTable, new ValueJoiner<Long, Long, Double>() {

        public Double apply(Long value1, Long value2) {
            return (double) value1 / (double) value2;
        }
    }).to(stringSerde, doubleSerde, "avg");
    // test repartition
    Agg agg = new Agg();
    cntTable.groupBy(agg.selector(), stringSerde, longSerde).aggregate(agg.init(), agg.adder(), agg.remover(), longSerde, "cntByCnt").to(stringSerde, longSerde, "tagg");
    final KafkaStreams streamsClient = new KafkaStreams(builder, props);
    streamsClient.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() {

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            System.out.println("FATAL: An unexpected exception is encountered on thread " + t + ": " + e);
            streamsClient.close(30, TimeUnit.SECONDS);
        }
    });
    return streamsClient;
}
Also used: KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder), KafkaStreams (org.apache.kafka.streams.KafkaStreams), Aggregator (org.apache.kafka.streams.kstream.Aggregator), Properties (java.util.Properties), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), Initializer (org.apache.kafka.streams.kstream.Initializer)
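
The two table-table joins at the end are where ValueJoiner earns its keep: "dif" joins the max and min tables, and "avg" divides the sum table by the count table. With lambdas and the current API (where KTable no longer has a to() method), the same wiring reduces to the sketch below, reusing the example's serde fields:

// ValueJoiner<Integer, Integer, Integer>: max minus min
maxTable.join(minTable, (value1, value2) -> value1 - value2)
        .toStream()
        .to("dif", Produced.with(stringSerde, intSerde));

// ValueJoiner<Long, Long, Double>: sum divided by count
sumTable.join(cntTable, (value1, value2) -> (double) value1 / (double) value2)
        .toStream()
        .to("avg", Produced.with(stringSerde, doubleSerde));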

Example 3 with ValueJoiner

Use of org.apache.kafka.streams.kstream.ValueJoiner in the banzaicloud/apache-kafka-on-k8s project.

From the class StreamsPartitionAssignorTest, method shouldNotLoopInfinitelyOnMissingMetadataAndShouldNotCreateRelatedTasks.

@Test
public void shouldNotLoopInfinitelyOnMissingMetadataAndShouldNotCreateRelatedTasks() throws Exception {
    final StreamsBuilder builder = new StreamsBuilder();
    final InternalTopologyBuilder internalTopologyBuilder = StreamsBuilderTest.internalTopologyBuilder(builder);
    internalTopologyBuilder.setApplicationId(applicationId);
    KStream<Object, Object> stream1 = builder.stream("topic1").selectKey(new KeyValueMapper<Object, Object, Object>() {

        @Override
        public Object apply(final Object key, final Object value) {
            return null;
        }
    }).groupByKey().count(Materialized.<Object, Long, KeyValueStore<Bytes, byte[]>>as("count")).toStream().map(new KeyValueMapper<Object, Long, KeyValue<Object, Object>>() {

        @Override
        public KeyValue<Object, Object> apply(final Object key, final Long value) {
            return null;
        }
    });
    builder.stream("unknownTopic").selectKey(new KeyValueMapper<Object, Object, Object>() {

        @Override
        public Object apply(final Object key, final Object value) {
            return null;
        }
    }).join(stream1, new ValueJoiner() {

        @Override
        public Object apply(final Object value1, final Object value2) {
            return null;
        }
    }, JoinWindows.of(0));
    final UUID uuid = UUID.randomUUID();
    final String client = "client1";
    mockTaskManager(Collections.<TaskId>emptySet(), Collections.<TaskId>emptySet(), UUID.randomUUID(), internalTopologyBuilder);
    configurePartitionAssignor(Collections.<String, Object>emptyMap());
    final MockInternalTopicManager mockInternalTopicManager = new MockInternalTopicManager(streamsConfig, mockClientSupplier.restoreConsumer);
    partitionAssignor.setInternalTopicManager(mockInternalTopicManager);
    final Map<String, PartitionAssignor.Subscription> subscriptions = new HashMap<>();
    final Set<TaskId> emptyTasks = Collections.emptySet();
    subscriptions.put(client, new PartitionAssignor.Subscription(Collections.singletonList("unknownTopic"), new SubscriptionInfo(uuid, emptyTasks, emptyTasks, userEndPoint).encode()));
    final Map<String, PartitionAssignor.Assignment> assignment = partitionAssignor.assign(metadata, subscriptions);
    final Map<String, Integer> expectedCreatedInternalTopics = new HashMap<>();
    expectedCreatedInternalTopics.put(applicationId + "-count-repartition", 3);
    expectedCreatedInternalTopics.put(applicationId + "-count-changelog", 3);
    assertThat(mockInternalTopicManager.readyTopics, equalTo(expectedCreatedInternalTopics));
    final List<TopicPartition> expectedAssignment = Arrays.asList(new TopicPartition("topic1", 0), new TopicPartition("topic1", 1), new TopicPartition("topic1", 2), new TopicPartition(applicationId + "-count-repartition", 0), new TopicPartition(applicationId + "-count-repartition", 1), new TopicPartition(applicationId + "-count-repartition", 2));
    assertThat(new HashSet<>(assignment.get(client).partitions()), equalTo(new HashSet<>(expectedAssignment)));
}
Also used: KeyValue (org.apache.kafka.streams.KeyValue), TaskId (org.apache.kafka.streams.processor.TaskId), HashMap (java.util.HashMap), MockInternalTopicManager (org.apache.kafka.test.MockInternalTopicManager), KeyValueMapper (org.apache.kafka.streams.kstream.KeyValueMapper), SubscriptionInfo (org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo), Bytes (org.apache.kafka.common.utils.Bytes), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), PartitionAssignor (org.apache.kafka.clients.consumer.internals.PartitionAssignor), UUID (java.util.UUID), HashSet (java.util.HashSet), StreamsBuilder (org.apache.kafka.streams.StreamsBuilder), TopicPartition (org.apache.kafka.common.TopicPartition), StreamsBuilderTest (org.apache.kafka.streams.StreamsBuilderTest), Test (org.junit.Test)
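
The ValueJoiner here drives a windowed stream-stream join, and JoinWindows.of(0) is the pre-2.8 factory method. A hedged sketch of the equivalent call on Kafka Streams 3.0+, where unknownStream stands in for the builder.stream("unknownTopic").selectKey(...) pipeline above:

// ofTimeDifferenceWithNoGrace(Duration.ZERO) mirrors the zero-width
// window that JoinWindows.of(0) creates in the test
unknownStream.join(stream1,
        (value1, value2) -> null,
        JoinWindows.ofTimeDifferenceWithNoGrace(java.time.Duration.ZERO));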

Example 4 with ValueJoiner

Use of org.apache.kafka.streams.kstream.ValueJoiner in the banzaicloud/apache-kafka-on-k8s project.

From the class StreamsPartitionAssignorTest, method shouldGenerateTasksForAllCreatedPartitions.

@Test
public void shouldGenerateTasksForAllCreatedPartitions() throws Exception {
    final StreamsBuilder builder = new StreamsBuilder();
    final InternalTopologyBuilder internalTopologyBuilder = StreamsBuilderTest.internalTopologyBuilder(builder);
    internalTopologyBuilder.setApplicationId(applicationId);
    // KStream with 3 partitions
    KStream<Object, Object> stream1 = builder.stream("topic1").map(new KeyValueMapper<Object, Object, KeyValue<Object, Object>>() {

        @Override
        public KeyValue<Object, Object> apply(final Object key, final Object value) {
            return new KeyValue<>(key, value);
        }
    });
    // KTable with 4 partitions
    KTable<Object, Long> table1 = builder.table("topic3").groupBy(new KeyValueMapper<Object, Object, KeyValue<Object, Object>>() {

        @Override
        public KeyValue<Object, Object> apply(final Object key, final Object value) {
            return new KeyValue<>(key, value);
        }
    }).count();
    // joining the stream and the table
    // this triggers the enforceCopartitioning() routine in the StreamsPartitionAssignor,
    // forcing the stream.map to get repartitioned to a topic with four partitions.
    stream1.join(table1, new ValueJoiner() {

        @Override
        public Object apply(final Object value1, final Object value2) {
            return null;
        }
    });
    final UUID uuid = UUID.randomUUID();
    final String client = "client1";
    mockTaskManager(Collections.<TaskId>emptySet(), Collections.<TaskId>emptySet(), UUID.randomUUID(), internalTopologyBuilder);
    configurePartitionAssignor(Collections.<String, Object>emptyMap());
    final MockInternalTopicManager mockInternalTopicManager = new MockInternalTopicManager(streamsConfig, mockClientSupplier.restoreConsumer);
    partitionAssignor.setInternalTopicManager(mockInternalTopicManager);
    final Map<String, PartitionAssignor.Subscription> subscriptions = new HashMap<>();
    final Set<TaskId> emptyTasks = Collections.emptySet();
    subscriptions.put(client, new PartitionAssignor.Subscription(Utils.mkList("topic1", "topic3"), new SubscriptionInfo(uuid, emptyTasks, emptyTasks, userEndPoint).encode()));
    final Map<String, PartitionAssignor.Assignment> assignment = partitionAssignor.assign(metadata, subscriptions);
    final Map<String, Integer> expectedCreatedInternalTopics = new HashMap<>();
    expectedCreatedInternalTopics.put(applicationId + "-KTABLE-AGGREGATE-STATE-STORE-0000000006-repartition", 4);
    expectedCreatedInternalTopics.put(applicationId + "-KTABLE-AGGREGATE-STATE-STORE-0000000006-changelog", 4);
    expectedCreatedInternalTopics.put(applicationId + "-KSTREAM-MAP-0000000001-repartition", 4);
    expectedCreatedInternalTopics.put(applicationId + "-topic3-STATE-STORE-0000000002-changelog", 4);
    // check if all internal topics were created as expected
    assertThat(mockInternalTopicManager.readyTopics, equalTo(expectedCreatedInternalTopics));
    final List<TopicPartition> expectedAssignment = Arrays.asList(new TopicPartition("topic1", 0), new TopicPartition("topic1", 1), new TopicPartition("topic1", 2), new TopicPartition("topic3", 0), new TopicPartition("topic3", 1), new TopicPartition("topic3", 2), new TopicPartition("topic3", 3), new TopicPartition(applicationId + "-KTABLE-AGGREGATE-STATE-STORE-0000000006-repartition", 0), new TopicPartition(applicationId + "-KTABLE-AGGREGATE-STATE-STORE-0000000006-repartition", 1), new TopicPartition(applicationId + "-KTABLE-AGGREGATE-STATE-STORE-0000000006-repartition", 2), new TopicPartition(applicationId + "-KTABLE-AGGREGATE-STATE-STORE-0000000006-repartition", 3), new TopicPartition(applicationId + "-KSTREAM-MAP-0000000001-repartition", 0), new TopicPartition(applicationId + "-KSTREAM-MAP-0000000001-repartition", 1), new TopicPartition(applicationId + "-KSTREAM-MAP-0000000001-repartition", 2), new TopicPartition(applicationId + "-KSTREAM-MAP-0000000001-repartition", 3));
    // check if we created a task for all expected topicPartitions.
    assertThat(new HashSet<>(assignment.get(client).partitions()), equalTo(new HashSet<>(expectedAssignment)));
}
Also used: KeyValue (org.apache.kafka.streams.KeyValue), TaskId (org.apache.kafka.streams.processor.TaskId), HashMap (java.util.HashMap), MockInternalTopicManager (org.apache.kafka.test.MockInternalTopicManager), KeyValueMapper (org.apache.kafka.streams.kstream.KeyValueMapper), SubscriptionInfo (org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), PartitionAssignor (org.apache.kafka.clients.consumer.internals.PartitionAssignor), UUID (java.util.UUID), HashSet (java.util.HashSet), StreamsBuilder (org.apache.kafka.streams.StreamsBuilder), TopicPartition (org.apache.kafka.common.TopicPartition), StreamsBuilderTest (org.apache.kafka.streams.StreamsBuilderTest), Test (org.junit.Test)
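
The joiner itself is deliberately trivial; the point of the test is that KStream#join(KTable, ValueJoiner) forces Streams to repartition the mapped stream so it is copartitioned with the table's four partitions. As a lambda, the same join is simply:

// KStream-KTable inner join; Streams creates the
// -KSTREAM-MAP-...-repartition topic checked in the assertions above
stream1.join(table1, (value1, value2) -> null);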

Example 5 with ValueJoiner

Use of org.apache.kafka.streams.kstream.ValueJoiner in the apache/kafka project.

From the class KTableKTableForeignKeyJoinMaterializationIntegrationTest, method getTopology.

private Topology getTopology(final Properties streamsConfig, final String queryableStoreName) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KTable<String, String> left = builder.table(LEFT_TABLE, Consumed.with(Serdes.String(), Serdes.String()));
    final KTable<String, String> right = builder.table(RIGHT_TABLE, Consumed.with(Serdes.String(), Serdes.String()));
    final Function<String, String> extractor = value -> value.split("\\|")[1];
    final ValueJoiner<String, String, String> joiner = (value1, value2) -> "(" + value1 + "," + value2 + ")";
    final Materialized<String, String, KeyValueStore<Bytes, byte[]>> materialized;
    if (queryable) {
        materialized = Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(queryableStoreName).withValueSerde(Serdes.String());
    } else {
        materialized = Materialized.with(null, Serdes.String());
    }
    final KTable<String, String> joinResult;
    if (this.materialized) {
        joinResult = left.join(right, extractor, joiner, materialized);
    } else {
        joinResult = left.join(right, extractor, joiner);
    }
    joinResult.toStream().to(OUTPUT, Produced.with(null, Serdes.String()));
    return builder.build(streamsConfig);
}
Also used: StreamsBuilder (org.apache.kafka.streams.StreamsBuilder), CoreMatchers.is (org.hamcrest.CoreMatchers.is), StreamsConfig (org.apache.kafka.streams.StreamsConfig), Arrays (java.util.Arrays), Produced (org.apache.kafka.streams.kstream.Produced), RunWith (org.junit.runner.RunWith), HashMap (java.util.HashMap), Utils.mkProperties (org.apache.kafka.common.utils.Utils.mkProperties), Function (java.util.function.Function), Utils.mkMap (org.apache.kafka.common.utils.Utils.mkMap), StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer), IntegrationTestUtils.safeUniqueTestName (org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName), TestName (org.junit.rules.TestName), KeyValueStore (org.apache.kafka.streams.state.KeyValueStore), Map (java.util.Map), Serdes (org.apache.kafka.common.serialization.Serdes), StringSerializer (org.apache.kafka.common.serialization.StringSerializer), MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat), Parameterized (org.junit.runners.Parameterized), Before (org.junit.Before), TopologyTestDriver (org.apache.kafka.streams.TopologyTestDriver), Collections.emptyMap (java.util.Collections.emptyMap), KTable (org.apache.kafka.streams.kstream.KTable), TestOutputTopic (org.apache.kafka.streams.TestOutputTopic), Properties (java.util.Properties), TestUtils (org.apache.kafka.test.TestUtils), Consumed (org.apache.kafka.streams.kstream.Consumed), Collection (java.util.Collection), Test (org.junit.Test), Bytes (org.apache.kafka.common.utils.Bytes), Rule (org.junit.Rule), Utils.mkEntry (org.apache.kafka.common.utils.Utils.mkEntry), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), Materialized (org.apache.kafka.streams.kstream.Materialized), TestInputTopic (org.apache.kafka.streams.TestInputTopic), Topology (org.apache.kafka.streams.Topology)
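
Because the example's imports already include TopologyTestDriver, TestInputTopic, and TestOutputTopic, the foreign-key join can be exercised without a broker. A minimal sketch, assuming it runs inside the same test class (so LEFT_TABLE, RIGHT_TABLE, OUTPUT, and the queryable/materialized flags are in scope) and that left-table values carry the foreign key after a '|', as the extractor expects:

final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "fk-join-smoke");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
try (final TopologyTestDriver driver =
         new TopologyTestDriver(getTopology(props, "store"), props)) {
    final TestInputTopic<String, String> left = driver.createInputTopic(
        LEFT_TABLE, new StringSerializer(), new StringSerializer());
    final TestInputTopic<String, String> right = driver.createInputTopic(
        RIGHT_TABLE, new StringSerializer(), new StringSerializer());
    final TestOutputTopic<String, String> output = driver.createOutputTopic(
        OUTPUT, new StringDeserializer(), new StringDeserializer());
    right.pipeInput("rhs1", "rhsValue1");
    left.pipeInput("lhs1", "lhsValue1|rhs1"); // extractor yields "rhs1"
    // the ValueJoiner should produce "(lhsValue1|rhs1,rhsValue1)"
    System.out.println(output.readKeyValuesToMap());
}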

Aggregations

ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner): 20 usages
Test (org.junit.Test): 18 usages
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 16 usages
KeyValue (org.apache.kafka.streams.KeyValue): 15 usages
Properties (java.util.Properties): 14 usages
HashMap (java.util.HashMap): 12 usages
Serdes (org.apache.kafka.common.serialization.Serdes): 12 usages
KTable (org.apache.kafka.streams.kstream.KTable): 12 usages
KeyValueMapper (org.apache.kafka.streams.kstream.KeyValueMapper): 11 usages
Materialized (org.apache.kafka.streams.kstream.Materialized): 11 usages
List (java.util.List): 10 usages
Bytes (org.apache.kafka.common.utils.Bytes): 9 usages
Consumed (org.apache.kafka.streams.kstream.Consumed): 9 usages
Assert.assertEquals (org.junit.Assert.assertEquals): 9 usages
Arrays.asList (java.util.Arrays.asList): 8 usages
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 8 usages
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 8 usages
Before (org.junit.Before): 8 usages
Collections.emptyMap (java.util.Collections.emptyMap): 7 usages
HashSet (java.util.HashSet): 7 usages