Use of org.apache.kafka.common.serialization.StringDeserializer in project kafka by apache.
The class RepartitionOptimizingTest, method runTest.
private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> sourceStream = builder.stream(INPUT_TOPIC, Consumed.with(Serdes.String(), Serdes.String()).withName("sourceStream"));
final KStream<String, String> mappedStream = sourceStream.map((k, v) -> KeyValue.pair(k.toUpperCase(Locale.getDefault()), v), Named.as("source-map"));
mappedStream.filter((k, v) -> k.equals("B"), Named.as("process-filter")).mapValues(v -> v.toUpperCase(Locale.getDefault()), Named.as("process-mapValues")).process(() -> new SimpleProcessor(processorValueCollector), Named.as("process"));
final KStream<String, Long> countStream = mappedStream.groupByKey(Grouped.as("count-groupByKey"))
    .count(Named.as("count"),
        Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("count-store"))
            .withKeySerde(Serdes.String())
            .withValueSerde(Serdes.Long()))
    .toStream(Named.as("count-toStream"));
countStream.to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("count-to"));
mappedStream.groupByKey(Grouped.as("aggregate-groupByKey")).aggregate(initializer, aggregator, Named.as("aggregate"), Materialized.<String, Integer>as(Stores.inMemoryKeyValueStore("aggregate-store")).withKeySerde(Serdes.String()).withValueSerde(Serdes.Integer())).toStream(Named.as("aggregate-toStream")).to(AGGREGATION_TOPIC, Produced.with(Serdes.String(), Serdes.Integer()).withName("reduce-to"));
// add operators for the case where the repartition node is further downstream
mappedStream.filter((k, v) -> true, Named.as("reduce-filter"))
    .peek((k, v) -> System.out.println(k + ":" + v), Named.as("reduce-peek"))
    .groupByKey(Grouped.as("reduce-groupByKey"))
    .reduce(reducer, Named.as("reducer"), Materialized.as(Stores.inMemoryKeyValueStore("reduce-store")))
    .toStream(Named.as("reduce-toStream"))
    .to(REDUCE_TOPIC, Produced.with(Serdes.String(), Serdes.String()));
mappedStream.filter((k, v) -> k.equals("A"), Named.as("join-filter"))
    .join(countStream,
        (v1, v2) -> v1 + ":" + v2.toString(),
        JoinWindows.of(ofMillis(5000)),
        StreamJoined.<String, String, Long>with(
                Stores.inMemoryWindowStore("join-store", ofDays(1), ofMillis(10000), true),
                Stores.inMemoryWindowStore("other-join-store", ofDays(1), ofMillis(10000), true))
            .withName("join")
            .withKeySerde(Serdes.String())
            .withValueSerde(Serdes.String())
            .withOtherValueSerde(Serdes.Long()))
    .to(JOINED_TOPIC, Produced.as("join-to"));
streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
final Topology topology = builder.build(streamsConfiguration);
topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_TOPIC, stringSerializer, stringSerializer);
final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
final TestOutputTopic<String, Integer> aggregationOutputTopic = topologyTestDriver.createOutputTopic(AGGREGATION_TOPIC, stringDeserializer, new IntegerDeserializer());
final TestOutputTopic<String, String> reduceOutputTopic = topologyTestDriver.createOutputTopic(REDUCE_TOPIC, stringDeserializer, stringDeserializer);
final TestOutputTopic<String, String> joinedOutputTopic = topologyTestDriver.createOutputTopic(JOINED_TOPIC, stringDeserializer, stringDeserializer);
inputTopicA.pipeKeyValueList(getKeyValues());
// Verify the topology
final String topologyString = topology.describe().toString();
if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
} else {
assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
}
// Verify the number of repartition topics
assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
// Verify the values collected by the processor
assertThat(processorValueCollector.size(), equalTo(3));
assertThat(processorValueCollector, equalTo(expectedCollectedProcessorValues));
// Verify the expected output
assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
assertThat(aggregationOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedAggKeyValues)));
assertThat(reduceOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedReduceKeyValues)));
assertThat(joinedOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedJoinKeyValues)));
}
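In the surrounding test class, runTest is presumably invoked once per optimization mode. A minimal sketch of such callers (the method names and literal counts are assumptions; the counts follow from the topology above, where the count, aggregate, reduce, and join operations all consume the re-keyed mappedStream and can share a single repartition topic once optimization is enabled):

@Test
public void shouldSendCorrectResultsWithOptimization() {
    // hypothetical caller: with optimization, one shared repartition topic
    runTest(StreamsConfig.OPTIMIZE, 1);
}

@Test
public void shouldSendCorrectResultsWithoutOptimization() {
    // hypothetical caller: without optimization, one repartition topic per operation
    runTest(StreamsConfig.NO_OPTIMIZATION, 4);
}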
Use of org.apache.kafka.common.serialization.StringDeserializer in project kafka by apache.
The class RepartitionWithMergeOptimizingTest, method runTest.
private void runTest(final String optimizationConfig, final int expectedNumberRepartitionTopics) {
streamsConfiguration.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, optimizationConfig);
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> sourceAStream = builder.stream(INPUT_A_TOPIC, Consumed.with(Serdes.String(), Serdes.String()).withName("sourceAStream"));
final KStream<String, String> sourceBStream = builder.stream(INPUT_B_TOPIC, Consumed.with(Serdes.String(), Serdes.String()).withName("sourceBStream"));
final KStream<String, String> mappedAStream = sourceAStream.map((k, v) -> KeyValue.pair(v.split(":")[0], v), Named.as("mappedAStream"));
final KStream<String, String> mappedBStream = sourceBStream.map((k, v) -> KeyValue.pair(v.split(":")[0], v), Named.as("mappedBStream"));
final KStream<String, String> mergedStream = mappedAStream.merge(mappedBStream, Named.as("mergedStream"));
mergedStream.groupByKey(Grouped.as("long-groupByKey")).count(Named.as("long-count"), Materialized.as(Stores.inMemoryKeyValueStore("long-store"))).toStream(Named.as("long-toStream")).to(COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()).withName("long-to"));
mergedStream.groupByKey(Grouped.as("string-groupByKey")).count(Named.as("string-count"), Materialized.as(Stores.inMemoryKeyValueStore("string-store"))).toStream(Named.as("string-toStream")).mapValues(v -> v.toString(), Named.as("string-mapValues")).to(STRING_COUNT_TOPIC, Produced.with(Serdes.String(), Serdes.String()).withName("string-to"));
final Topology topology = builder.build(streamsConfiguration);
topologyTestDriver = new TopologyTestDriver(topology, streamsConfiguration);
final TestInputTopic<String, String> inputTopicA = topologyTestDriver.createInputTopic(INPUT_A_TOPIC, stringSerializer, stringSerializer);
final TestInputTopic<String, String> inputTopicB = topologyTestDriver.createInputTopic(INPUT_B_TOPIC, stringSerializer, stringSerializer);
final TestOutputTopic<String, Long> countOutputTopic = topologyTestDriver.createOutputTopic(COUNT_TOPIC, stringDeserializer, new LongDeserializer());
final TestOutputTopic<String, String> stringCountOutputTopic = topologyTestDriver.createOutputTopic(STRING_COUNT_TOPIC, stringDeserializer, stringDeserializer);
inputTopicA.pipeKeyValueList(getKeyValues());
inputTopicB.pipeKeyValueList(getKeyValues());
final String topologyString = topology.describe().toString();
// Verify the topology
if (optimizationConfig.equals(StreamsConfig.OPTIMIZE)) {
assertEquals(EXPECTED_OPTIMIZED_TOPOLOGY, topologyString);
} else {
assertEquals(EXPECTED_UNOPTIMIZED_TOPOLOGY, topologyString);
}
// Verify the number of repartition topics
assertEquals(expectedNumberRepartitionTopics, getCountOfRepartitionTopicsFound(topologyString));
// Verify the expected output
assertThat(countOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedCountKeyValues)));
assertThat(stringCountOutputTopic.readKeyValuesToMap(), equalTo(keyValueListToMap(expectedStringCountKeyValues)));
}
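Both runTest variants compare topic output against expected key-value lists via a keyValueListToMap helper that is not shown in these snippets. A plausible shape for it (an assumption), mirroring TestOutputTopic.readKeyValuesToMap by keeping the latest value per key:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

// Assumed helper (not part of the snippets above): collapse a list of
// KeyValue pairs into a map, keeping the last value seen for each key.
private static <K, V> Map<K, V> keyValueListToMap(final List<KeyValue<K, V>> keyValuePairs) {
    return keyValuePairs.stream()
        .collect(Collectors.toMap(kv -> kv.key, kv -> kv.value, (first, second) -> second));
}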
Use of org.apache.kafka.common.serialization.StringDeserializer in project kafka by apache.
The class StreamTableJoinTopologyOptimizationIntegrationTest, method shouldDoStreamTableJoinWithDifferentNumberOfPartitions.
@Test
public void shouldDoStreamTableJoinWithDifferentNumberOfPartitions() throws Exception {
final String storeName = "store";
final String selectKeyName = "selectKey";
final StreamsBuilder streamsBuilder = new StreamsBuilder();
final KStream<Integer, String> stream = streamsBuilder.stream(inputTopic);
final KTable<Integer, String> table = streamsBuilder.table(tableTopic, Materialized.as(storeName));
stream.selectKey((key, value) -> key, Named.as(selectKeyName))
    .join(table, (value1, value2) -> value2)
    .to(outputTopic);
kafkaStreams = startStreams(streamsBuilder);
final long timestamp = System.currentTimeMillis();
final List<KeyValue<Integer, String>> expectedRecords = Arrays.asList(new KeyValue<>(1, "A"), new KeyValue<>(2, "B"));
sendEvents(inputTopic, timestamp, expectedRecords);
sendEvents(tableTopic, timestamp, expectedRecords);
validateReceivedMessages(outputTopic, new IntegerDeserializer(), new StringDeserializer(), expectedRecords);
final Set<String> allTopicsInCluster = CLUSTER.getAllTopicsInCluster();
final String repartitionTopicName = applicationId + "-" + selectKeyName + "-repartition";
final String tableChangelogStoreName = applicationId + "-" + storeName + "-changelog";
assertTrue(topicExists(repartitionTopicName));
assertEquals(2, getNumberOfPartitionsForTopic(repartitionTopicName));
if (StreamsConfig.OPTIMIZE.equals(topologyOptimization)) {
assertFalse(allTopicsInCluster.contains(tableChangelogStoreName));
} else if (StreamsConfig.NO_OPTIMIZATION.equals(topologyOptimization)) {
assertTrue(allTopicsInCluster.contains(tableChangelogStoreName));
}
}
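The topicExists and getNumberOfPartitionsForTopic helpers are defined elsewhere in the test class. A sketch of how they could be implemented with the Kafka Admin client (an assumption; the real helpers may query the embedded test cluster directly, and "localhost:9092" stands in for the cluster's bootstrap servers):

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.TopicDescription;

// Hypothetical helper implementations for the assertions above.
private static Admin adminClient() {
    final Properties props = new Properties();
    props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    return Admin.create(props);
}

private static boolean topicExists(final String topic) throws Exception {
    try (final Admin admin = adminClient()) {
        return admin.listTopics().names().get().contains(topic);
    }
}

private static int getNumberOfPartitionsForTopic(final String topic) throws Exception {
    try (final Admin admin = adminClient()) {
        final TopicDescription description =
            admin.describeTopics(Collections.singleton(topic)).all().get().get(topic);
        return description.partitions().size();
    }
}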
Use of org.apache.kafka.common.serialization.StringDeserializer in project kafka by apache.
The class EosTestDriver, method verifyReceivedAllRecords.
private static void verifyReceivedAllRecords(final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> expectedRecords, final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> receivedRecords) {
if (expectedRecords.size() != receivedRecords.size()) {
throw new RuntimeException("Result verification failed. Received " + receivedRecords.size() + " records but expected " + expectedRecords.size());
}
final StringDeserializer stringDeserializer = new StringDeserializer();
final IntegerDeserializer integerDeserializer = new IntegerDeserializer();
for (final Map.Entry<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> partitionRecords : receivedRecords.entrySet()) {
final TopicPartition inputTopicPartition = new TopicPartition("data", partitionRecords.getKey().partition());
final List<ConsumerRecord<byte[], byte[]>> receivedRecordsForPartition = partitionRecords.getValue();
final List<ConsumerRecord<byte[], byte[]>> expectedRecordsForPartition = expectedRecords.get(inputTopicPartition);
System.out.println(partitionRecords.getKey() + " with " + receivedRecordsForPartition.size() + ", " + inputTopicPartition + " with " + expectedRecordsForPartition.size());
final Iterator<ConsumerRecord<byte[], byte[]>> expectedRecord = expectedRecordsForPartition.iterator();
RuntimeException exception = null;
for (final ConsumerRecord<byte[], byte[]> receivedRecord : receivedRecordsForPartition) {
if (!expectedRecord.hasNext()) {
    exception = new RuntimeException("Result verification failed for " + receivedRecord + " since there's no more expected record");
    // break immediately: calling expectedRecord.next() below would throw
    // NoSuchElementException and mask this verification error
    break;
}
final ConsumerRecord<byte[], byte[]> expected = expectedRecord.next();
final String receivedKey = stringDeserializer.deserialize(receivedRecord.topic(), receivedRecord.key());
final int receivedValue = integerDeserializer.deserialize(receivedRecord.topic(), receivedRecord.value());
final String expectedKey = stringDeserializer.deserialize(expected.topic(), expected.key());
final int expectedValue = integerDeserializer.deserialize(expected.topic(), expected.value());
if (!receivedKey.equals(expectedKey) || receivedValue != expectedValue) {
exception = new RuntimeException("Result verification failed for " + receivedRecord + " expected <" + expectedKey + "," + expectedValue + "> but was <" + receivedKey + "," + receivedValue + ">");
}
}
if (exception != null) {
throw exception;
}
}
}
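verifyReceivedAllRecords only checks the maps it is handed; how those maps are built matters for an exactly-once test. A sketch (an assumption, not the driver's actual collection code) of gathering a per-partition record map with a consumer restricted to committed transactional data:

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

// Hypothetical collector: isolation.level=read_committed ensures records from
// aborted transactions are never returned, which is the property under test.
private static Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> collectRecords(
        final String topic, final int expectedTotal) {
    final Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed address
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "eos-verifier");
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> result = new HashMap<>();
    int collected = 0;
    try (final KafkaConsumer<byte[], byte[]> consumer =
             new KafkaConsumer<>(props, new ByteArrayDeserializer(), new ByteArrayDeserializer())) {
        consumer.subscribe(Collections.singletonList(topic));
        while (collected < expectedTotal) {
            for (final ConsumerRecord<byte[], byte[]> record : consumer.poll(Duration.ofSeconds(1))) {
                result.computeIfAbsent(
                    new TopicPartition(record.topic(), record.partition()),
                    tp -> new ArrayList<>()).add(record);
                collected++;
            }
        }
    }
    return result;
}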
Use of org.apache.kafka.common.serialization.StringDeserializer in project kafka by apache.
The class EosTestDriver, method verifyCnt.
private static void verifyCnt(final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> inputPerTopicPerPartition, final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> cntPerTopicPerPartition) {
final StringDeserializer stringDeserializer = new StringDeserializer();
final LongDeserializer longDeserializer = new LongDeserializer();
final HashMap<String, Long> currentSumPerKey = new HashMap<>();
for (final Map.Entry<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> partitionRecords : cntPerTopicPerPartition.entrySet()) {
final TopicPartition inputTopicPartition = new TopicPartition("repartition", partitionRecords.getKey().partition());
final List<ConsumerRecord<byte[], byte[]>> partitionInput = inputPerTopicPerPartition.get(inputTopicPartition);
final List<ConsumerRecord<byte[], byte[]>> partitionCnt = partitionRecords.getValue();
if (partitionInput.size() != partitionCnt.size()) {
throw new RuntimeException("Result verification failed: expected " + partitionInput.size() + " records for " + partitionRecords.getKey() + " but received " + partitionCnt.size());
}
final Iterator<ConsumerRecord<byte[], byte[]>> inputRecords = partitionInput.iterator();
for (final ConsumerRecord<byte[], byte[]> receivedRecord : partitionCnt) {
final ConsumerRecord<byte[], byte[]> input = inputRecords.next();
final String receivedKey = stringDeserializer.deserialize(receivedRecord.topic(), receivedRecord.key());
final long receivedValue = longDeserializer.deserialize(receivedRecord.topic(), receivedRecord.value());
final String key = stringDeserializer.deserialize(input.topic(), input.key());
Long cnt = currentSumPerKey.get(key);
if (cnt == null) {
cnt = 0L;
}
currentSumPerKey.put(key, ++cnt);
if (!receivedKey.equals(key) || receivedValue != cnt) {
throw new RuntimeException("Result verification failed for " + receivedRecord + " expected <" + key + "," + cnt + "> but was <" + receivedKey + "," + receivedValue + ">");
}
}
}
}
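The loop above enforces a running-count invariant: the i-th count record for a key must equal the number of input records with that key seen so far on the partition. A standalone illustration with hypothetical data:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Self-contained illustration of the invariant verifyCnt checks.
public class RunningCountExample {
    public static void main(final String[] args) {
        final List<String> inputKeys = Arrays.asList("A", "B", "A", "A");
        final Map<String, Long> countPerKey = new HashMap<>();
        for (final String key : inputKeys) {
            final long cnt = countPerKey.merge(key, 1L, Long::sum);
            System.out.println("<" + key + "," + cnt + ">"); // <A,1> <B,1> <A,2> <A,3>
        }
    }
}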