use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.
the class QueryableStateIntegrationTest method createCountStream.
/**
* Creates a typical word count topology
*/
private KafkaStreams createCountStream(final String inputTopic, final String outputTopic, final String windowOutputTopic, final String storeName, final String windowStoreName, final Properties streamsConfiguration) {
final StreamsBuilder builder = new StreamsBuilder();
final Serde<String> stringSerde = Serdes.String();
final KStream<String, String> textLines = builder.stream(inputTopic, Consumed.with(stringSerde, stringSerde));
final KGroupedStream<String, String> groupedByWord = textLines.flatMapValues((ValueMapper<String, Iterable<String>>) value -> Arrays.asList(value.split("\\W+"))).groupBy(MockMapper.selectValueMapper());
// Create a State Store for the all time word count
groupedByWord.count(Materialized.as(storeName + "-" + inputTopic)).toStream().to(outputTopic, Produced.with(Serdes.String(), Serdes.Long()));
// Create a Windowed State Store that contains the word count for every 1 minute
groupedByWord.windowedBy(TimeWindows.of(ofMillis(WINDOW_SIZE))).count(Materialized.as(windowStoreName + "-" + inputTopic)).toStream((key, value) -> key.key()).to(windowOutputTopic, Produced.with(Serdes.String(), Serdes.Long()));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.
the class StreamTableJoinTopologyOptimizationIntegrationTest method shouldDoStreamTableJoinWithDifferentNumberOfPartitions.
@Test
public void shouldDoStreamTableJoinWithDifferentNumberOfPartitions() throws Exception {
final String storeName = "store";
final String selectKeyName = "selectKey";
final StreamsBuilder streamsBuilder = new StreamsBuilder();
final KStream<Integer, String> stream = streamsBuilder.stream(inputTopic);
final KTable<Integer, String> table = streamsBuilder.table(tableTopic, Materialized.as(storeName));
stream.selectKey((key, value) -> key, Named.as(selectKeyName)).join(table, (value1, value2) -> value2).to(outputTopic);
kafkaStreams = startStreams(streamsBuilder);
final long timestamp = System.currentTimeMillis();
final List<KeyValue<Integer, String>> expectedRecords = Arrays.asList(new KeyValue<>(1, "A"), new KeyValue<>(2, "B"));
sendEvents(inputTopic, timestamp, expectedRecords);
sendEvents(outputTopic, timestamp, expectedRecords);
validateReceivedMessages(outputTopic, new IntegerDeserializer(), new StringDeserializer(), expectedRecords);
final Set<String> allTopicsInCluster = CLUSTER.getAllTopicsInCluster();
final String repartitionTopicName = applicationId + "-" + selectKeyName + "-repartition";
final String tableChangelogStoreName = applicationId + "-" + storeName + "-changelog";
assertTrue(topicExists(repartitionTopicName));
assertEquals(2, getNumberOfPartitionsForTopic(repartitionTopicName));
if (StreamsConfig.OPTIMIZE.equals(topologyOptimization)) {
assertFalse(allTopicsInCluster.contains(tableChangelogStoreName));
} else if (StreamsConfig.NO_OPTIMIZATION.equals(topologyOptimization)) {
assertTrue(allTopicsInCluster.contains(tableChangelogStoreName));
}
}
use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.
the class StreamsGraphTest method shouldBeAbleToProcessNestedMultipleKeyChangingNodes.
@Test
public void shouldBeAbleToProcessNestedMultipleKeyChangingNodes() {
final Properties properties = new Properties();
properties.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "test-application");
properties.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
properties.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> inputStream = builder.stream("inputTopic");
final KStream<String, String> changedKeyStream = inputStream.selectKey((k, v) -> v.substring(0, 5));
// first repartition
changedKeyStream.groupByKey(Grouped.as("count-repartition")).count(Materialized.as("count-store")).toStream().to("count-topic", Produced.with(Serdes.String(), Serdes.Long()));
// second repartition
changedKeyStream.groupByKey(Grouped.as("windowed-repartition")).windowedBy(TimeWindows.of(Duration.ofSeconds(5))).count(Materialized.as("windowed-count-store")).toStream().map((k, v) -> KeyValue.pair(k.key(), v)).to("windowed-count", Produced.with(Serdes.String(), Serdes.Long()));
builder.build(properties);
}
use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.
the class StreamsGraphTest method shouldNotThrowNPEWithMergeNodes.
@Test
public // Topology in this test from https://issues.apache.org/jira/browse/KAFKA-9739
void shouldNotThrowNPEWithMergeNodes() {
final Properties properties = new Properties();
properties.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "test-application");
properties.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
properties.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
final StreamsBuilder builder = new StreamsBuilder();
initializer = () -> "";
aggregator = (aggKey, value, aggregate) -> aggregate + value.length();
final TransformerSupplier<String, String, KeyValue<String, String>> transformSupplier = () -> new Transformer<String, String, KeyValue<String, String>>() {
@Override
public void init(final ProcessorContext context) {
}
@Override
public KeyValue<String, String> transform(final String key, final String value) {
return KeyValue.pair(key, value);
}
@Override
public void close() {
}
};
final KStream<String, String> retryStream = builder.stream("retryTopic", Consumed.with(Serdes.String(), Serdes.String())).transform(transformSupplier).groupByKey(Grouped.with(Serdes.String(), Serdes.String())).aggregate(initializer, aggregator, Materialized.with(Serdes.String(), Serdes.String())).suppress(Suppressed.untilTimeLimit(Duration.ofSeconds(500), Suppressed.BufferConfig.maxBytes(64_000_000))).toStream().flatMap((k, v) -> new ArrayList<>());
final KTable<String, String> idTable = builder.stream("id-table-topic", Consumed.with(Serdes.String(), Serdes.String())).flatMap((k, v) -> new ArrayList<KeyValue<String, String>>()).peek((subscriptionId, recipientId) -> System.out.println("data " + subscriptionId + " " + recipientId)).groupByKey(Grouped.with(Serdes.String(), Serdes.String())).aggregate(initializer, aggregator, Materialized.with(Serdes.String(), Serdes.String()));
final KStream<String, String> joinStream = builder.stream("internal-topic-command", Consumed.with(Serdes.String(), Serdes.String())).peek((subscriptionId, command) -> System.out.println("stdoutput")).mapValues((k, v) -> v).merge(retryStream).leftJoin(idTable, (v1, v2) -> v1 + v2, Joined.with(Serdes.String(), Serdes.String(), Serdes.String()));
joinStream.split().branch((k, v) -> v.equals("some-value"), Branched.withConsumer(ks -> ks.map(KeyValue::pair).peek((recipientId, command) -> System.out.println("printing out")).to("external-command", Produced.with(Serdes.String(), Serdes.String())))).defaultBranch(Branched.withConsumer(ks -> {
ks.filter((k, v) -> v != null).peek((subscriptionId, wrapper) -> System.out.println("Printing output")).mapValues((k, v) -> v).to("dlq-topic", Produced.with(Serdes.String(), Serdes.String()));
ks.map(KeyValue::pair).to("retryTopic", Produced.with(Serdes.String(), Serdes.String()));
}));
final Topology topology = builder.build(properties);
assertEquals(expectedComplexMergeOptimizeTopology, topology.describe().toString());
}
use of org.apache.kafka.streams.kstream.KStream in project kafka by apache.
the class StreamsOptimizedTest method main.
public static void main(final String[] args) throws Exception {
if (args.length < 1) {
System.err.println("StreamsOptimizedTest requires one argument (properties-file) but no provided: ");
}
final String propFileName = args[0];
final Properties streamsProperties = Utils.loadProps(propFileName);
System.out.println("StreamsTest instance started StreamsOptimizedTest");
System.out.println("props=" + streamsProperties);
final String inputTopic = (String) Objects.requireNonNull(streamsProperties.remove("input.topic"));
final String aggregationTopic = (String) Objects.requireNonNull(streamsProperties.remove("aggregation.topic"));
final String reduceTopic = (String) Objects.requireNonNull(streamsProperties.remove("reduce.topic"));
final String joinTopic = (String) Objects.requireNonNull(streamsProperties.remove("join.topic"));
final Pattern repartitionTopicPattern = Pattern.compile("Sink: .*-repartition");
final Initializer<Integer> initializer = () -> 0;
final Aggregator<String, String, Integer> aggregator = (k, v, agg) -> agg + v.length();
final Reducer<String> reducer = (v1, v2) -> Integer.toString(Integer.parseInt(v1) + Integer.parseInt(v2));
final Function<String, String> keyFunction = s -> Integer.toString(Integer.parseInt(s) % 9);
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> sourceStream = builder.stream(inputTopic, Consumed.with(Serdes.String(), Serdes.String()));
final KStream<String, String> mappedStream = sourceStream.selectKey((k, v) -> keyFunction.apply(v));
final KStream<String, Long> countStream = mappedStream.groupByKey().count(Materialized.with(Serdes.String(), Serdes.Long())).toStream();
mappedStream.groupByKey().aggregate(initializer, aggregator, Materialized.with(Serdes.String(), Serdes.Integer())).toStream().peek((k, v) -> System.out.println(String.format("AGGREGATED key=%s value=%s", k, v))).to(aggregationTopic, Produced.with(Serdes.String(), Serdes.Integer()));
mappedStream.groupByKey().reduce(reducer, Materialized.with(Serdes.String(), Serdes.String())).toStream().peek((k, v) -> System.out.println(String.format("REDUCED key=%s value=%s", k, v))).to(reduceTopic, Produced.with(Serdes.String(), Serdes.String()));
mappedStream.join(countStream, (v1, v2) -> v1 + ":" + v2.toString(), JoinWindows.of(ofMillis(500)), StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.Long())).peek((k, v) -> System.out.println(String.format("JOINED key=%s value=%s", k, v))).to(joinTopic, Produced.with(Serdes.String(), Serdes.String()));
final Properties config = new Properties();
config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsOptimizedTest");
config.setProperty(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");
config.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
config.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
config.setProperty(StreamsConfig.adminClientPrefix(AdminClientConfig.RETRIES_CONFIG), "100");
config.putAll(streamsProperties);
final Topology topology = builder.build(config);
final KafkaStreams streams = new KafkaStreams(topology, config);
streams.setStateListener((newState, oldState) -> {
if (oldState == State.REBALANCING && newState == State.RUNNING) {
final int repartitionTopicCount = getCountOfRepartitionTopicsFound(topology.describe().toString(), repartitionTopicPattern);
System.out.println(String.format("REBALANCING -> RUNNING with REPARTITION TOPIC COUNT=%d", repartitionTopicCount));
System.out.flush();
}
});
streams.cleanUp();
streams.start();
Exit.addShutdownHook("streams-shutdown-hook", () -> {
System.out.println("closing Kafka Streams instance");
System.out.flush();
streams.close(Duration.ofMillis(5000));
System.out.println("OPTIMIZE_TEST Streams Stopped");
System.out.flush();
});
}
Aggregations