use of org.apache.kafka.streams.kstream.KStream in project kafka-streams-examples by confluentinc.
the class WordCountInteractiveQueriesExample method createStreams.
/**
 * Defines the word-count topology used by the interactive-queries example.
 *
 * <p>Each text line is lower-cased and split on runs of non-word characters; the resulting words
 * are grouped by the word itself and counted twice: once into the key-value store
 * {@code "word-count"} and once per time window into the window store {@code "windowed-word-count"}.
 * This method only constructs the {@link KafkaStreams} instance; it does not start it.
 *
 * @param streamsConfiguration configuration handed to the {@link KafkaStreams} constructor
 * @return the constructed {@link KafkaStreams} instance
 */
static KafkaStreams createStreams(final Properties streamsConfiguration) {
final Serde<String> stringSerde = Serdes.String();
StreamsBuilder builder = new StreamsBuilder();
// NOTE(review): the right-hand side below is truncated (nothing precedes ", Consumed.with(...)").
// Presumably it was a `builder.stream(<input topic>, Consumed.with(...))` call -- confirm the
// topic identifier against the upstream example before restoring it.
KStream<String, String> textLines =, Consumed.with(Serdes.String(), Serdes.String()));
// Split every line into lower-cased words and re-key each record by the word so it can be counted.
final KGroupedStream<String, String> groupedByWord = textLines.flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+"))).groupBy((key, word) -> word, Serialized.with(stringSerde, stringSerde));
// Create a state store holding the all-time word count
groupedByWord.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-count").withValueSerde(Serdes.Long()));
// Create a windowed state store that contains the word count for every
// 1 minute (the window size is given as 60000, presumably milliseconds)
groupedByWord.windowedBy(TimeWindows.of(60000)).count(Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-word-count").withValueSerde(Serdes.Long()));
// NOTE(review): the first constructor argument is missing; presumably `builder.build()` -- confirm.
// NOTE(review): the method's closing brace is also absent from this excerpt.
return new KafkaStreams(, streamsConfiguration);
the class FraudService method processStreams.
/**
 * Defines the fraud-check topology.
 *
 * <p>Orders in state {@code CREATED} are grouped by customer id and aggregated in session windows
 * into a running total ({@code quantity * price} summed per customer). The aggregate is re-keyed by
 * order id and branched on {@code FRAUD_LIMIT}: totals at or above the limit produce a
 * {@code FAIL} {@code OrderValidation}, totals below it produce a {@code PASS}.
 * Record caching is switched off in the returned instance's configuration. This method only
 * constructs the {@link KafkaStreams} instance; it does not start it.
 *
 * @param bootstrapServers passed through to {@code baseStreamsConfig}
 * @param stateDir passed through to {@code baseStreamsConfig}
 * @return the constructed {@link KafkaStreams} instance
 */
private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
// Latch onto the orders stream, keeping only orders that are still in the CREATED state
StreamsBuilder builder = new StreamsBuilder();
// NOTE(review): the source expression before ", Consumed.with(...)" is truncated; presumably a
// `builder.stream(<orders topic name>, ...)` call -- confirm against the upstream example.
KStream<String, Order> orders =, Consumed.with(ORDERS.keySerde(), ORDERS.valueSerde())).filter((id, order) -> OrderState.CREATED.equals(order.getState()));
// Create an aggregate of the total value by customer and hold it with the order. We use session windows to
// detect periods of activity.
// NOTE(review): the session gap is `60 * MIN`; MIN is defined outside this excerpt (presumably one
// minute in milliseconds -- confirm). The key serde passed to Materialized.with is null, which
// presumably falls back to the configured default key serde -- confirm.
KTable<Windowed<Long>, OrderValue> aggregate = orders.groupBy((id, order) -> order.getCustomerId(), Serialized.with(Serdes.Long(), ORDERS.valueSerde())).windowedBy(SessionWindows.with(60 * MIN)).aggregate(OrderValue::new, // Calculate running total for each customer within this window
(custId, order, total) -> new OrderValue(order, total.getValue() + order.getQuantity() * order.getPrice()), // include a merger as we're using session windows.
(k, a, b) -> simpleMerge(a, b), Materialized.with(null, Schemas.ORDER_VALUE_SERDE));
// Ditch the windowing and rekey
KStream<String, OrderValue> ordersWithTotals = aggregate.toStream((windowedKey, orderValue) -> windowedKey.key()).filter(// When elements are evicted from a session window they create delete events. Filter these out.
(k, v) -> v != null).selectKey((id, orderValue) -> orderValue.getOrder().getId());
// Now branch the stream into two, for pass and fail, based on whether the windowed total is over Fraud Limit
// forks[0] receives totals >= FRAUD_LIMIT (fail); forks[1] receives totals < FRAUD_LIMIT (pass).
KStream<String, OrderValue>[] forks = ordersWithTotals.branch((id, orderValue) -> orderValue.getValue() >= FRAUD_LIMIT, (id, orderValue) -> orderValue.getValue() < FRAUD_LIMIT);
// NOTE(review): in both `.to(, ...)` calls below the destination topic argument is missing;
// presumably the order-validations topic name -- confirm.
forks[0].mapValues(orderValue -> new OrderValidation(orderValue.getOrder().getId(), FRAUD_CHECK, FAIL)).to(, Produced.with(ORDER_VALIDATIONS.keySerde(), ORDER_VALIDATIONS.valueSerde()));
forks[1].mapValues(orderValue -> new OrderValidation(orderValue.getOrder().getId(), FRAUD_CHECK, PASS)).to(, Produced.with(ORDER_VALIDATIONS.keySerde(), ORDER_VALIDATIONS.valueSerde()));
// disable caching to ensure a complete aggregate changelog. This is a little trick we need to apply
// as caching in Kafka Streams will conflate subsequent updates for the same key. Disabling caching ensures
// we get a complete "changelog" from the aggregate(...) step above (i.e. every input event will have a
// corresponding output event).
Properties props = baseStreamsConfig(bootstrapServers, stateDir, FRAUD_SERVICE_APP_ID);
props.setProperty(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");
// NOTE(review): the first constructor argument is missing; presumably `builder.build()` -- confirm.
// NOTE(review): the method's closing brace is also absent from this excerpt.
return new KafkaStreams(, props);
the class InventoryService method processStreams.
/**
 * Defines the inventory-validation topology.
 *
 * <p>Orders are re-keyed by product, restricted to state {@code CREATED}, joined with the
 * warehouse-inventory table, and passed through an {@code InventoryValidator} transformer that is
 * given access to the store named {@code RESERVED_STOCK_STORE_NAME}; the transformer's output is
 * written out with the order-validations serdes. This method only constructs the
 * {@link KafkaStreams} instance; it does not start it.
 *
 * @param bootstrapServers passed through to {@code MicroserviceUtils.baseStreamsConfig}
 * @param stateDir passed through to {@code MicroserviceUtils.baseStreamsConfig}
 * @return the constructed {@link KafkaStreams} instance
 */
private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
// Latch onto instances of the orders and inventory topics
StreamsBuilder builder = new StreamsBuilder();
// NOTE(review): both source expressions below are truncated -- the stream has no
// `builder.stream(<topic>` prefix and the table call has no topic argument. Presumably the orders
// and warehouse-inventory topic names -- confirm against the upstream example.
KStream<String, Order> orders =, Consumed.with(Topics.ORDERS.keySerde(), Topics.ORDERS.valueSerde()));
KTable<Product, Integer> warehouseInventory = builder.table(, Consumed.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.WAREHOUSE_INVENTORY.valueSerde()));
// Create a store to reserve inventory whilst the order is processed.
// This will be prepopulated from Kafka before the service starts processing
// NOTE(review): `reservedStock` is declared with the raw type StoreBuilder and, in the visible code,
// is never registered with the builder even though transform(...) below refers to the store by
// name. A `builder.addStateStore(reservedStock)` call was presumably lost in extraction -- confirm.
StoreBuilder reservedStock = Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore(RESERVED_STOCK_STORE_NAME), Topics.WAREHOUSE_INVENTORY.keySerde(), Serdes.Long()).withLoggingEnabled(new HashMap<>());
// First change orders stream to be keyed by Product (so we can join with warehouse inventory)
// NOTE(review): the destination topic argument of `.to(, ...)` is missing; presumably the
// order-validations topic name -- confirm.
orders.selectKey((id, order) -> order.getProduct()).filter((id, order) -> OrderState.CREATED.equals(order.getState())).join(warehouseInventory, KeyValue::new, Joined.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.ORDERS.valueSerde(), Serdes.Integer())).transform(InventoryValidator::new, RESERVED_STOCK_STORE_NAME).to(, Produced.with(Topics.ORDER_VALIDATIONS.keySerde(), Topics.ORDER_VALIDATIONS.valueSerde()));
// NOTE(review): the first constructor argument is missing; presumably `builder.build()` -- confirm.
// NOTE(review): the method's closing brace is also absent from this excerpt.
return new KafkaStreams(, MicroserviceUtils.baseStreamsConfig(bootstrapServers, stateDir, INVENTORY_SERVICE_APP_ID));
the class TopArticlesLambdaExample method buildTopArticlesStream.
/**
 * Defines the "top articles per industry" topology.
 *
 * <p>Visible steps: page-view records accepted by {@code isArticle} are re-keyed by a record holding
 * their page and industry (with the user field fixed to {@code "user"}), counted per key in
 * 60-minute windows, regrouped by (industry, window), aggregated into a {@link PriorityQueue}
 * ordered by descending count, rendered to a string, and written to
 * {@code TOP_NEWS_PER_INDUSTRY_TOPIC}.
 *
 * <p>NOTE(review): this excerpt is incomplete -- several lambdas and a loop are never closed and
 * some statements appear to be missing (see inline notes). Treat the description above as a
 * reading of what remains, not of the original method.
 *
 * @param bootstrapServers value stored under {@code StreamsConfig.BOOTSTRAP_SERVERS_CONFIG}
 * @param schemaRegistryUrl value stored under the schema-registry URL setting and used to
 *     configure the Avro serdes
 * @param stateDir value stored under {@code StreamsConfig.STATE_DIR_CONFIG}
 * @return the constructed {@link KafkaStreams} instance
 * @throws IOException declared by the signature; presumably raised while parsing the Avro schema
 *     resource -- confirm
 */
static KafkaStreams buildTopArticlesStream(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) throws IOException {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Where to find the Confluent schema registry instance(s)
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
// Serdes used in this example
final Serde<String> stringSerde = Serdes.String();
final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// The second argument of configure(...) is the isKey flag: true for the key serde, false for the value serde.
final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
keyAvroSerde.configure(serdeConfig, true);
final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
valueAvroSerde.configure(serdeConfig, false);
final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);
final StreamsBuilder builder = new StreamsBuilder();
// NOTE(review): the right-hand side below is missing entirely; presumably a
// `builder.stream(<page views topic>)` call -- confirm against the upstream example.
final KStream<byte[], GenericRecord> views =;
// NOTE(review): getResourceAsStream(...) can return null when the resource is absent, and the
// stream is not closed anywhere in the visible code.
final InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewstats.avsc");
final Schema schema = new Schema.Parser().parse(statsSchema);
// Keep only article views and re-key them by a record carrying page and industry; the user field
// is overwritten with the constant "user", and the same record is used as key and value.
final KStream<GenericRecord, GenericRecord> articleViews = views.filter((dummy, record) -> isArticle(record)).map((dummy, article) -> {
final GenericRecord clone = new GenericData.Record(article.getSchema());
clone.put("user", "user");
clone.put("page", article.get("page"));
clone.put("industry", article.get("industry"));
return new KeyValue<>(clone, clone);
// NOTE(review): the closing `});` of the map(...) lambda above is absent from this excerpt.
// Count views per article key in 60-minute windows.
final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews.groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde)).windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60))).count();
// Orders records by descending "count".
// NOTE(review): the long difference is cast to int, which can overflow or truncate for large
// count gaps; a Long.compare-based comparator would avoid that -- confirm before changing.
final Comparator<GenericRecord> comparator = (o1, o2) -> (int) ((Long) o2.get("count") - (Long) o1.get("count"));
final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts.groupBy(// the selector
(windowedArticle, count) -> {
// project on the industry field for key
Windowed<String> windowedIndustry = new Windowed<>(windowedArticle.key().get("industry").toString(), windowedArticle.window());
// add the page into the value
GenericRecord viewStats = new GenericData.Record(schema);
viewStats.put("page", windowedArticle.key().get("page"));
viewStats.put("user", "user");
viewStats.put("industry", windowedArticle.key().get("industry"));
viewStats.put("count", count);
return new KeyValue<>(windowedIndustry, viewStats);
}, Serialized.with(windowedStringSerde, valueAvroSerde)).aggregate(// the initializer
() -> new PriorityQueue<>(comparator), // the "add" aggregator
(windowedIndustry, record, queue) -> {
// NOTE(review): as shown, the "add" aggregator returns the queue unchanged; a statement adding
// `record` to the queue was presumably lost in extraction -- confirm.
return queue;
}, // the "remove" aggregator
(windowedIndustry, record, queue) -> {
// NOTE(review): as shown, the "remove" aggregator returns the queue unchanged; a statement removing
// `record` from the queue was presumably lost in extraction -- confirm.
return queue;
}, Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));
// Upper bound on the number of queue entries polled per key when rendering the result.
final int topN = 100;
final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < topN; i++) {
final GenericRecord record = queue.poll();
if (record == null) {
// NOTE(review): the rest of the loop body, the closing braces of the `if`, the `for` loop and the
// mapValues(...) lambda are absent from this excerpt; nothing visible appends to `sb`.
return sb.toString();
topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
// NOTE(review): the first constructor argument is missing; presumably `builder.build()` -- confirm.
// NOTE(review): the method's closing brace is also absent from this excerpt.
return new KafkaStreams(, streamsConfiguration);
the class WordCountLambdaExample method main.
public static void main(final String[] args) throws Exception {
final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
// For illustrative purposes we disable record caches
streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
// Set up serializers and deserializers, which we will use for overriding the default serdes
// specified above.
final Serde<String> stringSerde = Serdes.String();
final Serde<Long> longSerde = Serdes.Long();
// In the subsequent lines we define the processing topology of the Streams application.
final StreamsBuilder builder = new StreamsBuilder();
// Construct a `KStream` from the input topic "streams-plaintext-input", where message values
// represent lines of text (for the sake of this example, we ignore whatever may be stored
// in the message keys).
// Note: We could also just call `"streams-plaintext-input")` if we wanted to leverage
// the default serdes specified in the Streams configuration above, because these defaults
// match what's in the actual topic. However we explicitly set the deserializers in the
// call to `stream()` below in order to show how that's done, too.
final KStream<String, String> textLines ="streams-plaintext-input");
final Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
final KTable<String, Long> wordCounts = textLines.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))).groupBy((key, word) -> word).count();
// Write the `KTable<String, Long>` to the output topic.
wordCounts.toStream().to("streams-wordcount-output", Produced.with(stringSerde, longSerde));
// Now that we have finished the definition of the processing topology we can actually run
// it via `start()`. The Streams application as a whole can be launched just like any
// normal Java application that has a `main()` method.
final KafkaStreams streams = new KafkaStreams(, streamsConfiguration);
// Always (and unconditionally) clean local state prior to starting the processing topology.
// We opt for this unconditional call here because this will make it easier for you to play around with the example
// when resetting the application for doing a re-run (via the Application Reset Tool,
// The drawback of cleaning up local state prior is that your app must rebuilt its local state from scratch, which
// will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
// Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
// is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
// See `` for a production-like example.
// Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));