Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class WordCountLambdaIntegrationTest, method shouldCountWords:
@Test
public void shouldCountWords() throws Exception {
List<String> inputValues = Arrays.asList(
    "Hello Kafka Streams",
    "All streams lead to Kafka",
    "Join Kafka Summit",
    "И теперь пошли русские слова");
List<KeyValue<String, Long>> expectedWordCounts = Arrays.asList(
    new KeyValue<>("hello", 1L), new KeyValue<>("all", 1L), new KeyValue<>("streams", 2L),
    new KeyValue<>("lead", 1L), new KeyValue<>("to", 1L), new KeyValue<>("join", 1L),
    new KeyValue<>("kafka", 3L), new KeyValue<>("summit", 1L), new KeyValue<>("и", 1L),
    new KeyValue<>("теперь", 1L), new KeyValue<>("пошли", 1L), new KeyValue<>("русские", 1L),
    new KeyValue<>("слова", 1L));
//
// Step 1: Configure and start the processor topology.
//
final Serde<String> stringSerde = Serdes.String();
final Serde<Long> longSerde = Serdes.Long();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-lambda-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
// The commit interval for flushing records to state stores and downstream must be lower than
// this integration test's timeout (30 secs) to ensure we observe the expected processing results.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Use a temporary directory for storing state, which will be automatically removed after the test.
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
StreamsBuilder builder = new StreamsBuilder();
KStream<String, String> textLines = builder.stream(inputTopic);
Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
KTable<String, Long> wordCounts = textLines
    .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
    .groupBy((key, word) -> word)
    .count();
wordCounts.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
//
// Step 3: Verify the application's output data.
//
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-lambda-integration-test-standard-consumer");
consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
List<KeyValue<String, Long>> actualWordCounts = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedWordCounts.size());
streams.close();
assertThat(actualWordCounts).containsExactlyElementsOf(expectedWordCounts);
}
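For reference, the same word-count topology can also be driven without an embedded Kafka cluster, using TopologyTestDriver from kafka-streams-test-utils. The sketch below is a hypothetical illustration only and is not part of this test; builder, inputTopic and outputTopic are the ones defined above, and the test-driver APIs shown match the Kafka 1.1/2.x line this snippet targets.
// Minimal sketch, assuming kafka-streams-test-utils is on the test classpath.
Properties testProps = new Properties();
testProps.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-topology-test");
testProps.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
testProps.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
testProps.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
TopologyTestDriver driver = new TopologyTestDriver(builder.build(), testProps);
ConsumerRecordFactory<String, String> factory =
    new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer());
driver.pipeInput(factory.create(inputTopic, null, "Hello Kafka Streams"));
// Each processed word produces one update record on the output topic, e.g. ("hello", 1L).
ProducerRecord<String, Long> firstUpdate =
    driver.readOutput(outputTopic, new StringDeserializer(), new LongDeserializer());
driver.close();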
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class ValidateStateWithInteractiveQueriesLambdaIntegrationTest, method shouldComputeMaxValuePerKey:
@Test
public void shouldComputeMaxValuePerKey() throws Exception {
// A user may be listed multiple times.
List<KeyValue<String, Long>> inputUserClicks = Arrays.asList(
    new KeyValue<>("alice", 13L), new KeyValue<>("bob", 4L), new KeyValue<>("chao", 25L),
    new KeyValue<>("bob", 19L), new KeyValue<>("chao", 56L), new KeyValue<>("alice", 78L),
    new KeyValue<>("alice", 40L), new KeyValue<>("bob", 3L));
Map<String, Long> expectedMaxClicksPerUser = new HashMap<String, Long>() {
{
put("alice", 78L);
put("bob", 19L);
put("chao", 56L);
}
};
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "validating-with-interactive-queries-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// The commit interval for flushing records to state stores and downstream must be lower than
// this integration test's timeout (30 secs) to ensure we observe the expected processing results.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 2 * 1000);
// Use a temporary directory for storing state, which will be automatically removed after the test.
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
KStream<String, Long> input = builder.stream(inputTopic);
// rolling MAX() aggregation
String maxStore = "max-store";
input.groupByKey().aggregate(
    () -> Long.MIN_VALUE,
    (aggKey, value, aggregate) -> Math.max(value, aggregate),
    Materialized.as(maxStore));
// windowed MAX() aggregation
String maxWindowStore = "max-window-store";
input.groupByKey()
    .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(1L)).until(TimeUnit.MINUTES.toMillis(5L)))
    .aggregate(
        () -> Long.MIN_VALUE,
        (aggKey, value, aggregate) -> Math.max(value, aggregate),
        Materialized.as(maxWindowStore));
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
IntegrationTestUtils.produceKeyValuesSynchronously(inputTopic, inputUserClicks, producerConfig);
//
// Step 3: Validate the application's state by interactively querying its state stores.
//
ReadOnlyKeyValueStore<String, Long> keyValueStore = IntegrationTestUtils.waitUntilStoreIsQueryable(maxStore, QueryableStoreTypes.keyValueStore(), streams);
ReadOnlyWindowStore<String, Long> windowStore = IntegrationTestUtils.waitUntilStoreIsQueryable(maxWindowStore, QueryableStoreTypes.windowStore(), streams);
// Wait a bit so that the input data can be fully processed to ensure that the stores can
// actually be populated with data. Running the build on (slow) Travis CI in particular
// requires a few seconds to run this test reliably.
Thread.sleep(3000);
IntegrationTestUtils.assertThatKeyValueStoreContains(keyValueStore, expectedMaxClicksPerUser);
IntegrationTestUtils.assertThatOldestWindowContains(windowStore, expectedMaxClicksPerUser);
streams.close();
}
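Beyond the two assertion helpers, the stores returned by waitUntilStoreIsQueryable can be queried directly. A minimal sketch follows; the key and time range are illustrative, and the expected value assumes the input above has been fully processed.
// Point lookup in the key-value store backing the rolling MAX() aggregation.
Long aliceMax = keyValueStore.get("alice"); // expected 78L once processing is complete

// Range fetch over the window store: all windows for "alice" from the epoch until now.
WindowStoreIterator<Long> iterator = windowStore.fetch("alice", 0L, System.currentTimeMillis());
while (iterator.hasNext()) {
  KeyValue<Long, Long> windowedMax = iterator.next(); // key = window start timestamp, value = max in that window
  System.out.println("window @ " + windowedMax.key + " -> " + windowedMax.value);
}
iterator.close();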
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class WordCountInteractiveQueriesExample, method createStreams:
static KafkaStreams createStreams(final Properties streamsConfiguration) {
final Serde<String> stringSerde = Serdes.String();
StreamsBuilder builder = new StreamsBuilder();
KStream<String, String> textLines = builder.stream(TEXT_LINES_TOPIC, Consumed.with(Serdes.String(), Serdes.String()));
final KGroupedStream<String, String> groupedByWord = textLines
    .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
    .groupBy((key, word) -> word, Serialized.with(stringSerde, stringSerde));
// Create a state store holding the all-time word count
groupedByWord.count(
    Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-count")
        .withValueSerde(Serdes.Long()));
// Create a windowed state store containing the word count for every 1-minute window
groupedByWord
    .windowedBy(TimeWindows.of(60000))
    .count(
        Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-word-count")
            .withValueSerde(Serdes.Long()));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
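The two stores registered here, "word-count" and "windowed-word-count", are what the example's REST layer queries interactively. A minimal sketch of a local lookup against the returned instance (assuming the instance is RUNNING and the store has finished restoring; error handling omitted):
KafkaStreams streams = createStreams(streamsConfiguration);
streams.start();
// ... later, once the state store is queryable on this instance:
ReadOnlyKeyValueStore<String, Long> wordCounts =
    streams.store("word-count", QueryableStoreTypes.keyValueStore());
Long kafkaCount = wordCounts.get("kafka"); // null if this instance has not seen the word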
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class FraudService, method processStreams:
private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
// Latch onto instances of the orders and inventory topics
StreamsBuilder builder = new StreamsBuilder();
KStream<String, Order> orders = builder
    .stream(ORDERS.name(), Consumed.with(ORDERS.keySerde(), ORDERS.valueSerde()))
    .filter((id, order) -> OrderState.CREATED.equals(order.getState()));
// Create an aggregate of the total value by customer and hold it with the order. We use session windows to
// detect periods of activity.
KTable<Windowed<Long>, OrderValue> aggregate = orders
    .groupBy((id, order) -> order.getCustomerId(), Serialized.with(Serdes.Long(), ORDERS.valueSerde()))
    .windowedBy(SessionWindows.with(60 * MIN))
    .aggregate(
        OrderValue::new,
        // Calculate the running total for each customer within this window
        (custId, order, total) -> new OrderValue(order, total.getValue() + order.getQuantity() * order.getPrice()),
        // Include a merger as we're using session windows.
        (k, a, b) -> simpleMerge(a, b),
        Materialized.with(null, Schemas.ORDER_VALUE_SERDE));
// Ditch the windowing and rekey
KStream<String, OrderValue> ordersWithTotals = aggregate
    .toStream((windowedKey, orderValue) -> windowedKey.key())
    // When elements are evicted from a session window they create delete events. Filter these out.
    .filter((k, v) -> v != null)
    .selectKey((id, orderValue) -> orderValue.getOrder().getId());
// Now branch the stream into two, for pass and fail, based on whether the windowed total is over Fraud Limit
KStream<String, OrderValue>[] forks = ordersWithTotals.branch(
    (id, orderValue) -> orderValue.getValue() >= FRAUD_LIMIT,
    (id, orderValue) -> orderValue.getValue() < FRAUD_LIMIT);
forks[0]
    .mapValues(orderValue -> new OrderValidation(orderValue.getOrder().getId(), FRAUD_CHECK, FAIL))
    .to(ORDER_VALIDATIONS.name(), Produced.with(ORDER_VALIDATIONS.keySerde(), ORDER_VALIDATIONS.valueSerde()));
forks[1]
    .mapValues(orderValue -> new OrderValidation(orderValue.getOrder().getId(), FRAUD_CHECK, PASS))
    .to(ORDER_VALIDATIONS.name(), Produced.with(ORDER_VALIDATIONS.keySerde(), ORDER_VALIDATIONS.valueSerde()));
// Disable caching to ensure a complete aggregate changelog. This is a little trick we need to apply
// as caching in Kafka Streams will conflate subsequent updates for the same key. Disabling caching ensures
// we get a complete "changelog" from the aggregate(...) step above (i.e. every input event will have a
// corresponding output event).
Properties props = baseStreamsConfig(bootstrapServers, stateDir, FRAUD_SERVICE_APP_ID);
props.setProperty(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");
return new KafkaStreams(builder.build(), props);
}
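The simpleMerge(...) session merger referenced above is not shown in this snippet. A plausible shape, offered only as an assumption about what such a merger does (the project's actual implementation may differ), is to combine the running totals of the two sessions being merged:
// Hypothetical sketch of the session-window merger; not the project's actual code.
private OrderValue simpleMerge(final OrderValue a, final OrderValue b) {
  // Keep the most recent order and add up the totals; 'a' can be empty for a brand-new session.
  return new OrderValue(b.getOrder(),
      (a == null || a.getValue() == null ? 0D : a.getValue()) + b.getValue());
}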
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class InventoryService, method processStreams:
private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
// Latch onto instances of the orders and inventory topics
StreamsBuilder builder = new StreamsBuilder();
KStream<String, Order> orders = builder.stream(Topics.ORDERS.name(), Consumed.with(Topics.ORDERS.keySerde(), Topics.ORDERS.valueSerde()));
KTable<Product, Integer> warehouseInventory = builder.table(
    Topics.WAREHOUSE_INVENTORY.name(),
    Consumed.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.WAREHOUSE_INVENTORY.valueSerde()));
// Create a store to reserve inventory whilst the order is processed.
// This will be prepopulated from Kafka before the service starts processing
StoreBuilder reservedStock = Stores
    .keyValueStoreBuilder(
        Stores.persistentKeyValueStore(RESERVED_STOCK_STORE_NAME),
        Topics.WAREHOUSE_INVENTORY.keySerde(),
        Serdes.Long())
    .withLoggingEnabled(new HashMap<>());
builder.addStateStore(reservedStock);
// First re-key the orders stream by Product (so we can join it with the warehouse inventory)
orders
    .selectKey((id, order) -> order.getProduct())
    .filter((id, order) -> OrderState.CREATED.equals(order.getState()))
    .join(warehouseInventory, KeyValue::new,
        Joined.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.ORDERS.valueSerde(), Serdes.Integer()))
    .transform(InventoryValidator::new, RESERVED_STOCK_STORE_NAME)
    .to(Topics.ORDER_VALIDATIONS.name(),
        Produced.with(Topics.ORDER_VALIDATIONS.keySerde(), Topics.ORDER_VALIDATIONS.valueSerde()));
return new KafkaStreams(builder.build(),
    MicroserviceUtils.baseStreamsConfig(bootstrapServers, stateDir, INVENTORY_SERVICE_APP_ID));
}
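The InventoryValidator supplied to transform(...) is not shown here; it follows the standard Transformer contract, obtaining the reserved-stock store from the ProcessorContext in init(...) and consulting/updating it per record. The skeleton below is a simplified, hypothetical illustration (the project's real validator differs; the generics, enum constants and validation rule are assumptions):
// Hypothetical skeleton only, to show how the reserved-stock store is wired into a Transformer.
class ReservedStockTransformer
    implements Transformer<Product, KeyValue<Order, Integer>, KeyValue<String, OrderValidation>> {

  private KeyValueStore<Product, Long> reservedStock;

  @Override
  @SuppressWarnings("unchecked")
  public void init(final ProcessorContext context) {
    // Must match the store registered via builder.addStateStore(reservedStock) above.
    reservedStock = (KeyValueStore<Product, Long>) context.getStateStore(RESERVED_STOCK_STORE_NAME);
  }

  @Override
  public KeyValue<String, OrderValidation> transform(final Product product, final KeyValue<Order, Integer> orderAndStock) {
    final Order order = orderAndStock.key;
    final int warehouseStock = orderAndStock.value;
    final Long alreadyReserved = reservedStock.get(product);
    final long reserved = alreadyReserved == null ? 0L : alreadyReserved;
    // Illustrative rule: the order passes if unreserved warehouse stock covers the requested quantity.
    final boolean passes = warehouseStock - reserved >= order.getQuantity();
    if (passes) {
      reservedStock.put(product, reserved + order.getQuantity());
    }
    return KeyValue.pair(order.getId(), new OrderValidation(order.getId(), INVENTORY_CHECK, passes ? PASS : FAIL));
  }

  @Override
  public void close() {
  }
}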