Use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.
Source: class ValidateStateWithInteractiveQueriesLambdaIntegrationTest, method shouldComputeMaxValuePerKey().
@Test
public void shouldComputeMaxValuePerKey() throws Exception {
// A user may be listed multiple times.
List<KeyValue<String, Long>> inputUserClicks = Arrays.asList(
    new KeyValue<>("alice", 13L),
    new KeyValue<>("bob", 4L),
    new KeyValue<>("chao", 25L),
    new KeyValue<>("bob", 19L),
    new KeyValue<>("chao", 56L),
    new KeyValue<>("alice", 78L),
    new KeyValue<>("alice", 40L),
    new KeyValue<>("bob", 3L));
Map<String, Long> expectedMaxClicksPerUser = new HashMap<String, Long>() {
  {
    put("alice", 78L);
    put("bob", 19L);
    put("chao", 56L);
  }
};
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "validating-with-interactive-queries-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// The commit interval for flushing records to state stores and downstream must be lower than
// this integration test's timeout (30 secs) to ensure we observe the expected processing results.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 2 * 1000);
// Use a temporary directory for storing state, which will be automatically removed after the test.
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
KStream<String, Long> input = builder.stream(inputTopic);
// rolling MAX() aggregation
String maxStore = "max-store";
input.groupByKey().aggregate(
    () -> Long.MIN_VALUE,
    (aggKey, value, aggregate) -> Math.max(value, aggregate),
    Materialized.as(maxStore));
// windowed MAX() aggregation
String maxWindowStore = "max-window-store";
input.groupByKey()
    .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(1L)).until(TimeUnit.MINUTES.toMillis(5L)))
    .aggregate(
        () -> Long.MIN_VALUE,
        (aggKey, value, aggregate) -> Math.max(value, aggregate),
        Materialized.as(maxWindowStore));
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
IntegrationTestUtils.produceKeyValuesSynchronously(inputTopic, inputUserClicks, producerConfig);
//
// Step 3: Validate the application's state by interactively querying its state stores.
//
ReadOnlyKeyValueStore<String, Long> keyValueStore =
    IntegrationTestUtils.waitUntilStoreIsQueryable(maxStore, QueryableStoreTypes.keyValueStore(), streams);
ReadOnlyWindowStore<String, Long> windowStore =
    IntegrationTestUtils.waitUntilStoreIsQueryable(maxWindowStore, QueryableStoreTypes.windowStore(), streams);
// Wait a bit so that the input data can be fully processed to ensure that the stores can
// actually be populated with data. Running the build on (slow) Travis CI in particular
// requires a few seconds to run this test reliably.
Thread.sleep(3000);
IntegrationTestUtils.assertThatKeyValueStoreContains(keyValueStore, expectedMaxClicksPerUser);
IntegrationTestUtils.assertThatOldestWindowContains(windowStore, expectedMaxClicksPerUser);
streams.close();
}
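The assertions above go through IntegrationTestUtils helpers. For reference, querying the two store handles directly would look roughly like the minimal sketch below; it is not part of the original test, and the five-minute fetch range is an assumption chosen to match the window retention configured above.

// Minimal interactive-query sketch (not part of the original test).
Long aliceMax = keyValueStore.get("alice"); // expected to be 78L once all input records are processed
System.out.println("alice max: " + aliceMax);
try (KeyValueIterator<String, Long> allUsers = keyValueStore.all()) {
  while (allUsers.hasNext()) {
    KeyValue<String, Long> entry = allUsers.next();
    System.out.println(entry.key + " -> " + entry.value);
  }
}
// Fetch the windows for "alice" that overlap the last five minutes (assumed range).
long now = System.currentTimeMillis();
try (WindowStoreIterator<Long> aliceWindows =
         windowStore.fetch("alice", now - TimeUnit.MINUTES.toMillis(5L), now)) {
  while (aliceWindows.hasNext()) {
    KeyValue<Long, Long> windowedMax = aliceWindows.next(); // window start timestamp -> max value
    System.out.println(windowedMax.key + " -> " + windowedMax.value);
  }
}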
Use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.
Source: class InventoryService, method processStreams().
private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
// Latch onto instances of the orders and inventory topics
StreamsBuilder builder = new StreamsBuilder();
KStream<String, Order> orders = builder.stream(Topics.ORDERS.name(),
    Consumed.with(Topics.ORDERS.keySerde(), Topics.ORDERS.valueSerde()));
KTable<Product, Integer> warehouseInventory = builder.table(Topics.WAREHOUSE_INVENTORY.name(),
    Consumed.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.WAREHOUSE_INVENTORY.valueSerde()));
// Create a store to reserve inventory whilst the order is processed.
// This will be prepopulated from Kafka before the service starts processing
StoreBuilder reservedStock = Stores
    .keyValueStoreBuilder(Stores.persistentKeyValueStore(RESERVED_STOCK_STORE_NAME),
        Topics.WAREHOUSE_INVENTORY.keySerde(), Serdes.Long())
    .withLoggingEnabled(new HashMap<>());
builder.addStateStore(reservedStock);
// First change orders stream to be keyed by Product (so we can join with warehouse inventory)
orders.selectKey((id, order) -> order.getProduct())
    .filter((id, order) -> OrderState.CREATED.equals(order.getState()))
    .join(warehouseInventory, KeyValue::new,
        Joined.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.ORDERS.valueSerde(), Serdes.Integer()))
    .transform(InventoryValidator::new, RESERVED_STOCK_STORE_NAME)
    .to(Topics.ORDER_VALIDATIONS.name(),
        Produced.with(Topics.ORDER_VALIDATIONS.keySerde(), Topics.ORDER_VALIDATIONS.valueSerde()));
return new KafkaStreams(builder.build(), MicroserviceUtils.baseStreamsConfig(bootstrapServers, stateDir, INVENTORY_SERVICE_APP_ID));
}
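InventoryValidator is defined elsewhere in the service; the code above only wires it into the topology. As a rough sketch of the idea (class shape, field names, and signatures here are illustrative, not the project's actual implementation), the transformer checks each order against warehouse stock minus stock already reserved in the RESERVED_STOCK_STORE_NAME store, records the new reservation, and emits a pass or fail validation keyed by order id.

// Illustrative sketch only -- see InventoryService in kafka-streams-examples for the real transformer.
class InventoryReservationSketch implements Transformer<Product, KeyValue<Order, Integer>, KeyValue<String, OrderValidation>> {

  private KeyValueStore<Product, Long> reservedStock;

  @Override
  @SuppressWarnings("unchecked")
  public void init(final ProcessorContext context) {
    reservedStock = (KeyValueStore<Product, Long>) context.getStateStore(RESERVED_STOCK_STORE_NAME);
  }

  @Override
  public KeyValue<String, OrderValidation> transform(final Product product, final KeyValue<Order, Integer> orderAndStock) {
    final Order order = orderAndStock.key;
    final long warehouseStock = orderAndStock.value;
    final Long alreadyReserved = reservedStock.get(product);
    final long reserved = alreadyReserved == null ? 0L : alreadyReserved;
    final OrderValidationResult result;
    if (warehouseStock - reserved - order.getQuantity() >= 0) {
      // Enough stock: reserve it so concurrent orders cannot double-book the same items.
      reservedStock.put(product, reserved + order.getQuantity());
      result = OrderValidationResult.PASS;
    } else {
      result = OrderValidationResult.FAIL;
    }
    return KeyValue.pair(order.getId(), new OrderValidation(order.getId(), OrderValidationType.INVENTORY_CHECK, result));
  }

  @Override
  public void close() {
  }
}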
Use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.
Source: class TopArticlesLambdaExample, method buildTopArticlesStream().
static KafkaStreams buildTopArticlesStream(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) throws IOException {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Where to find the Confluent schema registry instance(s)
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
// Serdes used in this example
final Serde<String> stringSerde = Serdes.String();
final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
keyAvroSerde.configure(serdeConfig, true);
final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
valueAvroSerde.configure(serdeConfig, false);
final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);
final StreamsBuilder builder = new StreamsBuilder();
final KStream<byte[], GenericRecord> views = builder.stream(PAGE_VIEWS);
final InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader()
    .getResourceAsStream("avro/io/confluent/examples/streams/pageviewstats.avsc");
final Schema schema = new Schema.Parser().parse(statsSchema);
final KStream<GenericRecord, GenericRecord> articleViews = views
    .filter((dummy, record) -> isArticle(record))
    .map((dummy, article) -> {
      final GenericRecord clone = new GenericData.Record(article.getSchema());
      clone.put("user", "user");
      clone.put("page", article.get("page"));
      clone.put("industry", article.get("industry"));
      return new KeyValue<>(clone, clone);
    });
final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews
    .groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde))
    .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60)))
    .count();
final Comparator<GenericRecord> comparator = (o1, o2) -> (int) ((Long) o2.get("count") - (Long) o1.get("count"));
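// Second aggregation step: re-key each per-article window count by its industry, then maintain a
// PriorityQueue of article stats per (industry, window). Because the upstream KTable can update a
// count, KGroupedTable.aggregate() needs both an "add" aggregator and a "remove" (subtractor) aggregator.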
final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts
    .groupBy(
        // the selector
        (windowedArticle, count) -> {
          // project on the industry field for the key
          Windowed<String> windowedIndustry =
              new Windowed<>(windowedArticle.key().get("industry").toString(), windowedArticle.window());
          // add the page into the value
          GenericRecord viewStats = new GenericData.Record(schema);
          viewStats.put("page", windowedArticle.key().get("page"));
          viewStats.put("user", "user");
          viewStats.put("industry", windowedArticle.key().get("industry"));
          viewStats.put("count", count);
          return new KeyValue<>(windowedIndustry, viewStats);
        },
        Serialized.with(windowedStringSerde, valueAvroSerde))
    .aggregate(
        // the initializer
        () -> new PriorityQueue<>(comparator),
        // the "add" aggregator
        (windowedIndustry, record, queue) -> {
          queue.add(record);
          return queue;
        },
        // the "remove" aggregator
        (windowedIndustry, record, queue) -> {
          queue.remove(record);
          return queue;
        },
        Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));
final int topN = 100;
final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
  final StringBuilder sb = new StringBuilder();
  for (int i = 0; i < topN; i++) {
    final GenericRecord record = queue.poll();
    if (record == null) {
      break;
    }
    sb.append(record.get("page").toString());
    sb.append("\n");
  }
  return sb.toString();
});
topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
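buildTopArticlesStream only assembles the topology; in the example project it is driven by a small main method along the following lines. This is a sketch: the default addresses and the /tmp state directory are assumptions, not necessarily the example's exact values.

public static void main(final String[] args) throws Exception {
  final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
  final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
  final KafkaStreams streams = buildTopArticlesStream(bootstrapServers, schemaRegistryUrl, "/tmp/kafka-streams");
  // Wipe local state so repeated demo runs start from scratch; don't do this blindly in production.
  streams.cleanUp();
  streams.start();
  // Shut the topology down cleanly on Ctrl-C / SIGTERM.
  Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}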
Use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.
Source: class WikipediaFeedAvroExample, method buildWikipediaFeed().
static KafkaStreams buildWikipediaFeed(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-avro-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-avro-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Where to find the Confluent schema registry instance(s)
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
final Serde<String> stringSerde = Serdes.String();
final Serde<Long> longSerde = Serdes.Long();
final StreamsBuilder builder = new StreamsBuilder();
// read the source stream
final KStream<String, WikiFeed> feeds = builder.stream(WIKIPEDIA_FEED);
// aggregate the new-feed counts by user
final KTable<String, Long> aggregated = feeds
    .filter(new Predicate<String, WikiFeed>() {
      @Override
      public boolean test(final String dummy, final WikiFeed value) {
        return value.getIsNew();
      }
    })
    .map(new KeyValueMapper<String, WikiFeed, KeyValue<String, WikiFeed>>() {
      @Override
      public KeyValue<String, WikiFeed> apply(final String key, final WikiFeed value) {
        return new KeyValue<>(value.getUser(), value);
      }
    })
    .groupByKey()
    .count();
// write to the result topic, need to override serdes
aggregated.toStream().to(WIKIPEDIA_STATS, Produced.with(stringSerde, longSerde));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
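The anonymous Predicate and KeyValueMapper above are intentional (the project also ships a separate lambda variant of this example). For comparison, the same filter-and-re-key step expressed with Java 8 lambdas would look roughly like this:

final KTable<String, Long> aggregated = feeds
    .filter((dummy, value) -> value.getIsNew())                    // keep only feeds flagged as new
    .map((key, value) -> new KeyValue<>(value.getUser(), value))   // re-key by user name
    .groupByKey()
    .count();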
Use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.
Source: class EndToEndTest, method shouldProcessManyValidOrdersEndToEnd().
@Test
public void shouldProcessManyValidOrdersEndToEnd() throws Exception {
client = getClient();
// Add inventory required by the inventory service
List<KeyValue<Product, Integer>> inventory = asList(new KeyValue<>(UNDERPANTS, 75), new KeyValue<>(JUMPERS, 10));
sendInventory(inventory, Topics.WAREHOUSE_INVENTORY);
// Send ten orders in succession
for (int i = 0; i < 10; i++) {
  OrderBean inputOrder = new OrderBean(id(i), 2L, OrderState.CREATED, Product.JUMPERS, 1, 1d);
  startTimer();
  // POST & GET order
  client.target(path.urlPost()).request(APPLICATION_JSON_TYPE).post(Entity.json(inputOrder));
  returnedBean = client.target(path.urlGetValidated(i))
      .queryParam("timeout", MIN)
      .request(APPLICATION_JSON_TYPE)
      .get(newBean());
  endTimer();
  assertThat(returnedBean).isEqualTo(new OrderBean(
      inputOrder.getId(),
      inputOrder.getCustomerId(),
      OrderState.VALIDATED,
      inputOrder.getProduct(),
      inputOrder.getQuantity(),
      inputOrder.getPrice()));
}
}