Search in sources :

Example 6 with KTable

use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.

the class StreamToTableJoinIntegrationTest method shouldCountClicksPerRegion.

@Test
public void shouldCountClicksPerRegion() throws Exception {
    // Input 1: Clicks per user (multiple records allowed per user).
    List<KeyValue<String, Long>> userClicks = Arrays.asList(new KeyValue<>("alice", 13L), new KeyValue<>("bob", 4L), new KeyValue<>("chao", 25L), new KeyValue<>("bob", 19L), new KeyValue<>("dave", 56L), new KeyValue<>("eve", 78L), new KeyValue<>("alice", 40L), new KeyValue<>("fang", 99L));
    // Input 2: Region per user (multiple records allowed per user).
    List<KeyValue<String, String>> userRegions = Arrays.asList(new KeyValue<>("alice", "asia"), /* Alice lived in Asia originally... */
    new KeyValue<>("bob", "americas"), new KeyValue<>("chao", "asia"), new KeyValue<>("dave", "europe"), new KeyValue<>("alice", "europe"), /* ...but moved to Europe some time later. */
    new KeyValue<>("eve", "americas"), new KeyValue<>("fang", "asia"));
    List<KeyValue<String, Long>> expectedClicksPerRegion = Arrays.asList(new KeyValue<>("americas", 101L), new KeyValue<>("europe", 109L), new KeyValue<>("asia", 124L));
    // 
    // Step 1: Configure and start the processor topology.
    // 
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-table-join-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    StreamsBuilder builder = new StreamsBuilder();
    // This KStream contains information such as "alice" -> 13L.
    // 
    // Because this is a KStream ("record stream"), multiple records for the same user will be
    // considered as separate click-count events, each of which will be added to the total count.
    KStream<String, Long> userClicksStream = builder.stream(userClicksTopic, Consumed.with(stringSerde, longSerde));
    // This KTable contains information such as "alice" -> "europe".
    // 
    // Because this is a KTable ("changelog stream"), only the latest value (here: region) for a
    // record key will be considered at the time when a new user-click record (see above) is
    // received for the `leftJoin` below.  Any previous region values are being considered out of
    // date.  This behavior is quite different to the KStream for user clicks above.
    // 
    // For example, the user "alice" will be considered to live in "europe" (although originally she
    // lived in "asia") because, at the time her first user-click record is being received and
    // subsequently processed in the `leftJoin`, the latest region update for "alice" is "europe"
    // (which overrides her previous region value of "asia").
    KTable<String, String> userRegionsTable = builder.table(userRegionsTopic);
    // Compute the number of clicks per region, e.g. "europe" -> 13L.
    // 
    // The resulting KTable is continuously being updated as new data records are arriving in the
    // input KStream `userClicksStream` and input KTable `userRegionsTable`.
    KTable<String, Long> clicksPerRegion = userClicksStream.leftJoin(userRegionsTable, (clicks, region) -> new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks)).map((user, regionWithClicks) -> new KeyValue<>(regionWithClicks.getRegion(), regionWithClicks.getClicks())).groupByKey(Serialized.with(stringSerde, longSerde)).reduce((firstClicks, secondClicks) -> firstClicks + secondClicks);
    // Write the (continuously updating) results to the output topic.
    clicksPerRegion.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Publish user-region information.
    // 
    // To keep this code example simple and easier to understand/reason about, we publish all
    // user-region records before any user-click records (cf. step 3).  In practice though,
    // data records would typically be arriving concurrently in both input streams/topics.
    Properties userRegionsProducerConfig = new Properties();
    userRegionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userRegionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userRegionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userRegionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userRegionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userRegionsTopic, userRegions, userRegionsProducerConfig);
    // 
    // Step 3: Publish some user click events.
    // 
    Properties userClicksProducerConfig = new Properties();
    userClicksProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userClicksProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userClicksProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userClicksProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userClicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userClicksTopic, userClicks, userClicksProducerConfig);
    // 
    // Step 4: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "join-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    List<KeyValue<String, Long>> actualClicksPerRegion = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedClicksPerRegion.size());
    streams.close();
    assertThat(actualClicksPerRegion).containsExactlyElementsOf(expectedClicksPerRegion);
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) Produced(org.apache.kafka.streams.kstream.Produced) Serialized(org.apache.kafka.streams.kstream.Serialized) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) KStream(org.apache.kafka.streams.kstream.KStream) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Consumed(org.apache.kafka.streams.Consumed) Serde(org.apache.kafka.common.serialization.Serde) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) TestUtils(org.apache.kafka.test.TestUtils) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) List(java.util.List) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KeyValue(org.apache.kafka.streams.KeyValue) Properties(java.util.Properties) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Test(org.junit.Test)

Example 7 with KTable

use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.

the class WordCountLambdaIntegrationTest method shouldCountWords.

@Test
public void shouldCountWords() throws Exception {
    List<String> inputValues = Arrays.asList("Hello Kafka Streams", "All streams lead to Kafka", "Join Kafka Summit", "И теперь пошли русские слова");
    List<KeyValue<String, Long>> expectedWordCounts = Arrays.asList(new KeyValue<>("hello", 1L), new KeyValue<>("all", 1L), new KeyValue<>("streams", 2L), new KeyValue<>("lead", 1L), new KeyValue<>("to", 1L), new KeyValue<>("join", 1L), new KeyValue<>("kafka", 3L), new KeyValue<>("summit", 1L), new KeyValue<>("и", 1L), new KeyValue<>("теперь", 1L), new KeyValue<>("пошли", 1L), new KeyValue<>("русские", 1L), new KeyValue<>("слова", 1L));
    // 
    // Step 1: Configure and start the processor topology.
    // 
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, String> textLines = builder.stream(inputTopic);
    Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
    KTable<String, Long> wordCounts = textLines.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))).groupBy((key, word) -> word).count();
    wordCounts.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Produce some input data to the input topic.
    // 
    Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
    // 
    // Step 3: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    List<KeyValue<String, Long>> actualWordCounts = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedWordCounts.size());
    streams.close();
    assertThat(actualWordCounts).containsExactlyElementsOf(expectedWordCounts);
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KTable(org.apache.kafka.streams.kstream.KTable) Arrays(java.util.Arrays) Properties(java.util.Properties) BeforeClass(org.junit.BeforeClass) Produced(org.apache.kafka.streams.kstream.Produced) TestUtils(org.apache.kafka.test.TestUtils) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) KStream(org.apache.kafka.streams.kstream.KStream) List(java.util.List) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Serde(org.apache.kafka.common.serialization.Serde) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Pattern(java.util.regex.Pattern) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) Pattern(java.util.regex.Pattern) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KeyValue(org.apache.kafka.streams.KeyValue) Properties(java.util.Properties) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Test(org.junit.Test)

Example 8 with KTable

use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.

the class FraudService method processStreams.

private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
    // Latch onto instances of the orders and inventory topics
    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, Order> orders = builder.stream(ORDERS.name(), Consumed.with(ORDERS.keySerde(), ORDERS.valueSerde())).filter((id, order) -> OrderState.CREATED.equals(order.getState()));
    // Create an aggregate of the total value by customer and hold it with the order. We use session windows to
    // detect periods of activity.
    KTable<Windowed<Long>, OrderValue> aggregate = orders.groupBy((id, order) -> order.getCustomerId(), Serialized.with(Serdes.Long(), ORDERS.valueSerde())).windowedBy(SessionWindows.with(60 * MIN)).aggregate(OrderValue::new, // Calculate running total for each customer within this window
    (custId, order, total) -> new OrderValue(order, total.getValue() + order.getQuantity() * order.getPrice()), // include a merger as we're using session windows.
    (k, a, b) -> simpleMerge(a, b), Materialized.with(null, Schemas.ORDER_VALUE_SERDE));
    // Ditch the windowing and rekey
    KStream<String, OrderValue> ordersWithTotals = aggregate.toStream((windowedKey, orderValue) -> windowedKey.key()).filter(// When elements are evicted from a session window they create delete events. Filter these out.
    (k, v) -> v != null).selectKey((id, orderValue) -> orderValue.getOrder().getId());
    // Now branch the stream into two, for pass and fail, based on whether the windowed total is over Fraud Limit
    KStream<String, OrderValue>[] forks = ordersWithTotals.branch((id, orderValue) -> orderValue.getValue() >= FRAUD_LIMIT, (id, orderValue) -> orderValue.getValue() < FRAUD_LIMIT);
    forks[0].mapValues(orderValue -> new OrderValidation(orderValue.getOrder().getId(), FRAUD_CHECK, FAIL)).to(ORDER_VALIDATIONS.name(), Produced.with(ORDER_VALIDATIONS.keySerde(), ORDER_VALIDATIONS.valueSerde()));
    forks[1].mapValues(orderValue -> new OrderValidation(orderValue.getOrder().getId(), FRAUD_CHECK, PASS)).to(ORDER_VALIDATIONS.name(), Produced.with(ORDER_VALIDATIONS.keySerde(), ORDER_VALIDATIONS.valueSerde()));
    // disable caching to ensure a complete aggregate changelog. This is a little trick we need to apply
    // as caching in Kafka Streams will conflate subsequent updates for the same key. Disabling caching ensures
    // we get a complete "changelog" from the aggregate(...) step above (i.e. every input event will have a
    // corresponding output event.
    Properties props = baseStreamsConfig(bootstrapServers, stateDir, FRAUD_SERVICE_APP_ID);
    props.setProperty(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");
    return new KafkaStreams(builder.build(), props);
}
Also used : Order(io.confluent.examples.streams.avro.microservices.Order) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Produced(org.apache.kafka.streams.kstream.Produced) SessionWindows(org.apache.kafka.streams.kstream.SessionWindows) Serialized(org.apache.kafka.streams.kstream.Serialized) LoggerFactory(org.slf4j.LoggerFactory) FRAUD_CHECK(io.confluent.examples.streams.avro.microservices.OrderValidationType.FRAUD_CHECK) KStream(org.apache.kafka.streams.kstream.KStream) Consumed(org.apache.kafka.streams.Consumed) Windowed(org.apache.kafka.streams.kstream.Windowed) Serdes(org.apache.kafka.common.serialization.Serdes) ORDER_VALIDATIONS(io.confluent.examples.streams.microservices.domain.Schemas.Topics.ORDER_VALIDATIONS) Order(io.confluent.examples.streams.avro.microservices.Order) OrderValue(io.confluent.examples.streams.avro.microservices.OrderValue) OrderState(io.confluent.examples.streams.avro.microservices.OrderState) MicroserviceUtils.parseArgsAndConfigure(io.confluent.examples.streams.microservices.util.MicroserviceUtils.parseArgsAndConfigure) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Logger(org.slf4j.Logger) Properties(java.util.Properties) ORDERS(io.confluent.examples.streams.microservices.domain.Schemas.Topics.ORDERS) Schemas(io.confluent.examples.streams.microservices.domain.Schemas) MicroserviceUtils.addShutdownHookAndBlock(io.confluent.examples.streams.microservices.util.MicroserviceUtils.addShutdownHookAndBlock) FAIL(io.confluent.examples.streams.avro.microservices.OrderValidationResult.FAIL) OrderValidation(io.confluent.examples.streams.avro.microservices.OrderValidation) Materialized(org.apache.kafka.streams.kstream.Materialized) PASS(io.confluent.examples.streams.avro.microservices.OrderValidationResult.PASS) MicroserviceUtils.baseStreamsConfig(io.confluent.examples.streams.microservices.util.MicroserviceUtils.baseStreamsConfig) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KafkaStreams(org.apache.kafka.streams.KafkaStreams) OrderValue(io.confluent.examples.streams.avro.microservices.OrderValue) KStream(org.apache.kafka.streams.kstream.KStream) OrderValidation(io.confluent.examples.streams.avro.microservices.OrderValidation) Properties(java.util.Properties) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Windowed(org.apache.kafka.streams.kstream.Windowed)

Example 9 with KTable

use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.

the class InventoryService method processStreams.

private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
    // Latch onto instances of the orders and inventory topics
    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, Order> orders = builder.stream(Topics.ORDERS.name(), Consumed.with(Topics.ORDERS.keySerde(), Topics.ORDERS.valueSerde()));
    KTable<Product, Integer> warehouseInventory = builder.table(Topics.WAREHOUSE_INVENTORY.name(), Consumed.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.WAREHOUSE_INVENTORY.valueSerde()));
    // Create a store to reserve inventory whilst the order is processed.
    // This will be prepopulated from Kafka before the service starts processing
    StoreBuilder reservedStock = Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore(RESERVED_STOCK_STORE_NAME), Topics.WAREHOUSE_INVENTORY.keySerde(), Serdes.Long()).withLoggingEnabled(new HashMap<>());
    builder.addStateStore(reservedStock);
    // First change orders stream to be keyed by Product (so we can join with warehouse inventory)
    orders.selectKey((id, order) -> order.getProduct()).filter((id, order) -> OrderState.CREATED.equals(order.getState())).join(warehouseInventory, KeyValue::new, Joined.with(Topics.WAREHOUSE_INVENTORY.keySerde(), Topics.ORDERS.valueSerde(), Serdes.Integer())).transform(InventoryValidator::new, RESERVED_STOCK_STORE_NAME).to(Topics.ORDER_VALIDATIONS.name(), Produced.with(Topics.ORDER_VALIDATIONS.keySerde(), Topics.ORDER_VALIDATIONS.valueSerde()));
    return new KafkaStreams(builder.build(), MicroserviceUtils.baseStreamsConfig(bootstrapServers, stateDir, INVENTORY_SERVICE_APP_ID));
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Order(io.confluent.examples.streams.avro.microservices.Order) Produced(org.apache.kafka.streams.kstream.Produced) Stores(org.apache.kafka.streams.state.Stores) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) KStream(org.apache.kafka.streams.kstream.KStream) Joined(org.apache.kafka.streams.kstream.Joined) MicroserviceUtils(io.confluent.examples.streams.microservices.util.MicroserviceUtils) Consumed(org.apache.kafka.streams.Consumed) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Serdes(org.apache.kafka.common.serialization.Serdes) INVENTORY_CHECK(io.confluent.examples.streams.avro.microservices.OrderValidationType.INVENTORY_CHECK) Order(io.confluent.examples.streams.avro.microservices.Order) MicroserviceUtils.parseArgsAndConfigure(io.confluent.examples.streams.microservices.util.MicroserviceUtils.parseArgsAndConfigure) OrderState(io.confluent.examples.streams.avro.microservices.OrderState) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Logger(org.slf4j.Logger) Transformer(org.apache.kafka.streams.kstream.Transformer) KeyValue(org.apache.kafka.streams.KeyValue) StoreBuilder(org.apache.kafka.streams.state.StoreBuilder) Topics(io.confluent.examples.streams.microservices.domain.Schemas.Topics) ProcessorContext(org.apache.kafka.streams.processor.ProcessorContext) MicroserviceUtils.addShutdownHookAndBlock(io.confluent.examples.streams.microservices.util.MicroserviceUtils.addShutdownHookAndBlock) FAIL(io.confluent.examples.streams.avro.microservices.OrderValidationResult.FAIL) PASS(io.confluent.examples.streams.avro.microservices.OrderValidationResult.PASS) OrderValidation(io.confluent.examples.streams.avro.microservices.OrderValidation) Product(io.confluent.examples.streams.avro.microservices.Product) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KeyValue(org.apache.kafka.streams.KeyValue) StoreBuilder(org.apache.kafka.streams.state.StoreBuilder) Product(io.confluent.examples.streams.avro.microservices.Product)

Example 10 with KTable

use of org.apache.kafka.streams.kstream.KTable in project kafka-streams-examples by confluentinc.

the class TopArticlesLambdaExample method buildTopArticlesStream.

static KafkaStreams buildTopArticlesStream(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) throws IOException {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    // Serdes used in this example
    final Serde<String> stringSerde = Serdes.String();
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
    keyAvroSerde.configure(serdeConfig, true);
    final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
    valueAvroSerde.configure(serdeConfig, false);
    final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<byte[], GenericRecord> views = builder.stream(PAGE_VIEWS);
    final InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewstats.avsc");
    final Schema schema = new Schema.Parser().parse(statsSchema);
    final KStream<GenericRecord, GenericRecord> articleViews = views.filter((dummy, record) -> isArticle(record)).map((dummy, article) -> {
        final GenericRecord clone = new GenericData.Record(article.getSchema());
        clone.put("user", "user");
        clone.put("page", article.get("page"));
        clone.put("industry", article.get("industry"));
        return new KeyValue<>(clone, clone);
    });
    final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews.groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde)).windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60))).count();
    final Comparator<GenericRecord> comparator = (o1, o2) -> (int) ((Long) o2.get("count") - (Long) o1.get("count"));
    final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts.groupBy(// the selector
    (windowedArticle, count) -> {
        // project on the industry field for key
        Windowed<String> windowedIndustry = new Windowed<>(windowedArticle.key().get("industry").toString(), windowedArticle.window());
        // add the page into the value
        GenericRecord viewStats = new GenericData.Record(schema);
        viewStats.put("page", windowedArticle.key().get("page"));
        viewStats.put("user", "user");
        viewStats.put("industry", windowedArticle.key().get("industry"));
        viewStats.put("count", count);
        return new KeyValue<>(windowedIndustry, viewStats);
    }, Serialized.with(windowedStringSerde, valueAvroSerde)).aggregate(// the initializer
    () -> new PriorityQueue<>(comparator), // the "add" aggregator
    (windowedIndustry, record, queue) -> {
        queue.add(record);
        return queue;
    }, // the "remove" aggregator
    (windowedIndustry, record, queue) -> {
        queue.remove(record);
        return queue;
    }, Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));
    final int topN = 100;
    final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < topN; i++) {
            final GenericRecord record = queue.poll();
            if (record == null) {
                break;
            }
            sb.append(record.get("page").toString());
            sb.append("\n");
        }
        return sb.toString();
    });
    topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Produced(org.apache.kafka.streams.kstream.Produced) Serialized(org.apache.kafka.streams.kstream.Serialized) PriorityQueue(java.util.PriorityQueue) KStream(org.apache.kafka.streams.kstream.KStream) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericData(org.apache.avro.generic.GenericData) Windowed(org.apache.kafka.streams.kstream.Windowed) Serde(org.apache.kafka.common.serialization.Serde) Map(java.util.Map) Serdes(org.apache.kafka.common.serialization.Serdes) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) Utf8(org.apache.avro.util.Utf8) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) KTable(org.apache.kafka.streams.kstream.KTable) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) KeyValue(org.apache.kafka.streams.KeyValue) IOException(java.io.IOException) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) TimeUnit(java.util.concurrent.TimeUnit) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) KeyValue(org.apache.kafka.streams.KeyValue) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream) PriorityQueue(java.util.PriorityQueue) GenericData(org.apache.avro.generic.GenericData) Windowed(org.apache.kafka.streams.kstream.Windowed) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde)

Aggregations

KTable (org.apache.kafka.streams.kstream.KTable)56 Serdes (org.apache.kafka.common.serialization.Serdes)51 Properties (java.util.Properties)50 StreamsBuilder (org.apache.kafka.streams.StreamsBuilder)49 KeyValue (org.apache.kafka.streams.KeyValue)41 StreamsConfig (org.apache.kafka.streams.StreamsConfig)41 Test (org.junit.Test)40 KStream (org.apache.kafka.streams.kstream.KStream)39 Materialized (org.apache.kafka.streams.kstream.Materialized)35 StringSerializer (org.apache.kafka.common.serialization.StringSerializer)32 Consumed (org.apache.kafka.streams.kstream.Consumed)32 Produced (org.apache.kafka.streams.kstream.Produced)28 Bytes (org.apache.kafka.common.utils.Bytes)27 KeyValueStore (org.apache.kafka.streams.state.KeyValueStore)27 MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat)27 Grouped (org.apache.kafka.streams.kstream.Grouped)25 List (java.util.List)24 Serde (org.apache.kafka.common.serialization.Serde)24 StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer)24 KafkaStreams (org.apache.kafka.streams.KafkaStreams)24