
Example 51 with KeyValue

use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.

the class PageViewRegionExample method main.

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Create a stream of page view events from the PageViews topic, where the key of
    // a record is assumed to be null and the value an Avro GenericRecord
    // that represents the full details of the page view event. See `pageview.avsc` under
    // `src/main/avro/` for the corresponding Avro schema.
    final KStream<String, GenericRecord> views = builder.stream("PageViews");
    // Create a keyed stream of page view events from the PageViews stream,
    // by extracting the user id (String) from the Avro value
    final KStream<String, GenericRecord> viewsByUser = views.map(new KeyValueMapper<String, GenericRecord, KeyValue<String, GenericRecord>>() {

        @Override
        public KeyValue<String, GenericRecord> apply(final String dummy, final GenericRecord record) {
            return new KeyValue<>(record.get("user").toString(), record);
        }
    });
    // Create a changelog stream for user profiles from the UserProfiles topic,
    // where the key of a record is assumed to be the user id (String) and its value
    // an Avro GenericRecord.  See `userprofile.avsc` under `src/main/avro/` for the
    // corresponding Avro schema.
    final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
    // Create a changelog stream as a projection of the value to the region attribute only
    final KTable<String, String> userRegions = userProfiles.mapValues(new ValueMapper<GenericRecord, String>() {

        @Override
        public String apply(final GenericRecord record) {
            return record.get("region").toString();
        }
    });
    // We must specify the Avro schemas for all intermediate (Avro) classes, if any.
    // In this example, we want to create an intermediate GenericRecord to hold the view region
    // (see below).
    final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
    final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
    final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser.leftJoin(userRegions, new ValueJoiner<GenericRecord, String, GenericRecord>() {

        @Override
        public GenericRecord apply(final GenericRecord view, final String region) {
            final GenericRecord viewRegion = new GenericData.Record(schema);
            viewRegion.put("user", view.get("user"));
            viewRegion.put("page", view.get("page"));
            viewRegion.put("region", region);
            return viewRegion;
        }
    }).map(new KeyValueMapper<String, GenericRecord, KeyValue<String, GenericRecord>>() {

        @Override
        public KeyValue<String, GenericRecord> apply(final String user, final GenericRecord viewRegion) {
            return new KeyValue<>(viewRegion.get("region").toString(), viewRegion);
        }
    }).groupByKey().windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1))).count();
    // Note: The following operations would NOT be needed for the actual pageview-by-region
    // computation, which would normally stop at `count` above.  We use the operations
    // below only to "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> viewsByRegionForConsole = viewsByRegion.toStream(new KeyValueMapper<Windowed<String>, Long, String>() {

        @Override
        public String apply(final Windowed<String> windowedRegion, final Long count) {
            return windowedRegion.toString();
        }
    });
    // write to the result topic
    viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state upfront is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {

        @Override
        public void run() {
            streams.close();
        }
    }));
}
Also used : KeyValue(org.apache.kafka.streams.KeyValue) Schema(org.apache.avro.Schema) KeyValueMapper(org.apache.kafka.streams.kstream.KeyValueMapper) Properties(java.util.Properties) GenericRecord(org.apache.avro.generic.GenericRecord) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream) GenericData(org.apache.avro.generic.GenericData) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Windowed(org.apache.kafka.streams.kstream.Windowed)
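
A side note on the windowing call above: TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(...) uses the millisecond-based overloads, which newer Kafka Streams releases (2.1+) replace with Duration-based ones. The sketch below is a minimal, hypothetical illustration of the same hopping-window count with the Duration API; the topic name and serdes are assumptions, not part of the example.

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class HoppingWindowCountSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Hypothetical topic that is already keyed by region; the example above derives such a
        // keyed stream via map() before grouping.
        final KTable<Windowed<String>, Long> viewsByRegion = builder
            .stream("PageViewsKeyedByRegion", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey()
            // 5-minute windows advancing every minute, expressed with java.time.Duration
            // instead of TimeUnit.MINUTES.toMillis(...).
            .windowedBy(TimeWindows.of(Duration.ofMinutes(5)).advanceBy(Duration.ofMinutes(1)))
            .count();
        // Build and run with KafkaStreams exactly as in the example above.
    }
}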

Example 52 with KeyValue

use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.

the class PageViewRegionLambdaExample method main.

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-region-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "pageview-region-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Create a stream of page view events from the PageViews topic, where the key of
    // a record is assumed to be null and the value an Avro GenericRecord
    // that represents the full details of the page view event. See `pageview.avsc` under
    // `src/main/avro/` for the corresponding Avro schema.
    final KStream<String, GenericRecord> views = builder.stream("PageViews");
    // Create a keyed stream of page view events from the PageViews stream,
    // by extracting the user id (String) from the Avro value
    final KStream<String, GenericRecord> viewsByUser = views.map((dummy, record) -> new KeyValue<>(record.get("user").toString(), record));
    // Create a changelog stream for user profiles from the UserProfiles topic,
    // where the key of a record is assumed to be the user id (String) and its value
    // an Avro GenericRecord.  See `userprofile.avsc` under `src/main/avro/` for the
    // corresponding Avro schema.
    final KTable<String, GenericRecord> userProfiles = builder.table("UserProfiles");
    // Create a changelog stream as a projection of the value to the region attribute only
    final KTable<String, String> userRegions = userProfiles.mapValues(record -> record.get("region").toString());
    // We must specify the Avro schemas for all intermediate (Avro) classes, if any.
    // In this example, we want to create an intermediate GenericRecord to hold the view region.
    // See `pageviewregion.avsc` under `src/main/avro/`.
    final InputStream pageViewRegionSchema = PageViewRegionLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewregion.avsc");
    final Schema schema = new Schema.Parser().parse(pageViewRegionSchema);
    final KTable<Windowed<String>, Long> viewsByRegion = viewsByUser.leftJoin(userRegions, (view, region) -> {
        GenericRecord viewRegion = new GenericData.Record(schema);
        viewRegion.put("user", view.get("user"));
        viewRegion.put("page", view.get("page"));
        viewRegion.put("region", region);
        return viewRegion;
    }).map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").toString(), viewRegion)).groupByKey().windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(5)).advanceBy(TimeUnit.MINUTES.toMillis(1))).count();
    // Note: The following operations would NOT be needed for the actual pageview-by-region
    // computation, which would normally stop at `count` above.  We use the operations
    // below only to "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> viewsByRegionForConsole = viewsByRegion.toStream((windowedRegion, count) -> windowedRegion.toString());
    viewsByRegionForConsole.to("PageViewsByRegion", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state upfront is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) GenericRecord(org.apache.avro.generic.GenericRecord) KTable(org.apache.kafka.streams.kstream.KTable) Schema(org.apache.avro.Schema) Properties(java.util.Properties) Produced(org.apache.kafka.streams.kstream.Produced) KeyValue(org.apache.kafka.streams.KeyValue) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) KStream(org.apache.kafka.streams.kstream.KStream) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericData(org.apache.avro.generic.GenericData) TimeUnit(java.util.concurrent.TimeUnit) Windowed(org.apache.kafka.streams.kstream.Windowed) Serde(org.apache.kafka.common.serialization.Serde) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream)
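
The comments above suggest inspecting the output with kafka-console-consumer. The snippet below is an equivalent, minimal Java consumer sketch for reading the windowed counts from PageViewsByRegion; the group id, bootstrap address, bounded poll loop, and the poll(Duration) call (Kafka clients 2.0+) are assumptions, not part of the example.

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;

public class PageViewsByRegionInspector {
    public static void main(final String[] args) {
        final Properties config = new Properties();
        config.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // Hypothetical consumer group used only for this ad-hoc inspection.
        config.put(ConsumerConfig.GROUP_ID_CONFIG, "pageviews-by-region-inspector");
        config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        config.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        config.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
        try (final KafkaConsumer<String, Long> consumer = new KafkaConsumer<>(config)) {
            consumer.subscribe(Collections.singletonList("PageViewsByRegion"));
            // Poll for roughly 30 seconds; keys are the windowed region rendered as a String
            // (windowedRegion.toString() in the example), values are the per-window counts.
            for (int i = 0; i < 30; i++) {
                final ConsumerRecords<String, Long> records = consumer.poll(Duration.ofSeconds(1));
                for (final ConsumerRecord<String, Long> record : records) {
                    System.out.println(record.key() + " -> " + record.value());
                }
            }
        }
    }
}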

Example 53 with KeyValue

use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.

the class StreamToTableJoinIntegrationTest method shouldCountClicksPerRegion.

@Test
public void shouldCountClicksPerRegion() throws Exception {
    // Input 1: Clicks per user (multiple records allowed per user).
    List<KeyValue<String, Long>> userClicks = Arrays.asList(new KeyValue<>("alice", 13L), new KeyValue<>("bob", 4L), new KeyValue<>("chao", 25L), new KeyValue<>("bob", 19L), new KeyValue<>("dave", 56L), new KeyValue<>("eve", 78L), new KeyValue<>("alice", 40L), new KeyValue<>("fang", 99L));
    // Input 2: Region per user (multiple records allowed per user).
    List<KeyValue<String, String>> userRegions = Arrays.asList(new KeyValue<>("alice", "asia"), /* Alice lived in Asia originally... */
    new KeyValue<>("bob", "americas"), new KeyValue<>("chao", "asia"), new KeyValue<>("dave", "europe"), new KeyValue<>("alice", "europe"), /* ...but moved to Europe some time later. */
    new KeyValue<>("eve", "americas"), new KeyValue<>("fang", "asia"));
    List<KeyValue<String, Long>> expectedClicksPerRegion = Arrays.asList(new KeyValue<>("americas", 101L), new KeyValue<>("europe", 109L), new KeyValue<>("asia", 124L));
    // 
    // Step 1: Configure and start the processor topology.
    // 
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-table-join-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    StreamsBuilder builder = new StreamsBuilder();
    // This KStream contains information such as "alice" -> 13L.
    // 
    // Because this is a KStream ("record stream"), multiple records for the same user will be
    // considered as separate click-count events, each of which will be added to the total count.
    KStream<String, Long> userClicksStream = builder.stream(userClicksTopic, Consumed.with(stringSerde, longSerde));
    // This KTable contains information such as "alice" -> "europe".
    // 
    // Because this is a KTable ("changelog stream"), only the latest value (here: region) for a
    // record key will be considered at the time when a new user-click record (see above) is
    // received for the `leftJoin` below.  Any previous region values are being considered out of
    // date.  This behavior is quite different to the KStream for user clicks above.
    // 
    // For example, the user "alice" will be considered to live in "europe" (although originally she
    // lived in "asia") because, at the time her first user-click record is being received and
    // subsequently processed in the `leftJoin`, the latest region update for "alice" is "europe"
    // (which overrides her previous region value of "asia").
    KTable<String, String> userRegionsTable = builder.table(userRegionsTopic);
    // Compute the number of clicks per region, e.g. "europe" -> 13L.
    // 
    // The resulting KTable is continuously being updated as new data records are arriving in the
    // input KStream `userClicksStream` and input KTable `userRegionsTable`.
    KTable<String, Long> clicksPerRegion = userClicksStream.leftJoin(userRegionsTable, (clicks, region) -> new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks)).map((user, regionWithClicks) -> new KeyValue<>(regionWithClicks.getRegion(), regionWithClicks.getClicks())).groupByKey(Serialized.with(stringSerde, longSerde)).reduce((firstClicks, secondClicks) -> firstClicks + secondClicks);
    // Write the (continuously updating) results to the output topic.
    clicksPerRegion.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Publish user-region information.
    // 
    // To keep this code example simple and easier to understand/reason about, we publish all
    // user-region records before any user-click records (cf. step 3).  In practice though,
    // data records would typically be arriving concurrently in both input streams/topics.
    Properties userRegionsProducerConfig = new Properties();
    userRegionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userRegionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userRegionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userRegionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userRegionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userRegionsTopic, userRegions, userRegionsProducerConfig);
    // 
    // Step 3: Publish some user click events.
    // 
    Properties userClicksProducerConfig = new Properties();
    userClicksProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userClicksProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userClicksProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userClicksProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userClicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userClicksTopic, userClicks, userClicksProducerConfig);
    // 
    // Step 4: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "join-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    List<KeyValue<String, Long>> actualClicksPerRegion = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedClicksPerRegion.size());
    streams.close();
    assertThat(actualClicksPerRegion).containsExactlyElementsOf(expectedClicksPerRegion);
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) Produced(org.apache.kafka.streams.kstream.Produced) Serialized(org.apache.kafka.streams.kstream.Serialized) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) KStream(org.apache.kafka.streams.kstream.KStream) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Consumed(org.apache.kafka.streams.Consumed) Serde(org.apache.kafka.common.serialization.Serde) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) TestUtils(org.apache.kafka.test.TestUtils) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) List(java.util.List) KafkaStreams(org.apache.kafka.streams.KafkaStreams)
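
The test above exercises the topology against an embedded single-node Kafka cluster. For checking topology logic without any broker, kafka-streams-test-utils provides TopologyTestDriver. The following is a minimal, hypothetical sketch of a similar clicks-per-region topology driven that way; the topic names, the simplified joiner (no RegionWithClicks helper), and the TestInputTopic/TestOutputTopic API (Kafka 2.4+) are all assumptions, not the project's actual test.

import java.util.List;
import java.util.Properties;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.LongSerializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;

public class ClicksPerRegionDriverSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Changelog of user -> region, analogous to userRegionsTable above.
        final KTable<String, String> userRegions = builder.table("user-regions");
        final KStream<String, Long> clicks =
            builder.stream("user-clicks", Consumed.with(Serdes.String(), Serdes.Long()));
        // Join each click count against the latest region per user; unknown users fall back to "UNKNOWN".
        final KStream<String, KeyValue<String, Long>> clicksWithRegion =
            clicks.leftJoin(userRegions, (clickCount, region) ->
                new KeyValue<>(region == null ? "UNKNOWN" : region, clickCount));
        final KTable<String, Long> clicksPerRegion = clicksWithRegion
            // Re-key by region so the aggregation below sums clicks per region.
            .map((user, regionAndClicks) -> new KeyValue<>(regionAndClicks.key, regionAndClicks.value))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
            .reduce(Long::sum);
        clicksPerRegion.toStream().to("clicks-per-region", Produced.with(Serdes.String(), Serdes.Long()));

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "clicks-per-region-driver-sketch");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); // never contacted by the driver
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); // forward every update eagerly

        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
            final TestInputTopic<String, String> regionInput =
                driver.createInputTopic("user-regions", new StringSerializer(), new StringSerializer());
            final TestInputTopic<String, Long> clickInput =
                driver.createInputTopic("user-clicks", new StringSerializer(), new LongSerializer());
            final TestOutputTopic<String, Long> output =
                driver.createOutputTopic("clicks-per-region", new StringDeserializer(), new LongDeserializer());
            regionInput.pipeInput("alice", "europe");
            clickInput.pipeInput("alice", 13L);
            final List<KeyValue<String, Long>> results = output.readKeyValuesToList();
            System.out.println(results); // expected to print something like [KeyValue(europe, 13)]
        }
    }
}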

Example 54 with KeyValue

use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.

the class TableToTableJoinIntegrationTest method shouldJoinTwoTables.

@Test
public void shouldJoinTwoTables() throws Exception {
    // Input 1: Region per user (multiple records allowed per user).
    List<KeyValue<String, String>> userRegionRecords = Arrays.asList(new KeyValue<>("alice", "asia"), new KeyValue<>("bob", "europe"), new KeyValue<>("alice", "europe"), new KeyValue<>("charlie", "europe"), new KeyValue<>("bob", "asia"));
    // Input 2: Timestamp of last login per user (multiple records allowed per user)
    List<KeyValue<String, Long>> userLastLoginRecords = Arrays.asList(new KeyValue<>("alice", 1485500000L), new KeyValue<>("bob", 1485520000L), new KeyValue<>("alice", 1485530000L), new KeyValue<>("bob", 1485560000L));
    List<KeyValue<String, String>> expectedResults = Arrays.asList(new KeyValue<>("alice", "europe/1485500000"), new KeyValue<>("bob", "asia/1485520000"), new KeyValue<>("alice", "europe/1485530000"), new KeyValue<>("bob", "asia/1485560000"));
    List<KeyValue<String, String>> expectedResultsForJoinStateStore = Arrays.asList(new KeyValue<>("alice", "europe/1485530000"), new KeyValue<>("bob", "asia/1485560000"));
    // 
    // Step 1: Configure and start the processor topology.
    // 
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "table-table-join-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // For didactic reasons: disable record caching so we can observe every individual update record being sent downstream
    streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    StreamsBuilder builder = new StreamsBuilder();
    KTable<String, String> userRegions = builder.table(userRegionTopic);
    KTable<String, Long> userLastLogins = builder.table(userLastLoginTopic, Consumed.with(stringSerde, longSerde));
    String storeName = "joined-store";
    userRegions.join(userLastLogins, (regionValue, lastLoginValue) -> regionValue + "/" + lastLoginValue, Materialized.as(storeName)).toStream().to(outputTopic, Produced.with(Serdes.String(), Serdes.String()));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Publish user regions.
    // 
    Properties regionsProducerConfig = new Properties();
    regionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    regionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    regionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    regionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    regionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userRegionTopic, userRegionRecords, regionsProducerConfig);
    // 
    // Step 3: Publish user's last login timestamps.
    // 
    Properties lastLoginProducerConfig = new Properties();
    lastLoginProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    lastLoginProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    lastLoginProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    lastLoginProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    lastLoginProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(userLastLoginTopic, userLastLoginRecords, lastLoginProducerConfig);
    // 
    // Step 4: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "stream-stream-join-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    List<KeyValue<String, String>> actualResults = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedResults.size());
    // Verify the (local) state store of the joined table.
    // For a comprehensive demonstration of interactive queries please refer to KafkaMusicExample.
    ReadOnlyKeyValueStore<String, String> readOnlyKeyValueStore = streams.store(storeName, QueryableStoreTypes.keyValueStore());
    KeyValueIterator<String, String> keyValueIterator = readOnlyKeyValueStore.all();
    assertThat(keyValueIterator).containsExactlyElementsOf(expectedResultsForJoinStateStore);
    streams.close();
    assertThat(actualResults).containsExactlyElementsOf(expectedResults);
}
Also used : KafkaStreams(org.apache.kafka.streams.KafkaStreams) KeyValue(org.apache.kafka.streams.KeyValue) Properties(java.util.Properties) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Test(org.junit.Test)
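
The test queries the joined table's local state store with all(). The helper below is a minimal, hypothetical sketch of doing point lookups through the same interactive-query API once the instance reports RUNNING; the class name and the polling loop are assumptions, not part of the test.

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

public final class StoreQueryHelper {

    private StoreQueryHelper() {
    }

    // Waits until the Streams instance is RUNNING, then looks up a single key in the named
    // key-value store (e.g. the "joined-store" materialized by the join above).
    public static String queryWhenRunning(final KafkaStreams streams,
                                          final String storeName,
                                          final String key) throws InterruptedException {
        while (streams.state() != KafkaStreams.State.RUNNING) {
            Thread.sleep(100);
        }
        final ReadOnlyKeyValueStore<String, String> store =
            streams.store(storeName, QueryableStoreTypes.keyValueStore());
        return store.get(key);
    }
}

With the inputs above fully processed, StoreQueryHelper.queryWhenRunning(streams, "joined-store", "alice") would be expected to return "europe/1485530000".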

Example 55 with KeyValue

use of org.apache.kafka.streams.KeyValue in project kafka-streams-examples by confluentinc.

the class WordCountLambdaIntegrationTest method shouldCountWords.

@Test
public void shouldCountWords() throws Exception {
    List<String> inputValues = Arrays.asList("Hello Kafka Streams", "All streams lead to Kafka", "Join Kafka Summit", "И теперь пошли русские слова");
    List<KeyValue<String, Long>> expectedWordCounts = Arrays.asList(new KeyValue<>("hello", 1L), new KeyValue<>("all", 1L), new KeyValue<>("streams", 2L), new KeyValue<>("lead", 1L), new KeyValue<>("to", 1L), new KeyValue<>("join", 1L), new KeyValue<>("kafka", 3L), new KeyValue<>("summit", 1L), new KeyValue<>("и", 1L), new KeyValue<>("теперь", 1L), new KeyValue<>("пошли", 1L), new KeyValue<>("русские", 1L), new KeyValue<>("слова", 1L));
    // 
    // Step 1: Configure and start the processor topology.
    // 
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, String> textLines = builder.stream(inputTopic);
    Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
    KTable<String, Long> wordCounts = textLines.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))).groupBy((key, word) -> word).count();
    wordCounts.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Produce some input data to the input topic.
    // 
    Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
    // 
    // Step 3: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    List<KeyValue<String, Long>> actualWordCounts = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedWordCounts.size());
    streams.close();
    assertThat(actualWordCounts).containsExactlyElementsOf(expectedWordCounts);
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KTable(org.apache.kafka.streams.kstream.KTable) Arrays(java.util.Arrays) Properties(java.util.Properties) BeforeClass(org.junit.BeforeClass) Produced(org.apache.kafka.streams.kstream.Produced) TestUtils(org.apache.kafka.test.TestUtils) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) KStream(org.apache.kafka.streams.kstream.KStream) List(java.util.List) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) Serde(org.apache.kafka.common.serialization.Serde) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Pattern(java.util.regex.Pattern) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig)
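
The flatMapValues step above depends on Pattern.UNICODE_CHARACTER_CLASS so that the Cyrillic input line is split into words at all. The standalone sketch below (not part of the test) illustrates the difference the flag makes.

import java.util.Arrays;
import java.util.regex.Pattern;

public class UnicodeSplitSketch {
    public static void main(final String[] args) {
        final String line = "И теперь пошли русские слова".toLowerCase();
        final Pattern unicodeAware = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
        final Pattern asciiOnly = Pattern.compile("\\W+");
        // With the flag, \w covers Unicode word characters, so only the spaces are delimiters:
        System.out.println(Arrays.asList(unicodeAware.split(line))); // [и, теперь, пошли, русские, слова]
        // Without the flag, every character here (Cyrillic letters included) counts as a delimiter,
        // so the split yields no tokens and the Russian words would never reach the count:
        System.out.println(Arrays.asList(asciiOnly.split(line)));    // []
    }
}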

Aggregations

KeyValue (org.apache.kafka.streams.KeyValue)343 Test (org.junit.Test)268 Properties (java.util.Properties)127 StreamsBuilder (org.apache.kafka.streams.StreamsBuilder)123 Windowed (org.apache.kafka.streams.kstream.Windowed)105 ArrayList (java.util.ArrayList)90 KafkaStreams (org.apache.kafka.streams.KafkaStreams)82 StringSerializer (org.apache.kafka.common.serialization.StringSerializer)74 Bytes (org.apache.kafka.common.utils.Bytes)74 TopologyTestDriver (org.apache.kafka.streams.TopologyTestDriver)68 IntegrationTest (org.apache.kafka.test.IntegrationTest)66 Serdes (org.apache.kafka.common.serialization.Serdes)65 KeyValueStore (org.apache.kafka.streams.state.KeyValueStore)62 StreamsConfig (org.apache.kafka.streams.StreamsConfig)55 StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer)53 KStream (org.apache.kafka.streams.kstream.KStream)52 SessionWindow (org.apache.kafka.streams.kstream.internals.SessionWindow)46 KTable (org.apache.kafka.streams.kstream.KTable)43 MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat)42 Consumed (org.apache.kafka.streams.kstream.Consumed)41