
Example 31 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the confluentinc/kafka-streams-examples project.

From the class UserCountsPerRegionLambdaIntegrationTest, method shouldCountUsersPerRegion:

@Test
public void shouldCountUsersPerRegion() throws Exception {
    // Input: Region per user (multiple records allowed per user).
    List<KeyValue<String, String>> userRegionRecords = Arrays.asList(
        // This first record for Alice tells us that she is currently in Asia.
        new KeyValue<>("alice", "asia"),
        // First record for Bob.
        new KeyValue<>("bob", "europe"),
        // Second record for Alice: she has moved from Asia to Europe; in other words, it's a location update for Alice.
        new KeyValue<>("alice", "europe"),
        // Second record for Bob, who moved from Europe to Asia (i.e. the opposite direction of Alice).
        new KeyValue<>("bob", "asia"));
    List<KeyValue<String, Long>> expectedUsersPerRegion = Arrays.asList(
        // In the end, Alice is in Europe.
        new KeyValue<>("europe", 1L),
        // In the end, Bob is in Asia.
        new KeyValue<>("asia", 1L));
    // 
    // Step 1: Configure and start the processor topology.
    // 
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "user-regions-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    StreamsBuilder builder = new StreamsBuilder();
    KTable<String, String> userRegionsTable = builder.table(inputTopic);
    KTable<String, Long> usersPerRegionTable = userRegionsTable.groupBy((userId, region) -> KeyValue.pair(region, region)).count();
    usersPerRegionTable.toStream().to(outputTopic, Produced.with(stringSerde, longSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Publish user-region information.
    // 
    Properties userRegionsProducerConfig = new Properties();
    userRegionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    userRegionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    userRegionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    userRegionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    userRegionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceKeyValuesSynchronously(inputTopic, userRegionRecords, userRegionsProducerConfig);
    // 
    // Step 3: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "user-regions-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    List<KeyValue<String, Long>> actualUsersPerRegion = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedUsersPerRegion.size());
    streams.close();
    assertThat(actualUsersPerRegion).containsExactlyElementsOf(expectedUsersPerRegion);
}
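Because the full test above needs an embedded Kafka cluster, it can be useful to check the KTable update semantics in isolation. Below is a minimal sketch, assuming the kafka-streams-test-utils artifact is on the classpath (TopologyTestDriver, TestInputTopic and TestOutputTopic, available since Kafka 2.4) and using hypothetical topic names; it replays Alice's location update without a broker:

Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "user-regions-ttd-sketch");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

StreamsBuilder builder = new StreamsBuilder();
// Same topology as above: latest region per user, then user counts per region.
KTable<String, String> userRegions = builder.table("user-regions");
KTable<String, Long> usersPerRegion = userRegions
    .groupBy((userId, region) -> KeyValue.pair(region, region))
    .count();
usersPerRegion.toStream().to("users-per-region", Produced.with(Serdes.String(), Serdes.Long()));

TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props);
try {
    TestInputTopic<String, String> input =
        driver.createInputTopic("user-regions", new StringSerializer(), new StringSerializer());
    TestOutputTopic<String, Long> output =
        driver.createOutputTopic("users-per-region", new StringDeserializer(), new LongDeserializer());
    input.pipeInput("alice", "asia");    // asia count becomes 1
    input.pipeInput("alice", "europe");  // KTable update: asia is decremented, europe incremented
    // Latest count per region, expected {asia=0, europe=1}.
    System.out.println(output.readKeyValuesToMap());
} finally {
    driver.close();
}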

Example 32 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the confluentinc/kafka-streams-examples project.

From the class SumLambdaIntegrationTest, method shouldSumEvenNumbers:

@Test
public void shouldSumEvenNumbers() throws Exception {
    List<Integer> inputValues = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
    List<Integer> expectedValues = Collections.singletonList(30);
    // 
    // Step 1: Configure and start the processor topology.
    // 
    StreamsBuilder builder = new StreamsBuilder();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    KStream<Integer, Integer> input = builder.stream(inputTopic);
    KTable<Integer, Integer> sumOfEvenNumbers = input
        .filter((k, v) -> v % 2 == 0)
        .selectKey((k, v) -> 1)
        .groupByKey()
        .reduce((v1, v2) -> v1 + v2);
    sumOfEvenNumbers.toStream().to(outputTopic);
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Produce some input data to the input topic.
    // 
    Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
    // 
    // Step 3: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "sum-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
    List<Integer> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
    streams.close();
    assertThat(actualValues).isEqualTo(expectedValues);
}
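The pipeline above relies on the default Integer serdes set in the configuration. A minimal sketch of the same computation with the serdes passed explicitly, assuming Kafka Streams 2.1+ (where Grouped superseded the older Serialized) and placeholder topic names:

StreamsBuilder builder = new StreamsBuilder();
KStream<Integer, Integer> numbers =
    builder.stream("numbers", Consumed.with(Serdes.Integer(), Serdes.Integer()));
KTable<Integer, Integer> sumOfEvenNumbers = numbers
    .filter((k, v) -> v % 2 == 0)                                  // keep even values only
    .selectKey((k, v) -> 1)                                        // single key, so all values meet in one aggregate
    .groupByKey(Grouped.with(Serdes.Integer(), Serdes.Integer()))  // explicit serdes for the repartition topic
    .reduce(Integer::sum);                                         // running sum per key (here: one global sum)
sumOfEvenNumbers.toStream().to("sum-of-even-numbers", Produced.with(Serdes.Integer(), Serdes.Integer()));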

Example 33 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the confluentinc/kafka-streams-examples project.

From the class KafkaMusicExample, method createChartsStreams:

static KafkaStreams createChartsStreams(final String bootstrapServers, final String schemaRegistryUrl, final int applicationServerPort, final String stateDir) {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-music-charts");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Provide the details of our embedded http service that we'll use to connect to this streams
    // instance and discover locations of stores.
    streamsConfiguration.put(StreamsConfig.APPLICATION_SERVER_CONFIG, "localhost:" + applicationServerPort);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Set to earliest so we don't miss any data that arrived in the topics before the process
    // started
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Set the commit interval to 500ms so that any changes are flushed frequently and the top five
    // charts are updated with low latency.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);
    // Allow the user to fine-tune the `metadata.max.age.ms` via Java system properties from the CLI.
    // Lowering this parameter from its default of 5 minutes to a few seconds is helpful in
    // situations where the input topic was not pre-created before running the application because
    // the application will discover a newly created topic faster.  In production, you would
    // typically not change this parameter from its default.
    String metadataMaxAgeMs = System.getProperty(ConsumerConfig.METADATA_MAX_AGE_CONFIG);
    if (metadataMaxAgeMs != null) {
        try {
            int value = Integer.parseInt(metadataMaxAgeMs);
            streamsConfiguration.put(ConsumerConfig.METADATA_MAX_AGE_CONFIG, value);
            System.out.println("Set consumer configuration " + ConsumerConfig.METADATA_MAX_AGE_CONFIG + " to " + value);
        } catch (NumberFormatException ignored) {
        }
    }
    // create and configure the SpecificAvroSerdes required in this example
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
    playEventSerde.configure(serdeConfig, false);
    final SpecificAvroSerde<Song> keySongSerde = new SpecificAvroSerde<>();
    keySongSerde.configure(serdeConfig, true);
    final SpecificAvroSerde<Song> valueSongSerde = new SpecificAvroSerde<>();
    valueSongSerde.configure(serdeConfig, false);
    final SpecificAvroSerde<SongPlayCount> songPlayCountSerde = new SpecificAvroSerde<>();
    songPlayCountSerde.configure(serdeConfig, false);
    final StreamsBuilder builder = new StreamsBuilder();
    // get a stream of play events
    final KStream<String, PlayEvent> playEvents = builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde));
    // get table and create a state store to hold all the songs in the store
    final KTable<Long, Song> songTable = builder.table(SONG_FEED,
        Materialized.<Long, Song, KeyValueStore<Bytes, byte[]>>as(ALL_SONGS)
            .withKeySerde(Serdes.Long())
            .withValueSerde(valueSongSerde));
    // Accept play events that have a duration >= the minimum
    final KStream<Long, PlayEvent> playsBySongId = playEvents
        .filter((region, event) -> event.getDuration() >= MIN_CHARTABLE_DURATION)
        .map((key, value) -> KeyValue.pair(value.getSongId(), value));
    // join the plays with song as we will use it later for charting
    final KStream<Long, Song> songPlays = playsBySongId.leftJoin(songTable,
        (value1, song) -> song,
        Joined.with(Serdes.Long(), playEventSerde, valueSongSerde));
    // create a state store to track song play counts
    final KTable<Song, Long> songPlayCounts = songPlays
        .groupBy((songId, song) -> song, Serialized.with(keySongSerde, valueSongSerde))
        .count(Materialized.<Song, Long, KeyValueStore<Bytes, byte[]>>as(SONG_PLAY_COUNT_STORE)
            .withKeySerde(valueSongSerde)
            .withValueSerde(Serdes.Long()));
    final TopFiveSerde topFiveSerde = new TopFiveSerde();
    // Compute the top five charts for each genre. The results of this computation will continuously update the state
    // store "top-five-songs-by-genre", and this state store can then be queried interactively via a REST API (cf.
    // MusicPlaysRestService) for the latest charts per genre.
    songPlayCounts
        .groupBy((song, plays) -> KeyValue.pair(song.getGenre().toLowerCase(), new SongPlayCount(song.getId(), plays)),
            Serialized.with(Serdes.String(), songPlayCountSerde))
        .aggregate(TopFiveSongs::new, (aggKey, value, aggregate) -> {
        aggregate.add(value);
        return aggregate;
    }, (aggKey, value, aggregate) -> {
        aggregate.remove(value);
        return aggregate;
    }, Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_BY_GENRE_STORE).withKeySerde(Serdes.String()).withValueSerde(topFiveSerde));
    // Compute the overall top five chart. The results of this computation will continuously update the state
    // store "top-five-songs", and this state store can then be queried interactively via a REST API (cf.
    // MusicPlaysRestService) for the latest overall chart.
    songPlayCounts
        .groupBy((song, plays) -> KeyValue.pair(TOP_FIVE_KEY, new SongPlayCount(song.getId(), plays)),
            Serialized.with(Serdes.String(), songPlayCountSerde))
        .aggregate(TopFiveSongs::new, (aggKey, value, aggregate) -> {
        aggregate.add(value);
        return aggregate;
    }, (aggKey, value, aggregate) -> {
        aggregate.remove(value);
        return aggregate;
    }, Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_STORE).withKeySerde(Serdes.String()).withValueSerde(topFiveSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
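The two aggregations above depend on a TopFiveSongs container and its TopFiveSerde, both defined elsewhere in KafkaMusicExample. A simplified sketch of the idea behind that container follows; it is not the project's exact class (the real one also keeps a lookup map and a custom Serde), and the SongPlayCount getter names are assumed from its Avro schema:

// Hypothetical simplified container: keeps at most five SongPlayCount entries,
// ordered by play count descending.
static class TopFiveSongs implements Iterable<SongPlayCount> {

    // Highest play count first; ties broken by song id so distinct songs never collide.
    private final TreeSet<SongPlayCount> top = new TreeSet<>(
        Comparator.comparingLong(SongPlayCount::getPlays).reversed()
            .thenComparing(SongPlayCount::getSongId));

    // Called by the "adder" of the aggregate above whenever a song's count goes up.
    void add(final SongPlayCount songPlayCount) {
        top.add(songPlayCount);
        if (top.size() > 5) {
            top.pollLast();   // drop the lowest-ranked entry
        }
    }

    // Called by the "subtractor" whenever an old value is retracted.
    void remove(final SongPlayCount value) {
        top.remove(value);
    }

    @Override
    public Iterator<SongPlayCount> iterator() {
        return top.iterator();
    }
}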

Example 34 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the confluentinc/kafka-streams-examples project.

From the class AnomalyDetectionLambdaExample, method main:

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "anomaly-detection-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "anomaly-detection-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // Set the commit interval to 500ms so that any changes are flushed frequently. The low latency
    // would be important for anomaly detection.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // Read the source stream.  In this example, we ignore whatever is stored in the record key and
    // assume the record value contains the username (and each record would represent a single
    // click by the corresponding user).
    final KStream<String, String> views = builder.stream("UserClicks");
    final KTable<Windowed<String>, Long> anomalousUsers = views
        .map((ignoredKey, username) -> new KeyValue<>(username, username))
        .groupByKey()
        .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(1)))
        .count()
        .filter((windowedUserId, count) -> count >= 3);
    // Note: The following operations would NOT be needed for the actual anomaly detection,
    // which would normally stop at the filter() above.  We use the operations below only to
    // "massage" the output data so it is easier to inspect on the console via
    // kafka-console-consumer.
    final KStream<String, Long> anomalousUsersForConsole = anomalousUsers
        .toStream()
        .filter((windowedUserId, count) -> count != null)
        .map((windowedUserId, count) -> new KeyValue<>(windowedUserId.toString(), count));
    // write to the result topic
    anomalousUsersForConsole.to("AnomalousUsers", Produced.with(stringSerde, longSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state beforehand is that your app must rebuild its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to always clean up as we do here, but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command-line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
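To try the example end to end, a plain Kafka producer can feed the UserClicks topic that the topology reads from. A minimal sketch follows; the topic name comes from the example above, while the bootstrap address and user names are illustrative. One user clicks often enough within a one-minute window to be flagged:

Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
try (KafkaProducer<String, String> producer = new KafkaProducer<>(producerConfig)) {
    // "alice" appears four times and should cross the count >= 3 threshold; "bob" should not.
    for (String user : Arrays.asList("alice", "alice", "alice", "alice", "bob")) {
        // The topology ignores the record key, so it can be left null.
        producer.send(new ProducerRecord<>("UserClicks", null, user));
    }
    producer.flush();
}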

Example 35 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the confluentinc/kafka-streams-examples project.

From the class WikipediaFeedAvroLambdaExample, method buildWikipediaFeed:

static KafkaStreams buildWikipediaFeed(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-avro-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-avro-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // read the source stream
    final KStream<String, WikiFeed> feeds = builder.stream(WikipediaFeedAvroExample.WIKIPEDIA_FEED);
    // count the new feeds per user
    final KTable<String, Long> aggregated = feeds
        .filter((dummy, value) -> value.getIsNew())
        .map((key, value) -> new KeyValue<>(value.getUser(), value))
        .groupByKey()
        .count();
    // Write to the result topic; the default serdes must be overridden because the value type is now Long.
    aggregated.toStream().to(WikipediaFeedAvroExample.WIKIPEDIA_STATS, Produced.with(stringSerde, longSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
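Here the value serde is configured globally (SpecificAvroSerde as the default value serde, with the schema registry URL in the streams configuration). An alternative sketch, mirroring the pattern from the KafkaMusicExample above, configures a WikiFeed serde explicitly and passes it per operation; the topic constant and the builder/schemaRegistryUrl variables are the ones from the method above, the rest is illustrative:

final Map<String, String> serdeConfig =
    Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
final SpecificAvroSerde<WikiFeed> wikiFeedSerde = new SpecificAvroSerde<>();
wikiFeedSerde.configure(serdeConfig, false);  // false: this serde is used for record values, not keys
final KStream<String, WikiFeed> feeds = builder.stream(
    WikipediaFeedAvroExample.WIKIPEDIA_FEED,
    Consumed.with(Serdes.String(), wikiFeedSerde));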

Aggregations

Related classes and how often they appear alongside KTable in the indexed examples:

KTable (org.apache.kafka.streams.kstream.KTable): 56
Serdes (org.apache.kafka.common.serialization.Serdes): 51
Properties (java.util.Properties): 50
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 49
KeyValue (org.apache.kafka.streams.KeyValue): 41
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 41
Test (org.junit.Test): 40
KStream (org.apache.kafka.streams.kstream.KStream): 39
Materialized (org.apache.kafka.streams.kstream.Materialized): 35
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 32
Consumed (org.apache.kafka.streams.kstream.Consumed): 32
Produced (org.apache.kafka.streams.kstream.Produced): 28
Bytes (org.apache.kafka.common.utils.Bytes): 27
KeyValueStore (org.apache.kafka.streams.state.KeyValueStore): 27
MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 27
Grouped (org.apache.kafka.streams.kstream.Grouped): 25
List (java.util.List): 24
Serde (org.apache.kafka.common.serialization.Serde): 24
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer): 24
KafkaStreams (org.apache.kafka.streams.KafkaStreams): 24