Search in sources :

Example 1 with Serdes

use of org.apache.kafka.common.serialization.Serdes in project kafka-streams-examples by confluentinc.

the class TopArticlesLambdaExample method buildTopArticlesStream.

static KafkaStreams buildTopArticlesStream(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) throws IOException {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    // Serdes used in this example
    final Serde<String> stringSerde = Serdes.String();
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
    keyAvroSerde.configure(serdeConfig, true);
    final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
    valueAvroSerde.configure(serdeConfig, false);
    final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<byte[], GenericRecord> views = builder.stream(PAGE_VIEWS);
    final InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewstats.avsc");
    final Schema schema = new Schema.Parser().parse(statsSchema);
    final KStream<GenericRecord, GenericRecord> articleViews = views.filter((dummy, record) -> isArticle(record)).map((dummy, article) -> {
        final GenericRecord clone = new GenericData.Record(article.getSchema());
        clone.put("user", "user");
        clone.put("page", article.get("page"));
        clone.put("industry", article.get("industry"));
        return new KeyValue<>(clone, clone);
    });
    final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews.groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde)).windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60))).count();
    final Comparator<GenericRecord> comparator = (o1, o2) -> (int) ((Long) o2.get("count") - (Long) o1.get("count"));
    final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts.groupBy(// the selector
    (windowedArticle, count) -> {
        // project on the industry field for key
        Windowed<String> windowedIndustry = new Windowed<>(windowedArticle.key().get("industry").toString(), windowedArticle.window());
        // add the page into the value
        GenericRecord viewStats = new GenericData.Record(schema);
        viewStats.put("page", windowedArticle.key().get("page"));
        viewStats.put("user", "user");
        viewStats.put("industry", windowedArticle.key().get("industry"));
        viewStats.put("count", count);
        return new KeyValue<>(windowedIndustry, viewStats);
    }, Serialized.with(windowedStringSerde, valueAvroSerde)).aggregate(// the initializer
    () -> new PriorityQueue<>(comparator), // the "add" aggregator
    (windowedIndustry, record, queue) -> {
        queue.add(record);
        return queue;
    }, // the "remove" aggregator
    (windowedIndustry, record, queue) -> {
        queue.remove(record);
        return queue;
    }, Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));
    final int topN = 100;
    final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < topN; i++) {
            final GenericRecord record = queue.poll();
            if (record == null) {
                break;
            }
            sb.append(record.get("page").toString());
            sb.append("\n");
        }
        return sb.toString();
    });
    topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Produced(org.apache.kafka.streams.kstream.Produced) Serialized(org.apache.kafka.streams.kstream.Serialized) PriorityQueue(java.util.PriorityQueue) KStream(org.apache.kafka.streams.kstream.KStream) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericData(org.apache.avro.generic.GenericData) Windowed(org.apache.kafka.streams.kstream.Windowed) Serde(org.apache.kafka.common.serialization.Serde) Map(java.util.Map) Serdes(org.apache.kafka.common.serialization.Serdes) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) Utf8(org.apache.avro.util.Utf8) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) KTable(org.apache.kafka.streams.kstream.KTable) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) KeyValue(org.apache.kafka.streams.KeyValue) IOException(java.io.IOException) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) TimeUnit(java.util.concurrent.TimeUnit) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) KeyValue(org.apache.kafka.streams.KeyValue) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream) PriorityQueue(java.util.PriorityQueue) GenericData(org.apache.avro.generic.GenericData) Windowed(org.apache.kafka.streams.kstream.Windowed) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde)

Example 2 with Serdes

use of org.apache.kafka.common.serialization.Serdes in project kafka-streams-examples by confluentinc.

the class WordCountLambdaExample method main.

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    // For illustrative purposes we disable record caches
    streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    // Set up serializers and deserializers, which we will use for overriding the default serdes
    // specified above.
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    // In the subsequent lines we define the processing topology of the Streams application.
    final StreamsBuilder builder = new StreamsBuilder();
    // Construct a `KStream` from the input topic "streams-plaintext-input", where message values
    // represent lines of text (for the sake of this example, we ignore whatever may be stored
    // in the message keys).
    // 
    // Note: We could also just call `builder.stream("streams-plaintext-input")` if we wanted to leverage
    // the default serdes specified in the Streams configuration above, because these defaults
    // match what's in the actual topic.  However we explicitly set the deserializers in the
    // call to `stream()` below in order to show how that's done, too.
    final KStream<String, String> textLines = builder.stream("streams-plaintext-input");
    final Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
    final KTable<String, Long> wordCounts = textLines.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))).groupBy((key, word) -> word).count();
    // Write the `KTable<String, Long>` to the output topic.
    wordCounts.toStream().to("streams-wordcount-output", Produced.with(stringSerde, longSerde));
    // Now that we have finished the definition of the processing topology we can actually run
    // it via `start()`.  The Streams application as a whole can be launched just like any
    // normal Java application that has a `main()` method.
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state prior is that your app must rebuilt its local state from scratch, which
    // will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
    // is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KTable(org.apache.kafka.streams.kstream.KTable) Arrays(java.util.Arrays) Properties(java.util.Properties) Produced(org.apache.kafka.streams.kstream.Produced) Serde(org.apache.kafka.common.serialization.Serde) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KStream(org.apache.kafka.streams.kstream.KStream) Pattern(java.util.regex.Pattern) Pattern(java.util.regex.Pattern) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Properties(java.util.Properties)

Example 3 with Serdes

use of org.apache.kafka.common.serialization.Serdes in project kafka-streams-examples by confluentinc.

the class WikipediaFeedAvroLambdaExample method buildWikipediaFeed.

static KafkaStreams buildWikipediaFeed(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-avro-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-avro-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // read the source stream
    final KStream<String, WikiFeed> feeds = builder.stream(WikipediaFeedAvroExample.WIKIPEDIA_FEED);
    // aggregate the new feed counts of by user
    final KTable<String, Long> aggregated = feeds.filter((dummy, value) -> value.getIsNew()).map((key, value) -> new KeyValue<>(value.getUser(), value)).groupByKey().count();
    // write to the result topic, need to override serdes
    aggregated.toStream().to(WikipediaFeedAvroExample.WIKIPEDIA_STATS, Produced.with(stringSerde, longSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) Produced(org.apache.kafka.streams.kstream.Produced) WikiFeed(io.confluent.examples.streams.avro.WikiFeed) KeyValue(org.apache.kafka.streams.KeyValue) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) KStream(org.apache.kafka.streams.kstream.KStream) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) SpecificAvroSerde(io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde) Serde(org.apache.kafka.common.serialization.Serde) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KafkaStreams(org.apache.kafka.streams.KafkaStreams) WikiFeed(io.confluent.examples.streams.avro.WikiFeed) Properties(java.util.Properties)

Aggregations

Properties (java.util.Properties)3 Serde (org.apache.kafka.common.serialization.Serde)3 Serdes (org.apache.kafka.common.serialization.Serdes)3 KafkaStreams (org.apache.kafka.streams.KafkaStreams)3 StreamsBuilder (org.apache.kafka.streams.StreamsBuilder)3 StreamsConfig (org.apache.kafka.streams.StreamsConfig)3 KStream (org.apache.kafka.streams.kstream.KStream)3 KTable (org.apache.kafka.streams.kstream.KTable)3 Produced (org.apache.kafka.streams.kstream.Produced)3 AbstractKafkaAvroSerDeConfig (io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig)2 KeyValue (org.apache.kafka.streams.KeyValue)2 WikiFeed (io.confluent.examples.streams.avro.WikiFeed)1 PriorityQueueSerde (io.confluent.examples.streams.utils.PriorityQueueSerde)1 WindowedSerde (io.confluent.examples.streams.utils.WindowedSerde)1 GenericAvroSerde (io.confluent.kafka.streams.serdes.avro.GenericAvroSerde)1 SpecificAvroSerde (io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1