Search in sources :

Example 1 with GenericAvroSerde

use of io.confluent.kafka.streams.serdes.avro.GenericAvroSerde in project kafka-streams-examples by confluentinc.

The class TopArticlesLambdaExample, method buildTopArticlesStream.

/**
 * Builds (but does not start) the Kafka Streams application that computes the most viewed
 * articles per industry.
 *
 * <p>The topology reads page views from {@code PAGE_VIEWS}, keeps only the records accepted by
 * {@code isArticle}, counts views per article in 60-minute windows, regroups the counts by
 * industry into a priority queue ordered by descending count, and writes the pages of the top
 * 100 entries (one page per line) to {@code TOP_NEWS_PER_INDUSTRY_TOPIC}.
 *
 * @param bootstrapServers  Kafka broker(s) to connect to
 * @param schemaRegistryUrl URL of the Confluent schema registry used by the Avro serdes
 * @param stateDir          directory used for the application's local state
 * @return a configured {@link KafkaStreams} instance; the caller is responsible for starting
 *         and closing it
 * @throws IOException if the page view stats Avro schema cannot be found or read
 */
static KafkaStreams buildTopArticlesStream(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) throws IOException {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);

    // Serdes used in this example. Serdes that are instantiated by hand (as opposed to the
    // defaults above) must be given the schema registry URL explicitly via configure().
    final Serde<String> stringSerde = Serdes.String();
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
    keyAvroSerde.configure(serdeConfig, true);
    final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
    valueAvroSerde.configure(serdeConfig, false);
    final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);

    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<byte[], GenericRecord> views = builder.stream(PAGE_VIEWS);

    // Load the schema of the per-article stats record. The stream is closed as soon as the
    // schema has been parsed, and a missing resource is reported explicitly instead of
    // surfacing as an opaque failure inside the parser.
    final String statsSchemaPath = "avro/io/confluent/examples/streams/pageviewstats.avsc";
    final Schema schema;
    try (InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader().getResourceAsStream(statsSchemaPath)) {
        if (statsSchema == null) {
            throw new IOException("Avro schema resource not found on classpath: " + statsSchemaPath);
        }
        schema = new Schema.Parser().parse(statsSchema);
    }

    // Keep only article views and re-key each one by a normalized copy of itself: the "user"
    // field is overwritten with a constant so that views of the same page/industry by
    // different users share the same key.
    final KStream<GenericRecord, GenericRecord> articleViews = views.filter((dummy, record) -> isArticle(record)).map((dummy, article) -> {
        final GenericRecord clone = new GenericData.Record(article.getSchema());
        clone.put("user", "user");
        clone.put("page", article.get("page"));
        clone.put("industry", article.get("industry"));
        return new KeyValue<>(clone, clone);
    });

    // Count views per article in 60-minute windows.
    final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews.groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde)).windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60))).count();

    // Order stats records by descending count. Long.compare avoids the sign errors that
    // narrowing a long difference to int can produce for large count gaps.
    final Comparator<GenericRecord> comparator = (o1, o2) -> Long.compare((Long) o2.get("count"), (Long) o1.get("count"));

    final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts.groupBy(// the selector
    (windowedArticle, count) -> {
        // project on the industry field for key
        Windowed<String> windowedIndustry = new Windowed<>(windowedArticle.key().get("industry").toString(), windowedArticle.window());
        // add the page into the value
        GenericRecord viewStats = new GenericData.Record(schema);
        viewStats.put("page", windowedArticle.key().get("page"));
        viewStats.put("user", "user");
        viewStats.put("industry", windowedArticle.key().get("industry"));
        viewStats.put("count", count);
        return new KeyValue<>(windowedIndustry, viewStats);
    }, Serialized.with(windowedStringSerde, valueAvroSerde)).aggregate(// the initializer
    () -> new PriorityQueue<>(comparator), // the "add" aggregator
    (windowedIndustry, record, queue) -> {
        queue.add(record);
        return queue;
    }, // the "remove" aggregator
    (windowedIndustry, record, queue) -> {
        queue.remove(record);
        return queue;
    }, Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));

    final int topN = 100;
    final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
        // Drain a copy so the aggregate value handed to this mapper is left untouched;
        // copying from a PriorityQueue keeps its ordering.
        final PriorityQueue<GenericRecord> remaining = new PriorityQueue<>(queue);
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < topN; i++) {
            final GenericRecord record = remaining.poll();
            if (record == null) {
                break;
            }
            sb.append(record.get("page").toString());
            sb.append("\n");
        }
        return sb.toString();
    });
    topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Produced(org.apache.kafka.streams.kstream.Produced) Serialized(org.apache.kafka.streams.kstream.Serialized) PriorityQueue(java.util.PriorityQueue) KStream(org.apache.kafka.streams.kstream.KStream) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericData(org.apache.avro.generic.GenericData) Windowed(org.apache.kafka.streams.kstream.Windowed) Serde(org.apache.kafka.common.serialization.Serde) Map(java.util.Map) Serdes(org.apache.kafka.common.serialization.Serdes) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) Utf8(org.apache.avro.util.Utf8) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) KTable(org.apache.kafka.streams.kstream.KTable) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) KeyValue(org.apache.kafka.streams.KeyValue) IOException(java.io.IOException) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) TimeUnit(java.util.concurrent.TimeUnit) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) KeyValue(org.apache.kafka.streams.KeyValue) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) KafkaStreams(org.apache.kafka.streams.KafkaStreams) InputStream(java.io.InputStream) PriorityQueue(java.util.PriorityQueue) GenericData(org.apache.avro.generic.GenericData) 
Windowed(org.apache.kafka.streams.kstream.Windowed) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde)

Example 2 with GenericAvroSerde

use of io.confluent.kafka.streams.serdes.avro.GenericAvroSerde in project kafka-streams-examples by confluentinc.

The class GenericAvroIntegrationTest, method shouldRoundTripGenericAvroDataThroughKafka.

/**
 * Verifies that a generic Avro record written to the input topic arrives unchanged on the
 * output topic after passing through a pass-through Kafka Streams topology that uses a
 * manually configured {@link GenericAvroSerde} for values.
 */
@Test
public void shouldRoundTripGenericAvroDataThroughKafka() throws Exception {
    Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/avro/io/confluent/examples/streams/wikifeed.avsc"));
    GenericRecord record = new GenericData.Record(schema);
    record.put("user", "alice");
    record.put("is_new", true);
    record.put("content", "lorem ipsum");
    List<GenericRecord> inputValues = Collections.singletonList(record);
    // 
    // Step 1: Configure and start the processor topology.
    // 
    StreamsBuilder builder = new StreamsBuilder();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "generic-avro-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Write the input data as-is to the output topic.
    // 
    // Normally, because a) we have already configured the correct default serdes for keys and
    // values and b) the types for keys and values are the same for both the input topic and the
    // output topic, we would only need to define:
    // 
    // builder.stream(inputTopic).to(outputTopic);
    // 
    // However, in the code below we intentionally override the default serdes in `to()` to
    // demonstrate how you can construct and configure a generic Avro serde manually.
    final Serde<String> stringSerde = Serdes.String();
    final Serde<GenericRecord> genericAvroSerde = new GenericAvroSerde();
    // Note how we must manually call `configure()` on this serde to configure the schema registry
    // url.  This is different from the case of setting default serdes (see `streamsConfiguration`
    // above), which will be auto-configured based on the `streamsConfiguration` instance.
    final boolean isKeySerde = false;
    genericAvroSerde.configure(Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl()), isKeySerde);
    KStream<String, GenericRecord> stream = builder.stream(inputTopic);
    stream.to(outputTopic, Produced.with(stringSerde, genericAvroSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    final List<GenericRecord> actualValues;
    // Once started, the streams instance must be closed even when producing or consuming
    // fails; otherwise it keeps running after the test has errored out.
    try {
        // 
        // Step 2: Produce some input data to the input topic.
        // 
        Properties producerConfig = new Properties();
        producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
        producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
        producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
        producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
        producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
        producerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
        IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
        // 
        // Step 3: Verify the application's output data.
        // 
        Properties consumerConfig = new Properties();
        consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
        consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "generic-avro-integration-test-standard-consumer");
        consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
        consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class);
        consumerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
        actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, inputValues.size());
    } finally {
        streams.close();
    }
    assertEquals(inputValues, actualValues);
}
Also used : KafkaStreams(org.apache.kafka.streams.KafkaStreams) Schema(org.apache.avro.Schema) Properties(java.util.Properties) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Aggregations

GenericAvroSerde (io.confluent.kafka.streams.serdes.avro.GenericAvroSerde)2 Properties (java.util.Properties)2 Schema (org.apache.avro.Schema)2 GenericRecord (org.apache.avro.generic.GenericRecord)2 KafkaStreams (org.apache.kafka.streams.KafkaStreams)2 StreamsBuilder (org.apache.kafka.streams.StreamsBuilder)2 PriorityQueueSerde (io.confluent.examples.streams.utils.PriorityQueueSerde)1 WindowedSerde (io.confluent.examples.streams.utils.WindowedSerde)1 AbstractKafkaAvroSerDeConfig (io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Collections (java.util.Collections)1 Comparator (java.util.Comparator)1 Map (java.util.Map)1 PriorityQueue (java.util.PriorityQueue)1 TimeUnit (java.util.concurrent.TimeUnit)1 GenericData (org.apache.avro.generic.GenericData)1 Utf8 (org.apache.avro.util.Utf8)1 Serde (org.apache.kafka.common.serialization.Serde)1 Serdes (org.apache.kafka.common.serialization.Serdes)1