Example usage of io.confluent.kafka.streams.serdes.avro.GenericAvroSerde from the project kafka-streams-examples by confluentinc:
the method buildTopArticlesStream of the class TopArticlesLambdaExample.
/**
 * Builds (but does not start) the "top articles" Kafka Streams topology.
 *
 * <p>The topology reads generic Avro page-view records from {@code PAGE_VIEWS}, keeps only
 * article views, counts views per (article, 1-hour window), then regroups per industry and
 * maintains a priority queue of the most-viewed pages, finally writing a newline-separated
 * top-N page list per windowed industry to {@code TOP_NEWS_PER_INDUSTRY_TOPIC}.
 *
 * @param bootstrapServers  Kafka broker(s) to connect to, e.g. "host:9092"
 * @param schemaRegistryUrl Confluent Schema Registry endpoint for Avro (de)serialization
 * @param stateDir          local directory for RocksDB state stores
 * @return a configured, not-yet-started {@link KafkaStreams} instance
 * @throws IOException if the page-view-stats Avro schema cannot be read from the classpath
 */
static KafkaStreams buildTopArticlesStream(final String bootstrapServers,
                                           final String schemaRegistryUrl,
                                           final String stateDir) throws IOException {
  final Properties streamsConfiguration = new Properties();
  // Give the Streams application a unique name. The name must be unique in the Kafka cluster
  // against which the application is run.
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
  streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
  // Where to find Kafka broker(s).
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  // Where to find the Confluent schema registry instance(s)
  streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
  // Specify default (de)serializers for record keys and for record values.
  streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
  streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
  streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
  // Records should be flushed every 10 seconds. This is less than the default
  // in order to keep this example interactive.
  streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);

  // Serdes used in this example. The Avro serdes are configured manually because they are
  // passed explicitly into the topology (the auto-configured defaults only apply to serdes
  // the Streams runtime instantiates itself).
  final Serde<String> stringSerde = Serdes.String();
  final Map<String, String> serdeConfig = Collections.singletonMap(
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
  final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
  keyAvroSerde.configure(serdeConfig, true);
  final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
  valueAvroSerde.configure(serdeConfig, false);
  final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);

  final StreamsBuilder builder = new StreamsBuilder();

  final KStream<byte[], GenericRecord> views = builder.stream(PAGE_VIEWS);

  // Load the Avro schema for the per-page view-stats records emitted by the re-keying step
  // below. Use try-with-resources so the classpath stream is always closed (the original
  // code leaked it), and fail fast with a clear message if the resource is missing.
  final Schema schema;
  try (InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader()
      .getResourceAsStream("avro/io/confluent/examples/streams/pageviewstats.avsc")) {
    if (statsSchema == null) {
      throw new IOException(
          "Schema resource avro/io/confluent/examples/streams/pageviewstats.avsc not found on classpath");
    }
    schema = new Schema.Parser().parse(statsSchema);
  }

  final KStream<GenericRecord, GenericRecord> articleViews = views
      // filter only article pages
      .filter((dummy, record) -> isArticle(record))
      // map <page id, article view> as the (key, value); the key carries page + industry so
      // counting groups by article.
      .map((dummy, article) -> {
        final GenericRecord clone = new GenericData.Record(article.getSchema());
        clone.put("user", "user");
        clone.put("page", article.get("page"));
        clone.put("industry", article.get("industry"));
        return new KeyValue<>(clone, clone);
      });

  // Count views per article within hopping-free 1-hour windows.
  final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews
      .groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde))
      .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60)))
      .count();

  // Descending by "count". Long.compare avoids the int-overflow bug of casting a long
  // subtraction down to int (which could reverse the ordering for large counts).
  final Comparator<GenericRecord> comparator =
      (o1, o2) -> Long.compare((Long) o2.get("count"), (Long) o1.get("count"));

  final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts
      .groupBy(
          // the selector: re-key from (article, window) to (industry, window) and fold the
          // count into the value record
          (windowedArticle, count) -> {
            // project on the industry field for key
            final Windowed<String> windowedIndustry = new Windowed<>(
                windowedArticle.key().get("industry").toString(), windowedArticle.window());
            // add the page into the value
            final GenericRecord viewStats = new GenericData.Record(schema);
            viewStats.put("page", windowedArticle.key().get("page"));
            viewStats.put("user", "user");
            viewStats.put("industry", windowedArticle.key().get("industry"));
            viewStats.put("count", count);
            return new KeyValue<>(windowedIndustry, viewStats);
          },
          Serialized.with(windowedStringSerde, valueAvroSerde))
      .aggregate(
          // the initializer
          () -> new PriorityQueue<>(comparator),
          // the "add" aggregator
          (windowedIndustry, record, queue) -> {
            queue.add(record);
            return queue;
          },
          // the "remove" aggregator
          (windowedIndustry, record, queue) -> {
            queue.remove(record);
            return queue;
          },
          Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));

  final int topN = 100;
  // Drain up to topN entries from the queue into a newline-separated list of page names.
  final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
    final StringBuilder sb = new StringBuilder();
    for (int i = 0; i < topN; i++) {
      final GenericRecord record = queue.poll();
      if (record == null) {
        break;
      }
      sb.append(record.get("page").toString());
      sb.append("\n");
    }
    return sb.toString();
  });

  topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
  return new KafkaStreams(builder.build(), streamsConfiguration);
}
Example usage of io.confluent.kafka.streams.serdes.avro.GenericAvroSerde from the project kafka-streams-examples by confluentinc:
the method shouldRoundTripGenericAvroDataThroughKafka of the class GenericAvroIntegrationTest.
/**
 * End-to-end round-trip test: a generic Avro record produced to the input topic must arrive
 * unchanged on the output topic after passing through a trivial Kafka Streams topology that
 * uses a manually configured {@link GenericAvroSerde} for the output serialization.
 *
 * @throws Exception if the embedded cluster, producer, or consumer fails
 */
@Test
public void shouldRoundTripGenericAvroDataThroughKafka() throws Exception {
  final Schema schema = new Schema.Parser().parse(
      getClass().getResourceAsStream("/avro/io/confluent/examples/streams/wikifeed.avsc"));
  final GenericRecord record = new GenericData.Record(schema);
  record.put("user", "alice");
  record.put("is_new", true);
  record.put("content", "lorem ipsum");
  final List<GenericRecord> inputValues = Collections.singletonList(record);

  //
  // Step 1: Configure and start the processor topology.
  //
  final StreamsBuilder builder = new StreamsBuilder();
  final Properties streamsConfiguration = new Properties();
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "generic-avro-integration-test");
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
  streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
  streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
  streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

  // Write the input data as-is to the output topic.
  //
  // Normally, because a) we have already configured the correct default serdes for keys and
  // values and b) the types for keys and values are the same for both the input topic and the
  // output topic, we would only need to define:
  //
  //   builder.stream(inputTopic).to(outputTopic);
  //
  // However, in the code below we intentionally override the default serdes in `to()` to
  // demonstrate how you can construct and configure a generic Avro serde manually.
  final Serde<String> stringSerde = Serdes.String();
  final Serde<GenericRecord> genericAvroSerde = new GenericAvroSerde();
  // Note how we must manually call `configure()` on this serde to configure the schema registry
  // url. This is different from the case of setting default serdes (see `streamsConfiguration`
  // above), which will be auto-configured based on the `StreamsConfiguration` instance.
  final boolean isKeySerde = false;
  genericAvroSerde.configure(
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl()),
      isKeySerde);
  final KStream<String, GenericRecord> stream = builder.stream(inputTopic);
  stream.to(outputTopic, Produced.with(stringSerde, genericAvroSerde));

  final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
  streams.start();

  final List<GenericRecord> actualValues;
  // Ensure the Streams instance is always shut down, even if producing or consuming below
  // throws; otherwise a failed run leaks a running application and can poison later tests.
  try {
    //
    // Step 2: Produce some input data to the input topic.
    //
    final Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
    producerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);

    //
    // Step 3: Verify the application's output data.
    //
    final Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "generic-avro-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class);
    consumerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
    actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(
        consumerConfig, outputTopic, inputValues.size());
  } finally {
    streams.close();
  }
  assertEquals(inputValues, actualValues);
}
Aggregations