Use of io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde in project kafka-streams-examples by confluentinc.
The class GlobalKTablesExample, method createStreams.
public static KafkaStreams createStreams(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "global-tables-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "global-tables-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Set to earliest so we don't miss any data that arrived in the topics before the process
// started
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// create and configure the SpecificAvroSerdes required in this example
final SpecificAvroSerde<Order> orderSerde = new SpecificAvroSerde<>();
final Map<String, String> serdeConfig =
    Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// the second argument to configure() is `isKey`: false, because these serdes are used for record values
orderSerde.configure(serdeConfig, false);
final SpecificAvroSerde<Customer> customerSerde = new SpecificAvroSerde<>();
customerSerde.configure(serdeConfig, false);
final SpecificAvroSerde<Product> productSerde = new SpecificAvroSerde<>();
productSerde.configure(serdeConfig, false);
final SpecificAvroSerde<EnrichedOrder> enrichedOrdersSerde = new SpecificAvroSerde<>();
enrichedOrdersSerde.configure(serdeConfig, false);
final StreamsBuilder builder = new StreamsBuilder();
// Get the stream of orders
final KStream<Long, Order> ordersStream =
    builder.stream(ORDER_TOPIC, Consumed.with(Serdes.Long(), orderSerde));
// Create a global table for customers. The data from this global table
// will be fully replicated on each instance of this application.
final GlobalKTable<Long, Customer> customers = builder.globalTable(CUSTOMER_TOPIC,
    Materialized.<Long, Customer, KeyValueStore<Bytes, byte[]>>as(CUSTOMER_STORE)
        .withKeySerde(Serdes.Long())
        .withValueSerde(customerSerde));
// Create a global table for products. The data from this global table
// will be fully replicated on each instance of this application.
final GlobalKTable<Long, Product> products = builder.globalTable(PRODUCT_TOPIC,
    Materialized.<Long, Product, KeyValueStore<Bytes, byte[]>>as(PRODUCT_STORE)
        .withKeySerde(Serdes.Long())
        .withValueSerde(productSerde));
// Join the orders stream to the customer global table. Because this is a global table,
// we can use a non-key join without needing to repartition the input stream.
final KStream<Long, CustomerOrder> customerOrdersStream = ordersStream.join(customers,
    (orderId, order) -> order.getCustomerId(),
    (order, customer) -> new CustomerOrder(customer, order));
// Join the enriched customer-order stream with the product global table. Because this is a
// global table, we can use a non-key join without needing to repartition the input stream.
final KStream<Long, EnrichedOrder> enrichedOrdersStream = customerOrdersStream.join(products,
    (orderId, customerOrder) -> customerOrder.productId(),
    (customerOrder, product) -> new EnrichedOrder(product, customerOrder.customer, customerOrder.order));
// write the enriched order to the enriched-order topic
enrichedOrdersStream.to(ENRICHED_ORDER_TOPIC, Produced.with(Serdes.Long(), enrichedOrdersSerde));
return new KafkaStreams(builder.build(), new StreamsConfig(streamsConfiguration));
}
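A minimal launcher sketch for this method; the broker endpoint, Schema Registry URL, and state directory below are illustrative assumptions, not part of the example itself:
public static void main(final String[] args) {
  final KafkaStreams streams =
      createStreams("localhost:9092", "http://localhost:8081", "/tmp/kafka-streams-global-tables");
  streams.cleanUp(); // optional: wipe local state so this instance starts from scratch
  streams.start();
  // gracefully shut down the topology on SIGTERM
  Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}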
Use of io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde in project kafka-streams-examples by confluentinc.
The class SpecificAvroIntegrationTest, method shouldRoundTripSpecificAvroDataThroughKafka.
@Test
public void shouldRoundTripSpecificAvroDataThroughKafka() throws Exception {
List<WikiFeed> inputValues = Collections.singletonList(
    WikiFeed.newBuilder().setUser("alice").setIsNew(true).setContent("lorem ipsum").build());
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "specific-avro-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Write the input data as-is to the output topic.
//
// Normally, because a) we have already configured the correct default serdes for keys and
// values and b) the types for keys and values are the same for both the input topic and the
// output topic, we would only need to define:
//
// builder.stream(inputTopic).to(outputTopic);
//
// However, in the code below we intentionally override the default serdes in `to()` to
// demonstrate how you can construct and configure a specific Avro serde manually.
final Serde<String> stringSerde = Serdes.String();
final Serde<WikiFeed> specificAvroSerde = new SpecificAvroSerde<>();
// Note how we must manually call `configure()` on this serde to set the schema registry
// URL. This is different from the case of the default serdes (see `streamsConfiguration`
// above), which are auto-configured from the application's configuration properties.
final boolean isKeySerde = false;
specificAvroSerde.configure(
    Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl()),
    isKeySerde);
KStream<String, WikiFeed> stream = builder.stream(inputTopic);
stream.to(outputTopic, Produced.with(stringSerde, specificAvroSerde));
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
producerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
//
// Step 3: Verify the application's output data.
//
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "specific-avro-integration-test-standard-consumer");
consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class);
consumerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
consumerConfig.put(KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG, true);
List<WikiFeed> actualValues =
    IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, inputValues.size());
streams.close();
assertEquals(inputValues, actualValues);
}
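For contrast, a hedged sketch of configuring the same kind of serde for the key side: with `isKeySerde = true` the underlying serializer registers schemas under the "<topic>-key" subject rather than "<topic>-value". This serde is illustrative and not used by the test above.
final Serde<WikiFeed> keyAvroSerde = new SpecificAvroSerde<>();
keyAvroSerde.configure(
    Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl()),
    true /* isKeySerde */);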
Use of io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde in project kafka-streams-examples by confluentinc.
The class KafkaMusicExample, method createChartsStreams.
static KafkaStreams createChartsStreams(final String bootstrapServers, final String schemaRegistryUrl, final int applicationServerPort, final String stateDir) {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-music-charts");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Provide the details of our embedded http service that we'll use to connect to this streams
// instance and discover locations of stores.
streamsConfiguration.put(StreamsConfig.APPLICATION_SERVER_CONFIG, "localhost:" + applicationServerPort);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Set to earliest so we don't miss any data that arrived in the topics before the process
// started
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Set the commit interval to 500ms so that any changes are flushed frequently and the top five
// charts are updated with low latency.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);
// Allow the user to fine-tune the `metadata.max.age.ms` via Java system properties from the CLI.
// Lowering this parameter from its default of 5 minutes to a few seconds is helpful in
// situations where the input topic was not pre-created before running the application because
// the application will discover a newly created topic faster. In production, you would
// typically not change this parameter from its default.
final String metadataMaxAgeMs = System.getProperty(ConsumerConfig.METADATA_MAX_AGE_CONFIG);
if (metadataMaxAgeMs != null) {
  try {
    final int value = Integer.parseInt(metadataMaxAgeMs);
    streamsConfiguration.put(ConsumerConfig.METADATA_MAX_AGE_CONFIG, value);
    System.out.println("Set consumer configuration " + ConsumerConfig.METADATA_MAX_AGE_CONFIG + " to " + value);
  } catch (final NumberFormatException ignored) {
    // ignore values that are not valid integers and keep the default
  }
}
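// For example, the property could be supplied on the command line when launching the
// application (the jar path and main class below are illustrative):
//
//   java -Dmetadata.max.age.ms=1000 -cp target/kafka-streams-examples-standalone.jar \
//       io.confluent.examples.streams.interactivequeries.kafkamusic.KafkaMusicExample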
// create and configure the SpecificAvroSerdes required in this example
final Map<String, String> serdeConfig =
    Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
playEventSerde.configure(serdeConfig, false);
// the Song serde is configured twice: once as a key serde (isKey = true), once as a value serde
final SpecificAvroSerde<Song> keySongSerde = new SpecificAvroSerde<>();
keySongSerde.configure(serdeConfig, true);
final SpecificAvroSerde<Song> valueSongSerde = new SpecificAvroSerde<>();
valueSongSerde.configure(serdeConfig, false);
final SpecificAvroSerde<SongPlayCount> songPlayCountSerde = new SpecificAvroSerde<>();
songPlayCountSerde.configure(serdeConfig, false);
final StreamsBuilder builder = new StreamsBuilder();
// get a stream of play events
final KStream<String, PlayEvent> playEvents =
    builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde));
// create a table from the song feed, materialized into a state store holding all songs
final KTable<Long, Song> songTable = builder.table(SONG_FEED,
    Materialized.<Long, Song, KeyValueStore<Bytes, byte[]>>as(ALL_SONGS)
        .withKeySerde(Serdes.Long())
        .withValueSerde(valueSongSerde));
// Accept only play events that have a duration >= the minimum, re-keyed by song id
final KStream<Long, PlayEvent> playsBySongId = playEvents
    .filter((region, event) -> event.getDuration() >= MIN_CHARTABLE_DURATION)
    .map((key, value) -> KeyValue.pair(value.getSongId(), value));
// join the plays with the song table; only the Song is kept, which is all we need for charting
final KStream<Long, Song> songPlays = playsBySongId.leftJoin(songTable,
    (play, song) -> song,
    Joined.with(Serdes.Long(), playEventSerde, valueSongSerde));
// create a state store to track song play counts
final KTable<Song, Long> songPlayCounts = songPlays
    .groupBy((songId, song) -> song, Serialized.with(keySongSerde, valueSongSerde))
    .count(Materialized.<Song, Long, KeyValueStore<Bytes, byte[]>>as(SONG_PLAY_COUNT_STORE)
        .withKeySerde(valueSongSerde)
        .withValueSerde(Serdes.Long()));
final TopFiveSerde topFiveSerde = new TopFiveSerde();
// Compute the top five charts for each genre. The results of this computation will continuously update the state
// store "top-five-songs-by-genre", and this state store can then be queried interactively via a REST API (cf.
// MusicPlaysRestService) for the latest charts per genre.
songPlayCounts
    .groupBy((song, plays) -> KeyValue.pair(song.getGenre().toLowerCase(), new SongPlayCount(song.getId(), plays)),
        Serialized.with(Serdes.String(), songPlayCountSerde))
    .aggregate(TopFiveSongs::new,
        (aggKey, value, aggregate) -> {
          aggregate.add(value);
          return aggregate;
        },
        (aggKey, value, aggregate) -> {
          aggregate.remove(value);
          return aggregate;
        },
        Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_BY_GENRE_STORE)
            .withKeySerde(Serdes.String())
            .withValueSerde(topFiveSerde));
// Compute the overall top five chart. The results of this computation will continuously update the state
// store "top-five-songs", and this state store can then be queried interactively via a REST API (cf.
// MusicPlaysRestService) for the latest overall chart.
songPlayCounts
    .groupBy((song, plays) -> KeyValue.pair(TOP_FIVE_KEY, new SongPlayCount(song.getId(), plays)),
        Serialized.with(Serdes.String(), songPlayCountSerde))
    .aggregate(TopFiveSongs::new,
        (aggKey, value, aggregate) -> {
          aggregate.add(value);
          return aggregate;
        },
        (aggKey, value, aggregate) -> {
          aggregate.remove(value);
          return aggregate;
        },
        Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_STORE)
            .withKeySerde(Serdes.String())
            .withValueSerde(topFiveSerde));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
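Once the application is in the RUNNING state, the stores materialized above can be queried interactively. A hedged sketch, assuming `streams` is the instance returned by this method; the "punk" genre key is illustrative, and the pre-2.5 store(String, QueryableStoreType) overload is used to match the API vintage of this example:
final ReadOnlyKeyValueStore<String, TopFiveSongs> topFiveByGenre =
    streams.store(TOP_FIVE_SONGS_BY_GENRE_STORE, QueryableStoreTypes.keyValueStore());
final TopFiveSongs punkChart = topFiveByGenre.get("punk"); // genre keys were lower-cased above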
Use of io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde in project kafka-streams-examples by confluentinc.
The class SessionWindowsExample, method createStreams.
static KafkaStreams createStreams(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
final Properties config = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
config.put(StreamsConfig.APPLICATION_ID_CONFIG, "session-windows-example");
config.put(StreamsConfig.CLIENT_ID_CONFIG, "session-windows-example-client");
// Where to find Kafka broker(s).
config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
config.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Set to earliest so we don't miss any data that arrived in the topics before the process
// started
config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// disable caching to see session merging
config.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
// create and configure the SpecificAvroSerdes required in this example
final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
final Map<String, String> serdeConfig =
    Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
playEventSerde.configure(serdeConfig, false);
final StreamsBuilder builder = new StreamsBuilder();
builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde))
    .groupByKey(Serialized.with(Serdes.String(), playEventSerde))
    .windowedBy(SessionWindows.with(INACTIVITY_GAP))
    .count(Materialized.<String, Long, SessionStore<Bytes, byte[]>>as(PLAY_EVENTS_PER_SESSION)
        .withKeySerde(Serdes.String())
        .withValueSerde(Serdes.Long()))
    .toStream()
    .map((key, value) -> new KeyValue<>(key.key() + "@" + key.window().start() + "->" + key.window().end(), value))
    .to(PLAY_EVENTS_PER_SESSION, Produced.with(Serdes.String(), Serdes.Long()));
return new KafkaStreams(builder.build(), new StreamsConfig(config));
}
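For illustration (timestamps and gap assumed, not taken from the example): with an inactivity gap of 30 minutes, play events keyed "alice" at t = 0 and t = 10 min merge into one session, emitted as key "alice@0->600000" with count 2; a further event at t = 45 min exceeds the gap and opens a new session, "alice@2700000->2700000" with count 1. Because caching is disabled above, the output topic also shows the intermediate single-event sessions and the updates produced when sessions merge.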