Example use of org.apache.kafka.streams.kstream.Materialized in the kafka-streams-examples project by confluentinc.
From the class WordCountInteractiveQueriesExample, method createStreams:
/**
 * Builds the word-count topology and wraps it in a {@link KafkaStreams} instance.
 *
 * <p>Lines read from {@code TEXT_LINES_TOPIC} are lower-cased, split on runs of non-word
 * characters and grouped by word. Two counts are materialized: an all-time count in the
 * key-value store {@code "word-count"} and a per-window count in the window store
 * {@code "windowed-word-count"}.
 *
 * @param streamsConfiguration configuration handed unchanged to the {@link KafkaStreams} constructor
 * @return a {@link KafkaStreams} instance for the topology; it is not started by this method
 */
static KafkaStreams createStreams(final Properties streamsConfiguration) {
    final Serde<String> stringSerde = Serdes.String();
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> textLines =
        builder.stream(TEXT_LINES_TOPIC, Consumed.with(stringSerde, stringSerde));

    final KGroupedStream<String, String> groupedByWord = textLines
        .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
        // String.split yields an empty token for a blank line or for a line that starts with a
        // non-word character; drop it so the empty string is not counted as a word.
        .filter((key, word) -> !word.isEmpty())
        .groupBy((key, word) -> word, Serialized.with(stringSerde, stringSerde));

    // Create a state store with the all-time word count
    groupedByWord.count(
        Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-count")
            .withValueSerde(Serdes.Long()));

    // Create a windowed state store that contains the word count for every
    // 1 minute (60000 ms)
    groupedByWord
        .windowedBy(TimeWindows.of(60000))
        .count(
            Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-word-count")
                .withValueSerde(Serdes.Long()));

    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Example use of org.apache.kafka.streams.kstream.Materialized in the kafka-streams-examples project by confluentinc.
From the class KafkaMusicExample, method createChartsStreams:
/**
 * Builds the music-charts topology and wraps it in a {@link KafkaStreams} instance.
 *
 * <p>Play events from {@code PLAY_EVENTS} are filtered by minimum duration, re-keyed by song id,
 * joined against the song table read from {@code SONG_FEED}, and counted per song. The per-song
 * counts are then aggregated into two "top five" stores: one keyed by lower-cased genre and one
 * under the single key {@code TOP_FIVE_KEY}.
 *
 * @param bootstrapServers      value for {@code StreamsConfig.BOOTSTRAP_SERVERS_CONFIG}
 * @param schemaRegistryUrl     schema registry URL used to configure the Avro serdes
 * @param applicationServerPort port advertised (as {@code "localhost:<port>"}) via
 *                              {@code StreamsConfig.APPLICATION_SERVER_CONFIG}
 * @param stateDir              value for {@code StreamsConfig.STATE_DIR_CONFIG}
 * @return a {@link KafkaStreams} instance for the topology; it is not started by this method
 */
static KafkaStreams createChartsStreams(final String bootstrapServers, final String schemaRegistryUrl, final int applicationServerPort, final String stateDir) {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name. The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-music-charts");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Provide the details of our embedded http service that we'll use to connect to this streams
    // instance and discover locations of stores.
    streamsConfiguration.put(StreamsConfig.APPLICATION_SERVER_CONFIG, "localhost:" + applicationServerPort);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Set to earliest so we don't miss any data that arrived in the topics before the process
    // started
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Set the commit interval to 500ms so that any changes are flushed frequently and the top five
    // charts are updated with low latency.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);

    // Allow the user to fine-tune the `metadata.max.age.ms` via Java system properties from the CLI.
    // Lowering this parameter from its default of 5 minutes to a few seconds is helpful in
    // situations where the input topic was not pre-created before running the application because
    // the application will discover a newly created topic faster. In production, you would
    // typically not change this parameter from its default.
    final String metadataMaxAgeMs = System.getProperty(ConsumerConfig.METADATA_MAX_AGE_CONFIG);
    if (metadataMaxAgeMs != null) {
        try {
            final int value = Integer.parseInt(metadataMaxAgeMs);
            streamsConfiguration.put(ConsumerConfig.METADATA_MAX_AGE_CONFIG, value);
            System.out.println("Set consumer configuration " + ConsumerConfig.METADATA_MAX_AGE_CONFIG + " to " + value);
        } catch (final NumberFormatException ignored) {
            // Best effort: leave the setting untouched, but tell the user the override was not
            // applied instead of dropping it silently.
            System.err.println("Ignoring invalid value '" + metadataMaxAgeMs + "' for consumer configuration "
                + ConsumerConfig.METADATA_MAX_AGE_CONFIG + ": expected an integer");
        }
    }

    // create and configure the SpecificAvroSerdes required in this example
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
    playEventSerde.configure(serdeConfig, false);
    // Song is used both as a record key and as a record value, so it needs one serde configured
    // for each role (second argument of configure: true = key, false = value).
    final SpecificAvroSerde<Song> keySongSerde = new SpecificAvroSerde<>();
    keySongSerde.configure(serdeConfig, true);
    final SpecificAvroSerde<Song> valueSongSerde = new SpecificAvroSerde<>();
    valueSongSerde.configure(serdeConfig, false);
    final SpecificAvroSerde<SongPlayCount> songPlayCountSerde = new SpecificAvroSerde<>();
    songPlayCountSerde.configure(serdeConfig, false);

    final StreamsBuilder builder = new StreamsBuilder();

    // get a stream of play events
    final KStream<String, PlayEvent> playEvents = builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde));

    // get table and create a state store to hold all the songs in the store
    final KTable<Long, Song> songTable = builder.table(
        SONG_FEED,
        Materialized.<Long, Song, KeyValueStore<Bytes, byte[]>>as(ALL_SONGS)
            .withKeySerde(Serdes.Long())
            .withValueSerde(valueSongSerde));

    // Accept play events that have a duration >= the minimum, and re-key them by song id
    final KStream<Long, PlayEvent> playsBySongId = playEvents
        .filter((region, event) -> event.getDuration() >= MIN_CHARTABLE_DURATION)
        .map((key, value) -> KeyValue.pair(value.getSongId(), value));

    // join the plays with song as we will use it later for charting
    final KStream<Long, Song> songPlays = playsBySongId.leftJoin(
        songTable,
        (value1, song) -> song,
        Joined.with(Serdes.Long(), playEventSerde, valueSongSerde));

    // create a state store to track song play counts; the store is keyed by Song, so it uses the
    // key-configured Song serde, matching the groupBy that feeds it
    final KTable<Song, Long> songPlayCounts = songPlays
        .groupBy((songId, song) -> song, Serialized.with(keySongSerde, valueSongSerde))
        .count(
            Materialized.<Song, Long, KeyValueStore<Bytes, byte[]>>as(SONG_PLAY_COUNT_STORE)
                .withKeySerde(keySongSerde)
                .withValueSerde(Serdes.Long()));

    final TopFiveSerde topFiveSerde = new TopFiveSerde();

    // Compute the top five charts for each genre. The results of this computation will continuously update the state
    // store "top-five-songs-by-genre", and this state store can then be queried interactively via a REST API (cf.
    // MusicPlaysRestService) for the latest charts per genre.
    songPlayCounts
        .groupBy(
            (song, plays) -> KeyValue.pair(song.getGenre().toLowerCase(), new SongPlayCount(song.getId(), plays)),
            Serialized.with(Serdes.String(), songPlayCountSerde))
        .aggregate(
            TopFiveSongs::new,
            // adder: a new or updated count for a song enters the chart
            (aggKey, value, aggregate) -> {
                aggregate.add(value);
                return aggregate;
            },
            // subtractor: the previous count for that song leaves the chart
            (aggKey, value, aggregate) -> {
                aggregate.remove(value);
                return aggregate;
            },
            Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_BY_GENRE_STORE)
                .withKeySerde(Serdes.String())
                .withValueSerde(topFiveSerde));

    // Compute the overall top five chart. The results of this computation will continuously update the state
    // store "top-five-songs", and this state store can then be queried interactively via a REST API (cf.
    // MusicPlaysRestService) for the latest overall chart.
    songPlayCounts
        .groupBy(
            (song, plays) -> KeyValue.pair(TOP_FIVE_KEY, new SongPlayCount(song.getId(), plays)),
            Serialized.with(Serdes.String(), songPlayCountSerde))
        .aggregate(
            TopFiveSongs::new,
            (aggKey, value, aggregate) -> {
                aggregate.add(value);
                return aggregate;
            },
            (aggKey, value, aggregate) -> {
                aggregate.remove(value);
                return aggregate;
            },
            Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_STORE)
                .withKeySerde(Serdes.String())
                .withValueSerde(topFiveSerde));

    return new KafkaStreams(builder.build(), streamsConfiguration);
}
Example use of org.apache.kafka.streams.kstream.Materialized in the kafka-streams-examples project by confluentinc.
From the class SessionWindowsExample, method createStreams:
/**
 * Builds the session-window play-count topology and wraps it in a {@link KafkaStreams} instance.
 *
 * <p>Play events from {@code PLAY_EVENTS} are grouped by their existing key, counted per session
 * window (sessions are separated by {@code INACTIVITY_GAP}) into the session store
 * {@code PLAY_EVENTS_PER_SESSION}, and every count update is written to the topic of the same
 * name under a string key of the form {@code <key>@<sessionStart>-><sessionEnd>}.
 *
 * @param bootstrapServers  value for {@code StreamsConfig.BOOTSTRAP_SERVERS_CONFIG}
 * @param schemaRegistryUrl schema registry URL used to configure the Avro serde
 * @param stateDir          value for {@code StreamsConfig.STATE_DIR_CONFIG}
 * @return a {@link KafkaStreams} instance for the topology; it is not started by this method
 */
static KafkaStreams createStreams(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
    final Properties config = new Properties();
    // Give the Streams application a unique name. The name must be unique in the Kafka cluster
    // against which the application is run.
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "session-windows-example");
    config.put(StreamsConfig.CLIENT_ID_CONFIG, "session-windows-example-client");
    // Where to find Kafka broker(s).
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    config.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Set to earliest so we don't miss any data that arrived in the topics before the process
    // started
    config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // disable caching to see session merging
    config.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

    // create and configure the SpecificAvroSerdes required in this example
    final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    playEventSerde.configure(serdeConfig, false);

    final StreamsBuilder builder = new StreamsBuilder();
    builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde))
        // group by key so we can count by session windows
        .groupByKey(Serialized.with(Serdes.String(), playEventSerde))
        // window by session
        .windowedBy(SessionWindows.with(INACTIVITY_GAP))
        // count play events per session
        .count(
            Materialized.<String, Long, SessionStore<Bytes, byte[]>>as(PLAY_EVENTS_PER_SESSION)
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        // convert to a stream so it can be written to a topic
        .toStream()
        // flatten the windowed key into a plain string: key@start->end
        .map((key, value) -> new KeyValue<>(key.key() + "@" + key.window().start() + "->" + key.window().end(), value))
        // write to play-events-per-session topic
        .to(PLAY_EVENTS_PER_SESSION, Produced.with(Serdes.String(), Serdes.Long()));

    // Pass the Properties directly, like the other examples do; the StreamsConfig-based
    // constructor is deprecated in newer Kafka Streams releases.
    return new KafkaStreams(builder.build(), config);
}
Aggregations