Use of io.confluent.examples.streams.avro.PlayEvent in project kafka-streams-examples by confluentinc.
The class KafkaMusicExampleDriver, method main.
public static void main(String[] args) throws Exception {
  final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
  final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
  System.out.println("Connecting to Kafka cluster via bootstrap servers " + bootstrapServers);
  System.out.println("Connecting to Confluent schema registry at " + schemaRegistryUrl);
  // Read the comma-delimited file of songs into a List
  final List<Song> songs = new ArrayList<>();
  final String SONGFILENAME = "song_source.csv";
  final InputStream inputStream = KafkaMusicExample.class.getClassLoader().getResourceAsStream(SONGFILENAME);
  final InputStreamReader streamReader = new InputStreamReader(inputStream, UTF_8);
  try (final BufferedReader br = new BufferedReader(streamReader)) {
    String line = null;
    while ((line = br.readLine()) != null) {
      final String[] values = line.split(",");
      final Song newSong = new Song(Long.parseLong(values[0]), values[1], values[2], values[3], values[4]);
      songs.add(newSong);
    }
  }
  final Properties props = new Properties();
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  final Map<String, String> serdeConfig =
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
  final SpecificAvroSerializer<PlayEvent> playEventSerializer = new SpecificAvroSerializer<>();
  playEventSerializer.configure(serdeConfig, false);
  final SpecificAvroSerializer<Song> songSerializer = new SpecificAvroSerializer<>();
  songSerializer.configure(serdeConfig, false);
  final KafkaProducer<String, PlayEvent> playEventProducer =
      new KafkaProducer<>(props, Serdes.String().serializer(), playEventSerializer);
  final KafkaProducer<Long, Song> songProducer =
      new KafkaProducer<>(props, new LongSerializer(), songSerializer);
  songs.forEach(song -> {
    System.out.println("Writing song information for '" + song.getName() + "' to input topic " + KafkaMusicExample.SONG_FEED);
    songProducer.send(new ProducerRecord<>(KafkaMusicExample.SONG_FEED, song.getId(), song));
  });
  songProducer.close();
  final long duration = 60 * 1000L;
  final Random random = new Random();
  // send a play event every 100 milliseconds
  while (true) {
    final Song song = songs.get(random.nextInt(songs.size()));
    System.out.println("Writing play event for song " + song.getName() + " to input topic " + KafkaMusicExample.PLAY_EVENTS);
    playEventProducer.send(new ProducerRecord<>(KafkaMusicExample.PLAY_EVENTS, "uk", new PlayEvent(song.getId(), duration)));
    Thread.sleep(100L);
  }
}
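Note that the driver's play-event loop runs until the JVM is killed, so playEventProducer is never closed explicitly. A minimal sketch of one way to tidy that up, registered before entering the loop; the shutdown hook is illustrative and not part of the original driver:

Runtime.getRuntime().addShutdownHook(new Thread(() -> {
  // flush any buffered play events and release client resources on JVM shutdown
  playEventProducer.close();
}));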
Use of io.confluent.examples.streams.avro.PlayEvent in project kafka-streams-examples by confluentinc.
The class KafkaMusicExampleTest, method shouldCreateChartsAndAccessThemViaInteractiveQueries.
@Test
public void shouldCreateChartsAndAccessThemViaInteractiveQueries() throws Exception {
  final Properties props = new Properties();
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  final Map<String, String> serdeConfig =
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
  final SpecificAvroSerializer<PlayEvent> playEventSerializer = new SpecificAvroSerializer<>();
  playEventSerializer.configure(serdeConfig, false);
  final SpecificAvroSerializer<Song> songSerializer = new SpecificAvroSerializer<>();
  songSerializer.configure(serdeConfig, false);
  final KafkaProducer<String, PlayEvent> playEventProducer =
      new KafkaProducer<>(props, Serdes.String().serializer(), playEventSerializer);
  final KafkaProducer<Long, Song> songProducer =
      new KafkaProducer<>(props, new LongSerializer(), songSerializer);
  final List<Song> songs = Arrays.asList(
      new Song(1L, "Fresh Fruit For Rotting Vegetables", "Dead Kennedys", "Chemical Warfare", "Punk"),
      new Song(2L, "We Are the League", "Anti-Nowhere League", "Animal", "Punk"),
      new Song(3L, "Live In A Dive", "Subhumans", "All Gone Dead", "Punk"),
      new Song(4L, "PSI", "Wheres The Pope?", "Fear Of God", "Punk"),
      new Song(5L, "Totally Exploited", "The Exploited", "Punks Not Dead", "Punk"),
      new Song(6L, "The Audacity Of Hype", "Jello Biafra And The Guantanamo School Of Medicine", "Three Strikes", "Punk"),
      new Song(7L, "Licensed to Ill", "The Beastie Boys", "Fight For Your Right", "Hip Hop"),
      new Song(8L, "De La Soul Is Dead", "De La Soul", "Oodles Of O's", "Hip Hop"),
      new Song(9L, "Straight Outta Compton", "N.W.A", "Gangsta Gangsta", "Hip Hop"),
      new Song(10L, "Fear Of A Black Planet", "Public Enemy", "911 Is A Joke", "Hip Hop"),
      new Song(11L, "Curtain Call - The Hits", "Eminem", "Fack", "Hip Hop"),
      new Song(12L, "The Calling", "Hilltop Hoods", "The Calling", "Hip Hop"));
  songs.forEach(song -> songProducer.send(new ProducerRecord<>(KafkaMusicExample.SONG_FEED, song.getId(), song)));
  songProducer.flush();
  songProducer.close();
  // create the play events we can use for charting
  sendPlayEvents(6, songs.get(0), playEventProducer);
  sendPlayEvents(5, songs.get(1), playEventProducer);
  sendPlayEvents(4, songs.get(2), playEventProducer);
  sendPlayEvents(3, songs.get(3), playEventProducer);
  sendPlayEvents(2, songs.get(4), playEventProducer);
  sendPlayEvents(1, songs.get(5), playEventProducer);
  sendPlayEvents(6, songs.get(6), playEventProducer);
  sendPlayEvents(5, songs.get(7), playEventProducer);
  sendPlayEvents(4, songs.get(8), playEventProducer);
  sendPlayEvents(3, songs.get(9), playEventProducer);
  sendPlayEvents(2, songs.get(10), playEventProducer);
  sendPlayEvents(1, songs.get(11), playEventProducer);
  playEventProducer.close();
  streams.start();
  // wait until the StreamsMetadata is available as this indicates that
  // KafkaStreams initialization has occurred
  TestUtils.waitForCondition(
      () -> !StreamsMetadata.NOT_AVAILABLE.equals(streams.allMetadataForStore(KafkaMusicExample.TOP_FIVE_SONGS_STORE)),
      MAX_WAIT_MS,
      "StreamsMetadata should be available");
  final String baseUrl = "http://localhost:" + appServerPort + "/kafka-music";
  final Client client = ClientBuilder.newClient();
  // Wait until the all-songs state store has some data in it
  TestUtils.waitForCondition(() -> {
    final ReadOnlyKeyValueStore<Long, Song> songsStore;
    try {
      songsStore = streams.store(KafkaMusicExample.ALL_SONGS, QueryableStoreTypes.<Long, Song>keyValueStore());
      return songsStore.all().hasNext();
    } catch (Exception e) {
      return false;
    }
  }, MAX_WAIT_MS, KafkaMusicExample.ALL_SONGS + " should be non-empty");
  final IntFunction<SongPlayCountBean> intFunction = index -> {
    final Song song = songs.get(index);
    return songCountPlayBean(song, 6L - (index % 6));
  };
  // Verify that the charts are as expected
  verifyChart(baseUrl + "/charts/genre/punk", client,
      IntStream.range(0, 5).mapToObj(intFunction).collect(Collectors.toList()));
  verifyChart(baseUrl + "/charts/genre/hip hop", client,
      IntStream.range(6, 11).mapToObj(intFunction).collect(Collectors.toList()));
  verifyChart(baseUrl + "/charts/top-five", client,
      Arrays.asList(
          songCountPlayBean(songs.get(0), 6L),
          songCountPlayBean(songs.get(6), 6L),
          songCountPlayBean(songs.get(1), 5L),
          songCountPlayBean(songs.get(7), 5L),
          songCountPlayBean(songs.get(2), 4L)));
}
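verifyChart and songCountPlayBean are private helpers defined elsewhere in this test class. As a rough sketch of what a chart verification along these lines could look like (the repository's actual helper may differ), a JAX-RS client can fetch a chart as a typed list and compare it against the expected ranking:

// hypothetical stand-in for the test's verifyChart helper
private static void verifyChartSketch(final String url, final Client client, final List<SongPlayCountBean> expected) {
  final List<SongPlayCountBean> chart = client.target(url)
      .request(MediaType.APPLICATION_JSON_TYPE)
      .get(new GenericType<List<SongPlayCountBean>>() { });
  assertThat(chart, equalTo(expected));
}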
Use of io.confluent.examples.streams.avro.PlayEvent in project kafka-streams-examples by confluentinc.
The class SessionWindowsExampleTest, method shouldCountPlayEventsBySession.
@Test
public void shouldCountPlayEventsBySession() throws Exception {
  final Map<String, String> serdeConfig =
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
  final SpecificAvroSerializer<PlayEvent> playEventSerializer = new SpecificAvroSerializer<>();
  playEventSerializer.configure(serdeConfig, false);
  final Properties producerProperties = new Properties();
  producerProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  final KafkaProducer<String, PlayEvent> playEventProducer =
      new KafkaProducer<>(producerProperties, Serdes.String().serializer(), playEventSerializer);
  final Properties consumerProps = new Properties();
  consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
  consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "session-windows-consumer");
  consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, Serdes.String().deserializer().getClass());
  consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, Serdes.Long().deserializer().getClass());
  final long start = System.currentTimeMillis();
  final String userId = "erica";
  playEventProducer.send(new ProducerRecord<>(SessionWindowsExample.PLAY_EVENTS, null, start, userId, new PlayEvent(1L, 10L)));
  final List<KeyValue<String, Long>> firstSession =
      IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerProps, SessionWindowsExample.PLAY_EVENTS_PER_SESSION, 1);
  // should have a session for erica with start and end time the same
  assertThat(firstSession.get(0), equalTo(KeyValue.pair(userId + "@" + start + "->" + start, 1L)));
  // also look in the store to find the same session
  final ReadOnlySessionStore<String, Long> playEventsPerSession =
      streams.store(SessionWindowsExample.PLAY_EVENTS_PER_SESSION, QueryableStoreTypes.<String, Long>sessionStore());
  final KeyValue<Windowed<String>, Long> next = fetchSessionsFromLocalStore(userId, playEventsPerSession).get(0);
  assertThat(next.key, equalTo(new Windowed<>(userId, new SessionWindow(start, start))));
  assertThat(next.value, equalTo(1L));
  // send another event that is after the inactivity gap, so we have 2 independent sessions
  final long secondSessionStart = start + SessionWindowsExample.INACTIVITY_GAP + 1;
  playEventProducer.send(new ProducerRecord<>(SessionWindowsExample.PLAY_EVENTS, null, secondSessionStart, userId, new PlayEvent(2L, 10L)));
  final List<KeyValue<String, Long>> secondSession =
      IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerProps, SessionWindowsExample.PLAY_EVENTS_PER_SESSION, 1);
  // should have created a new session
  assertThat(secondSession.get(0), equalTo(KeyValue.pair(userId + "@" + secondSessionStart + "->" + secondSessionStart, 1L)));
  // should now have 2 active sessions in the store
  final List<KeyValue<Windowed<String>, Long>> results = fetchSessionsFromLocalStore(userId, playEventsPerSession);
  assertThat(results, equalTo(Arrays.asList(
      KeyValue.pair(new Windowed<>(userId, new SessionWindow(start, start)), 1L),
      KeyValue.pair(new Windowed<>(userId, new SessionWindow(secondSessionStart, secondSessionStart)), 1L))));
  // create an event between the two sessions to demonstrate merging
  final long mergeTime = start + SessionWindowsExample.INACTIVITY_GAP / 2;
  playEventProducer.send(new ProducerRecord<>(SessionWindowsExample.PLAY_EVENTS, null, mergeTime, userId, new PlayEvent(3L, 10L)));
  playEventProducer.close();
  final List<KeyValue<String, Long>> merged =
      IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerProps, SessionWindowsExample.PLAY_EVENTS_PER_SESSION, 3);
  // should have merged all sessions into one and sent tombstones for the sessions that were merged
  assertThat(merged, equalTo(Arrays.asList(
      KeyValue.pair(userId + "@" + start + "->" + start, null),
      KeyValue.pair(userId + "@" + secondSessionStart + "->" + secondSessionStart, null),
      KeyValue.pair(userId + "@" + start + "->" + secondSessionStart, 3L))));
  // should only have the merged session in the store
  final List<KeyValue<Windowed<String>, Long>> mergedResults = fetchSessionsFromLocalStore(userId, playEventsPerSession);
  assertThat(mergedResults, equalTo(Collections.singletonList(
      KeyValue.pair(new Windowed<>(userId, new SessionWindow(start, secondSessionStart)), 3L))));
}
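fetchSessionsFromLocalStore is another helper defined elsewhere in the class. A plausible sketch, assuming it simply drains the session store's fetch iterator for one user (the name and shape here are illustrative):

private static List<KeyValue<Windowed<String>, Long>> fetchSessionsFromLocalStore(
    final String userId, final ReadOnlySessionStore<String, Long> store) {
  final List<KeyValue<Windowed<String>, Long>> sessions = new ArrayList<>();
  // ReadOnlySessionStore.fetch returns all sessions for the given key, oldest first
  try (final KeyValueIterator<Windowed<String>, Long> iterator = store.fetch(userId)) {
    iterator.forEachRemaining(sessions::add);
  }
  return sessions;
}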
Use of io.confluent.examples.streams.avro.PlayEvent in project kafka-streams-examples by confluentinc.
The class KafkaMusicExample, method createChartsStreams.
static KafkaStreams createChartsStreams(final String bootstrapServers,
                                        final String schemaRegistryUrl,
                                        final int applicationServerPort,
                                        final String stateDir) {
  final Properties streamsConfiguration = new Properties();
  // Give the Streams application a unique name. The name must be unique in the Kafka cluster
  // against which the application is run.
  streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-music-charts");
  // Where to find Kafka broker(s).
  streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  // Provide the details of our embedded http service that we'll use to connect to this streams
  // instance and discover locations of stores.
  streamsConfiguration.put(StreamsConfig.APPLICATION_SERVER_CONFIG, "localhost:" + applicationServerPort);
  streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
  // Set to earliest so we don't miss any data that arrived in the topics before the process started
  streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  // Set the commit interval to 500ms so that any changes are flushed frequently and the top five
  // charts are updated with low latency.
  streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);
  // Allow the user to fine-tune `metadata.max.age.ms` via Java system properties from the CLI.
  // Lowering this parameter from its default of 5 minutes to a few seconds is helpful in
  // situations where the input topic was not pre-created before running the application, because
  // the application will discover a newly created topic faster. In production, you would
  // typically not change this parameter from its default.
  final String metadataMaxAgeMs = System.getProperty(ConsumerConfig.METADATA_MAX_AGE_CONFIG);
  if (metadataMaxAgeMs != null) {
    try {
      final int value = Integer.parseInt(metadataMaxAgeMs);
      streamsConfiguration.put(ConsumerConfig.METADATA_MAX_AGE_CONFIG, value);
      System.out.println("Set consumer configuration " + ConsumerConfig.METADATA_MAX_AGE_CONFIG + " to " + value);
    } catch (NumberFormatException ignored) {
      // fall back to the default when the override is not a valid integer
    }
  }
  // create and configure the SpecificAvroSerdes required in this example
  final Map<String, String> serdeConfig =
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
  final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
  playEventSerde.configure(serdeConfig, false);
  final SpecificAvroSerde<Song> keySongSerde = new SpecificAvroSerde<>();
  keySongSerde.configure(serdeConfig, true);
  final SpecificAvroSerde<Song> valueSongSerde = new SpecificAvroSerde<>();
  valueSongSerde.configure(serdeConfig, false);
  final SpecificAvroSerde<SongPlayCount> songPlayCountSerde = new SpecificAvroSerde<>();
  songPlayCountSerde.configure(serdeConfig, false);
  final StreamsBuilder builder = new StreamsBuilder();
  // get a stream of play events
  final KStream<String, PlayEvent> playEvents =
      builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde));
  // get the song feed as a table, materialized into a state store holding all the songs
  final KTable<Long, Song> songTable = builder.table(SONG_FEED,
      Materialized.<Long, Song, KeyValueStore<Bytes, byte[]>>as(ALL_SONGS)
          .withKeySerde(Serdes.Long())
          .withValueSerde(valueSongSerde));
  // Accept only play events with a duration >= the minimum, re-keyed by song id
  final KStream<Long, PlayEvent> playsBySongId = playEvents
      .filter((region, event) -> event.getDuration() >= MIN_CHARTABLE_DURATION)
      .map((key, value) -> KeyValue.pair(value.getSongId(), value));
  // join the plays with the song table as we will use the song details later for charting
  final KStream<Long, Song> songPlays = playsBySongId.leftJoin(songTable,
      (value1, song) -> song,
      Joined.with(Serdes.Long(), playEventSerde, valueSongSerde));
  // create a state store to track song play counts
  final KTable<Song, Long> songPlayCounts = songPlays
      .groupBy((songId, song) -> song, Serialized.with(keySongSerde, valueSongSerde))
      .count(Materialized.<Song, Long, KeyValueStore<Bytes, byte[]>>as(SONG_PLAY_COUNT_STORE)
          .withKeySerde(valueSongSerde)
          .withValueSerde(Serdes.Long()));
  final TopFiveSerde topFiveSerde = new TopFiveSerde();
  // Compute the top five charts for each genre. The results of this computation will continuously update the state
  // store "top-five-songs-by-genre", and this state store can then be queried interactively via a REST API (cf.
  // MusicPlaysRestService) for the latest charts per genre.
  songPlayCounts
      .groupBy((song, plays) -> KeyValue.pair(song.getGenre().toLowerCase(), new SongPlayCount(song.getId(), plays)),
          Serialized.with(Serdes.String(), songPlayCountSerde))
      .aggregate(TopFiveSongs::new,
          (aggKey, value, aggregate) -> {
            aggregate.add(value);
            return aggregate;
          },
          (aggKey, value, aggregate) -> {
            aggregate.remove(value);
            return aggregate;
          },
          Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_BY_GENRE_STORE)
              .withKeySerde(Serdes.String())
              .withValueSerde(topFiveSerde));
  // Compute the overall top five chart. The results of this computation will continuously update the state
  // store "top-five-songs", and this state store can then be queried interactively via a REST API (cf.
  // MusicPlaysRestService) for the latest overall chart.
  songPlayCounts
      .groupBy((song, plays) -> KeyValue.pair(TOP_FIVE_KEY, new SongPlayCount(song.getId(), plays)),
          Serialized.with(Serdes.String(), songPlayCountSerde))
      .aggregate(TopFiveSongs::new,
          (aggKey, value, aggregate) -> {
            aggregate.add(value);
            return aggregate;
          },
          (aggKey, value, aggregate) -> {
            aggregate.remove(value);
            return aggregate;
          },
          Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_STORE)
              .withKeySerde(Serdes.String())
              .withValueSerde(topFiveSerde));
  return new KafkaStreams(builder.build(), streamsConfiguration);
}
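Once the returned KafkaStreams instance is started and its state stores are ready, the materialized charts can be queried interactively. A minimal sketch, assuming a running instance named streams that hosts the store locally:

final ReadOnlyKeyValueStore<String, TopFiveSongs> topFiveStore =
    streams.store(TOP_FIVE_SONGS_STORE, QueryableStoreTypes.<String, TopFiveSongs>keyValueStore());
// TOP_FIVE_KEY is the constant key the overall chart is aggregated under
final TopFiveSongs overallChart = topFiveStore.get(TOP_FIVE_KEY);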
Use of io.confluent.examples.streams.avro.PlayEvent in project kafka-streams-examples by confluentinc.
The class SessionWindowsExample, method createStreams.
static KafkaStreams createStreams(final String bootstrapServers,
                                  final String schemaRegistryUrl,
                                  final String stateDir) {
  final Properties config = new Properties();
  // Give the Streams application a unique name. The name must be unique in the Kafka cluster
  // against which the application is run.
  config.put(StreamsConfig.APPLICATION_ID_CONFIG, "session-windows-example");
  config.put(StreamsConfig.CLIENT_ID_CONFIG, "session-windows-example-client");
  // Where to find Kafka broker(s).
  config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  config.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
  // Set to earliest so we don't miss any data that arrived in the topics before the process started
  config.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  // disable caching to see session merging
  config.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
  // create and configure the SpecificAvroSerdes required in this example
  final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
  final Map<String, String> serdeConfig =
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
  playEventSerde.configure(serdeConfig, false);
  final StreamsBuilder builder = new StreamsBuilder();
  builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde))
      .groupByKey(Serialized.with(Serdes.String(), playEventSerde))
      .windowedBy(SessionWindows.with(INACTIVITY_GAP))
      .count(Materialized.<String, Long, SessionStore<Bytes, byte[]>>as(PLAY_EVENTS_PER_SESSION)
          .withKeySerde(Serdes.String())
          .withValueSerde(Serdes.Long()))
      .toStream()
      .map((key, value) -> new KeyValue<>(key.key() + "@" + key.window().start() + "->" + key.window().end(), value))
      .to(PLAY_EVENTS_PER_SESSION, Produced.with(Serdes.String(), Serdes.Long()));
  return new KafkaStreams(builder.build(), new StreamsConfig(config));
}
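A typical way to run this topology, sketched under the assumption that wiping local state on startup is acceptable for the example (cleanUp forces reprocessing from the earliest offsets):

final KafkaStreams streams =
    SessionWindowsExample.createStreams("localhost:9092", "http://localhost:8081", "/tmp/kafka-streams");
streams.cleanUp();
streams.start();
// close the streams application and release all of its resources on shutdown
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));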