Use of org.apache.kafka.streams.kstream.KStream in project kafka-streams-examples by confluentinc.
The class SumLambdaIntegrationTest, method shouldSumEvenNumbers.
@Test
public void shouldSumEvenNumbers() throws Exception {
List<Integer> inputValues = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
List<Integer> expectedValues = Collections.singletonList(30);
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
// The commit interval for flushing records to state stores and downstream must be lower than
// this integration test's timeout (30 secs) to ensure we observe the expected processing results.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Use a temporary directory for storing state, which will be automatically removed after the test.
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
KStream<Integer, Integer> input = builder.stream(inputTopic);
KTable<Integer, Integer> sumOfEvenNumbers = input
    .filter((k, v) -> v % 2 == 0)
    .selectKey((k, v) -> 1)
    .groupByKey()
    .reduce((v1, v2) -> v1 + v2);
sumOfEvenNumbers.toStream().to(outputTopic);
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
//
// Step 3: Verify the application's output data.
//
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "sum-lambda-integration-test-standard-consumer");
consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
List<Integer> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
streams.close();
assertThat(actualValues).isEqualTo(expectedValues);
}
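The same sum-of-even-numbers topology can also be exercised without an embedded Kafka cluster by using TopologyTestDriver. The following is a minimal sketch and is not part of the original test: it assumes Kafka Streams 2.4+ (for TestInputTopic/TestOutputTopic) and uses the hypothetical topic names "input-topic" and "output-topic".
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.kafka.common.serialization.IntegerDeserializer;
import org.apache.kafka.common.serialization.IntegerSerializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;

public class SumOfEvenNumbersTestDriverSketch {

  public static void main(final String[] args) {
    final StreamsBuilder builder = new StreamsBuilder();
    // Same topology shape as the integration test: keep even values, re-key to a constant, sum.
    final KStream<Integer, Integer> input = builder.stream("input-topic");
    final KTable<Integer, Integer> sumOfEvenNumbers = input
        .filter((k, v) -> v % 2 == 0)
        .selectKey((k, v) -> 1)
        .groupByKey()
        .reduce(Integer::sum);
    sumOfEvenNumbers.toStream().to("output-topic");

    final Properties config = new Properties();
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-test-driver-sketch");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234"); // never contacted by the test driver
    config.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
    config.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
    // Disable record caching so every update of the running sum is emitted downstream.
    config.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

    try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), config)) {
      final TestInputTopic<Integer, Integer> in =
          driver.createInputTopic("input-topic", new IntegerSerializer(), new IntegerSerializer());
      final TestOutputTopic<Integer, Integer> out =
          driver.createOutputTopic("output-topic", new IntegerDeserializer(), new IntegerDeserializer());
      in.pipeValueList(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
      // With caching disabled the output is the running sum 2, 6, 12, 20, 30; the final value is 30.
      final List<Integer> results = out.readValuesToList();
      System.out.println("last emitted sum = " + results.get(results.size() - 1));
    }
  }
}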
Use of org.apache.kafka.streams.kstream.KStream in project kafka-streams-examples by confluentinc.
The class KafkaMusicExample, method createChartsStreams.
static KafkaStreams createChartsStreams(final String bootstrapServers, final String schemaRegistryUrl, final int applicationServerPort, final String stateDir) {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-music-charts");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Provide the details of our embedded http service that we'll use to connect to this streams
// instance and discover locations of stores.
streamsConfiguration.put(StreamsConfig.APPLICATION_SERVER_CONFIG, "localhost:" + applicationServerPort);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
// Set to earliest so we don't miss any data that arrived in the topics before the process
// started
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Set the commit interval to 500ms so that any changes are flushed frequently and the top five
// charts are updated with low latency.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);
// Allow the user to fine-tune the `metadata.max.age.ms` via Java system properties from the CLI.
// Lowering this parameter from its default of 5 minutes to a few seconds is helpful in
// situations where the input topic was not pre-created before running the application because
// the application will discover a newly created topic faster. In production, you would
// typically not change this parameter from its default.
String metadataMaxAgeMs = System.getProperty(ConsumerConfig.METADATA_MAX_AGE_CONFIG);
if (metadataMaxAgeMs != null) {
try {
int value = Integer.parseInt(metadataMaxAgeMs);
streamsConfiguration.put(ConsumerConfig.METADATA_MAX_AGE_CONFIG, value);
System.out.println("Set consumer configuration " + ConsumerConfig.METADATA_MAX_AGE_CONFIG + " to " + value);
} catch (NumberFormatException ignored) {
}
}
// create and configure the SpecificAvroSerdes required in this example
final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
playEventSerde.configure(serdeConfig, false);
final SpecificAvroSerde<Song> keySongSerde = new SpecificAvroSerde<>();
keySongSerde.configure(serdeConfig, true);
final SpecificAvroSerde<Song> valueSongSerde = new SpecificAvroSerde<>();
valueSongSerde.configure(serdeConfig, false);
final SpecificAvroSerde<SongPlayCount> songPlayCountSerde = new SpecificAvroSerde<>();
songPlayCountSerde.configure(serdeConfig, false);
final StreamsBuilder builder = new StreamsBuilder();
// get a stream of play events
final KStream<String, PlayEvent> playEvents = builder.stream(PLAY_EVENTS, Consumed.with(Serdes.String(), playEventSerde));
// create a KTable, backed by a state store, that holds all the songs
final KTable<Long, Song> songTable = builder.table(SONG_FEED, Materialized.<Long, Song, KeyValueStore<Bytes, byte[]>>as(ALL_SONGS).withKeySerde(Serdes.Long()).withValueSerde(valueSongSerde));
// Accept play events that have a duration >= the minimum
final KStream<Long, PlayEvent> playsBySongId = playEvents
    .filter((region, event) -> event.getDuration() >= MIN_CHARTABLE_DURATION)
    .map((key, value) -> KeyValue.pair(value.getSongId(), value));
// join the plays with the songs table, as we will need the song details later for charting
final KStream<Long, Song> songPlays = playsBySongId.leftJoin(songTable, (value1, song) -> song, Joined.with(Serdes.Long(), playEventSerde, valueSongSerde));
// create a state store to track song play counts
final KTable<Song, Long> songPlayCounts = songPlays
    .groupBy((songId, song) -> song, Serialized.with(keySongSerde, valueSongSerde))
    .count(Materialized.<Song, Long, KeyValueStore<Bytes, byte[]>>as(SONG_PLAY_COUNT_STORE)
        .withKeySerde(valueSongSerde)
        .withValueSerde(Serdes.Long()));
final TopFiveSerde topFiveSerde = new TopFiveSerde();
// Compute the top five charts for each genre. The results of this computation will continuously update the state
// store "top-five-songs-by-genre", and this state store can then be queried interactively via a REST API (cf.
// MusicPlaysRestService) for the latest charts per genre.
songPlayCounts
    .groupBy((song, plays) -> KeyValue.pair(song.getGenre().toLowerCase(), new SongPlayCount(song.getId(), plays)),
             Serialized.with(Serdes.String(), songPlayCountSerde))
    .aggregate(TopFiveSongs::new,
        (aggKey, value, aggregate) -> { aggregate.add(value); return aggregate; },
        (aggKey, value, aggregate) -> { aggregate.remove(value); return aggregate; },
        Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_BY_GENRE_STORE)
            .withKeySerde(Serdes.String())
            .withValueSerde(topFiveSerde));
// Compute the overall top five chart. The results of this computation will continuously update the state
// store "top-five-songs", and this state store can then be queried interactively via a REST API (cf.
// MusicPlaysRestService) for the latest overall chart.
songPlayCounts
    .groupBy((song, plays) -> KeyValue.pair(TOP_FIVE_KEY, new SongPlayCount(song.getId(), plays)),
             Serialized.with(Serdes.String(), songPlayCountSerde))
    .aggregate(TopFiveSongs::new,
        (aggKey, value, aggregate) -> { aggregate.add(value); return aggregate; },
        (aggKey, value, aggregate) -> { aggregate.remove(value); return aggregate; },
        Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_STORE)
            .withKeySerde(Serdes.String())
            .withValueSerde(topFiveSerde));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
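Once the KafkaStreams instance returned by createChartsStreams() is running, the materialized stores can be queried interactively, as the comments above describe. The method below is only a sketch of such a local lookup, not code from MusicPlaysRestService: it assumes Kafka Streams 2.5+ (for StoreQueryParameters), that the queried key is hosted by this instance, that the store name literal matches the constant used above, and that TopFiveSongs (from the example project) is iterable over SongPlayCount with the usual Avro getters.
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

static void printTopFiveForGenre(final KafkaStreams streams, final String genre) {
  // Look up the locally materialized "top-five-songs-by-genre" store by name. On a multi-instance
  // deployment you would first locate the owning instance via streams.queryMetadataForKey(...).
  final ReadOnlyKeyValueStore<String, TopFiveSongs> store = streams.store(
      StoreQueryParameters.fromNameAndType("top-five-songs-by-genre",
          QueryableStoreTypes.<String, TopFiveSongs>keyValueStore()));
  // The chart is keyed by lower-cased genre, matching the groupBy above.
  final TopFiveSongs topFive = store.get(genre.toLowerCase());
  if (topFive == null) {
    System.out.println("No chart yet for genre " + genre);
    return;
  }
  // Assumes TopFiveSongs iterates over SongPlayCount entries (as in the example project).
  topFive.forEach(songPlayCount ->
      System.out.println(songPlayCount.getSongId() + " -> " + songPlayCount.getPlays()));
}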
Use of org.apache.kafka.streams.kstream.KStream in project kafka-streams-examples by confluentinc.
The class EmailService, method processStreams.
private KafkaStreams processStreams(final String bootstrapServers, final String stateDir) {
KStreamBuilder builder = new KStreamBuilder();
// Create the streams/tables for the join
KStream<String, Order> orders = builder.stream(ORDERS.keySerde(), ORDERS.valueSerde(), ORDERS.name());
KStream<String, Payment> payments = builder.stream(PAYMENTS.keySerde(), PAYMENTS.valueSerde(), PAYMENTS.name());
GlobalKTable<Long, Customer> customers = builder.globalTable(CUSTOMERS.keySerde(), CUSTOMERS.valueSerde(), CUSTOMERS.name());
// Rekey payments to be by OrderId for the windowed join
payments = payments.selectKey((s, payment) -> payment.getOrderId());
// Join the two streams and the table then send an email for each
orders
    // join the Orders and Payments streams within the one-minute window
    .join(payments, EmailTuple::new, JoinWindows.of(1 * MIN), serdes)
    // note how, because we use a GlobalKTable, we can join on any attribute of the Customer
    .join(customers, (key1, tuple) -> tuple.order.getCustomerId(),
          (tuple, customer) -> tuple.setCustomer(customer))
    .peek((key, emailTuple) -> emailer.sendEmail(emailTuple));
return new KafkaStreams(builder, baseStreamsConfig(bootstrapServers, stateDir, APP_ID));
}
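This snippet still uses the older KStreamBuilder API. Purely as an illustration (not the project's code), the same join chain could look roughly as follows with the newer StreamsBuilder DSL; Order, Payment, Customer, EmailTuple, the ORDERS/PAYMENTS/CUSTOMERS topic definitions and the emailer come from the microservices example, and StreamJoined assumes Kafka Streams 2.4+.
import java.time.Duration;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.JoinWindows;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.StreamJoined;

final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, Order> orders =
    builder.stream(ORDERS.name(), Consumed.with(ORDERS.keySerde(), ORDERS.valueSerde()));
final KStream<String, Payment> payments =
    builder.stream(PAYMENTS.name(), Consumed.with(PAYMENTS.keySerde(), PAYMENTS.valueSerde()))
        // re-key payments by order id so the windowed join lines up with the orders stream
        .selectKey((id, payment) -> payment.getOrderId());
final GlobalKTable<Long, Customer> customers =
    builder.globalTable(CUSTOMERS.name(), Consumed.with(CUSTOMERS.keySerde(), CUSTOMERS.valueSerde()));

orders
    // stream-stream join within a one-minute window, with the serdes given explicitly
    .join(payments, EmailTuple::new, JoinWindows.of(Duration.ofMinutes(1)),
          StreamJoined.with(ORDERS.keySerde(), ORDERS.valueSerde(), PAYMENTS.valueSerde()))
    // GlobalKTable join: the key extractor may use any attribute of the joined value
    .join(customers, (orderId, tuple) -> tuple.order.getCustomerId(),
          (tuple, customer) -> tuple.setCustomer(customer))
    .peek((key, emailTuple) -> emailer.sendEmail(emailTuple));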
Use of org.apache.kafka.streams.kstream.KStream in project kafka-streams-examples by confluentinc.
The class AnomalyDetectionLambdaExample, method main.
public static void main(final String[] args) throws Exception {
final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "anomaly-detection-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "anomaly-detection-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
// Set the commit interval to 500ms so that any changes are flushed frequently. Low latency is
// important for anomaly detection.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 500);
final Serde<String> stringSerde = Serdes.String();
final Serde<Long> longSerde = Serdes.Long();
final StreamsBuilder builder = new StreamsBuilder();
// Read the source stream. In this example, we ignore whatever is stored in the record key and
// assume the record value contains the username (and each record would represent a single
// click by the corresponding user).
final KStream<String, String> views = builder.stream("UserClicks");
final KTable<Windowed<String>, Long> anomalousUsers = views
    .map((ignoredKey, username) -> new KeyValue<>(username, username))
    .groupByKey()
    .windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(1)))
    .count()
    .filter((windowedUserId, count) -> count >= 3);
// Note: The following operations would NOT be needed for the actual anomaly detection,
// which would normally stop at the filter() above. We use the operations below only to
// "massage" the output data so it is easier to inspect on the console via
// kafka-console-consumer.
final KStream<String, Long> anomalousUsersForConsole = anomalousUsers
    .toStream()
    .filter((windowedUserId, count) -> count != null)
    .map((windowedUserId, count) -> new KeyValue<>(windowedUserId.toString(), count));
// write to the result topic
anomalousUsersForConsole.to("AnomalousUsers", Produced.with(stringSerde, longSerde));
final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
// Always (and unconditionally) clean local state prior to starting the processing topology.
// We opt for this unconditional call here because this will make it easier for you to play around with the example
// when resetting the application for doing a re-run (via the Application Reset Tool,
// http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
//
// The drawback of cleaning up local state prior to startup is that your app must rebuild its local state
// from scratch, which will take time and will require reading all the state-relevant data from the Kafka
// cluster over the network. Thus, in a production scenario you typically do not want to always clean up
// as we do here, but only when it is truly needed, i.e., under certain conditions (e.g., the presence of a
// command-line flag for your app).
// See `ApplicationResetExample.java` for a production-like example.
streams.cleanUp();
streams.start();
// Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
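One caveat of the topology above: the windowed count() emits an update for every incoming click, so a user can be reported to AnomalousUsers several times while a one-minute window is still open. The fragment below is only a sketch of a possible refinement using suppression, assuming Kafka Streams 2.1+ (Duration-based windows and suppress()) and the same String default serdes configured above; it is not part of the original example.
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Suppressed;
import org.apache.kafka.streams.kstream.Suppressed.BufferConfig;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;

final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> views = builder.stream("UserClicks");
final KTable<Windowed<String>, Long> anomalousUsers = views
    .map((ignoredKey, username) -> new KeyValue<>(username, username))
    .groupByKey()
    // one-minute windows with a short grace period for late-arriving clicks
    .windowedBy(TimeWindows.of(Duration.ofMinutes(1)).grace(Duration.ofSeconds(10)))
    .count()
    // emit only one final count per user and window, once the window has closed
    .suppress(Suppressed.untilWindowCloses(BufferConfig.unbounded()))
    .filter((windowedUserId, count) -> count >= 3);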
Use of org.apache.kafka.streams.kstream.KStream in project kafka-streams-examples by confluentinc.
The class WikipediaFeedAvroLambdaExample, method buildWikipediaFeed.
static KafkaStreams buildWikipediaFeed(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
final Properties streamsConfiguration = new Properties();
// Give the Streams application a unique name. The name must be unique in the Kafka cluster
// against which the application is run.
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-avro-lambda-example");
streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-avro-lambda-example-client");
// Where to find Kafka broker(s).
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
// Where to find the Confluent schema registry instance(s)
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
// Specify default (de)serializers for record keys and for record values.
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
final Serde<String> stringSerde = Serdes.String();
final Serde<Long> longSerde = Serdes.Long();
final StreamsBuilder builder = new StreamsBuilder();
// read the source stream
final KStream<String, WikiFeed> feeds = builder.stream(WikipediaFeedAvroExample.WIKIPEDIA_FEED);
// aggregate the counts of new feeds by user
final KTable<String, Long> aggregated = feeds
    .filter((dummy, value) -> value.getIsNew())
    .map((key, value) -> new KeyValue<>(value.getUser(), value))
    .groupByKey()
    .count();
// write to the result topic; we need to override the default serdes
aggregated.toStream().to(WikipediaFeedAvroExample.WIKIPEDIA_STATS, Produced.with(stringSerde, longSerde));
return new KafkaStreams(builder.build(), streamsConfiguration);
}
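Because DEFAULT_VALUE_SERDE_CLASS_CONFIG is set to SpecificAvroSerde.class above, the schema registry URL must also be present in the streams configuration (as it is). As a sketch of an alternative style, not taken from the project, the Avro serde could instead be configured explicitly and passed per-operation; schemaRegistryUrl is assumed to be in scope and WikiFeed is the example project's generated Avro class.
import java.util.Collections;
import java.util.Map;
import io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig;
import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;

// configure a value serde for WikiFeed against the given schema registry (isKey = false)
final Map<String, String> serdeConfig = Collections.singletonMap(
    AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
final SpecificAvroSerde<WikiFeed> wikiFeedSerde = new SpecificAvroSerde<>();
wikiFeedSerde.configure(serdeConfig, false);

final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, WikiFeed> feeds = builder.stream(
    WikipediaFeedAvroExample.WIKIPEDIA_FEED, Consumed.with(Serdes.String(), wikiFeedSerde));
final KTable<String, Long> aggregated = feeds
    .filter((dummy, value) -> value.getIsNew())
    .map((key, value) -> new KeyValue<>(value.getUser(), value))
    // explicit serdes for the repartition topic created by the grouping (requires Grouped, Kafka Streams 2.1+)
    .groupByKey(Grouped.with(Serdes.String(), wikiFeedSerde))
    .count();
aggregated.toStream().to(WikipediaFeedAvroExample.WIKIPEDIA_STATS, Produced.with(Serdes.String(), Serdes.Long()));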