Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class ValidationsAggregatorService, method aggregateOrderValidations.
private KafkaStreams aggregateOrderValidations(String bootstrapServers, String stateDir) {
    // TODO put into a KTable to make dynamically configurable
    final int numberOfRules = 3;

    StreamsBuilder builder = new StreamsBuilder();
    KStream<String, OrderValidation> validations = builder.stream(ORDER_VALIDATIONS.name(), serdes1);
    KStream<String, Order> orders = builder.stream(ORDERS.name(), serdes2)
        .filter((id, order) -> OrderState.CREATED.equals(order.getState()));

    // If all rules pass then validate the order
    validations.groupByKey(serdes3)
        .windowedBy(SessionWindows.with(5 * MIN))
        .aggregate(
            () -> 0L,
            (id, result, total) -> PASS.equals(result.getValidationResult()) ? total + 1 : total,
            // Include a merger, as we're using session windows.
            (k, a, b) -> b == null ? a : b,
            Materialized.with(null, Serdes.Long()))
        .toStream((windowedKey, total) -> windowedKey.key())
        .filter((k1, v) -> v != null)
        .filter((k, total) -> total >= numberOfRules)
        .join(orders,
            (id, order) -> newBuilder(order).setState(VALIDATED).build(),
            JoinWindows.of(5 * MIN),
            serdes4)
        .to(ORDERS.name(), serdes5);

    // If any rule fails then fail the order
    validations.filter((id, rule) -> FAIL.equals(rule.getValidationResult()))
        .join(orders,
            (id, order) -> newBuilder(order).setState(OrderState.FAILED).build(),
            JoinWindows.of(5 * MIN),
            serdes7)
        .groupByKey(serdes6)
        .reduce((order, v1) -> order)
        .toStream()
        .to(ORDERS.name(), Produced.with(ORDERS.keySerde(), ORDERS.valueSerde()));

    return new KafkaStreams(builder.build(), baseStreamsConfig(bootstrapServers, stateDir, ORDERS_SERVICE_APP_ID));
}
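As an aside on the TODO above: one way to make the rule count dynamically configurable would be to derive it from a compacted topic of rule definitions instead of hard-coding the constant. A minimal sketch, assuming a hypothetical order-validation-rules topic keyed by rule name (the topic name and the count aggregation are illustrative, not part of the example project):

// Hypothetical: derive the number of active rules from a compacted topic.
KTable<String, String> rules = builder.table("order-validation-rules",
    Consumed.with(Serdes.String(), Serdes.String()));
// Re-key every rule to one constant key and count the entries, yielding a
// one-row KTable that holds the current rule count.
KTable<String, Long> ruleCount = rules
    .groupBy((ruleName, ruleConfig) -> KeyValue.pair("count", ruleConfig),
        Serialized.with(Serdes.String(), Serdes.String()))
    .count();

Comparing the per-order pass totals against this one-row table would then require a further join, which the hard-coded numberOfRules avoids.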
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class SpecificAvroIntegrationTest, method shouldRoundTripSpecificAvroDataThroughKafka.
@Test
public void shouldRoundTripSpecificAvroDataThroughKafka() throws Exception {
List<WikiFeed> inputValues = Collections.singletonList(WikiFeed.newBuilder().setUser("alice").setIsNew(true).setContent("lorem ipsum").build());
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "specific-avro-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Write the input data as-is to the output topic.
//
// Normally, because a) we have already configured the correct default serdes for keys and
// values and b) the types for keys and values are the same for both the input topic and the
// output topic, we would only need to define:
//
// builder.stream(inputTopic).to(outputTopic);
//
// However, in the code below we intentionally override the default serdes in `to()` to
// demonstrate how you can construct and configure a specific Avro serde manually.
final Serde<String> stringSerde = Serdes.String();
final Serde<WikiFeed> specificAvroSerde = new SpecificAvroSerde<>();
// Note how we must manually call `configure()` on this serde to set the schema registry
// url. This is different from the default serdes (see `streamsConfiguration` above), which
// Kafka Streams configures automatically from the application's `StreamsConfig`.
final boolean isKeySerde = false;
specificAvroSerde.configure(Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl()), isKeySerde);
KStream<String, WikiFeed> stream = builder.stream(inputTopic);
stream.to(outputTopic, Produced.with(stringSerde, specificAvroSerde));
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
producerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
//
// Step 3: Verify the application's output data.
//
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "specific-avro-integration-test-standard-consumer");
consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class);
consumerConfig.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, CLUSTER.schemaRegistryUrl());
consumerConfig.put(KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG, true);
List<WikiFeed> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, inputValues.size());
streams.close();
assertEquals(inputValues, actualValues);
}
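If the same manual serde configuration is needed in several places, it can be factored into a small helper. A minimal sketch (the helper name and signature are illustrative, not part of the example project):

// Hypothetical helper: build a SpecificAvroSerde that is already configured
// against the given schema registry, for either the key or the value side.
static <T extends SpecificRecord> SpecificAvroSerde<T> configuredSpecificSerde(
    String schemaRegistryUrl, boolean isKeySerde) {
  SpecificAvroSerde<T> serde = new SpecificAvroSerde<>();
  serde.configure(
      Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG,
          schemaRegistryUrl),
      isKeySerde);
  return serde;
}

Here SpecificRecord is org.apache.avro.specific.SpecificRecord, the base interface of generated Avro classes such as WikiFeed.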
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class StreamToStreamJoinIntegrationTest, method shouldJoinTwoStreams.
@Test
public void shouldJoinTwoStreams() throws Exception {
// Input 1: Ad impressions
List<KeyValue<String, String>> inputAdImpressions = Arrays.asList(
    new KeyValue<>("car-advertisement", "shown"),
    new KeyValue<>("newspaper-advertisement", "shown"),
    new KeyValue<>("gadget-advertisement", "shown"));
// Input 2: Ad clicks
List<KeyValue<String, String>> inputAdClicks = Arrays.asList(
    new KeyValue<>("newspaper-advertisement", "clicked"),
    new KeyValue<>("gadget-advertisement", "clicked"),
    new KeyValue<>("newspaper-advertisement", "clicked"));
List<KeyValue<String, String>> expectedResults = Arrays.asList(
    new KeyValue<>("car-advertisement", "shown/null"),
    new KeyValue<>("newspaper-advertisement", "shown/null"),
    new KeyValue<>("gadget-advertisement", "shown/null"),
    new KeyValue<>("newspaper-advertisement", "shown/clicked"),
    new KeyValue<>("gadget-advertisement", "shown/clicked"),
    new KeyValue<>("newspaper-advertisement", "shown/clicked"));
//
// Step 1: Configure and start the processor topology.
//
final Serde<String> stringSerde = Serdes.String();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "stream-stream-join-lambda-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
// The commit interval for flushing records to state stores and downstream must be lower than
// this integration test's timeout (30 secs) to ensure we observe the expected processing results.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Use a temporary directory for storing state, which will be automatically removed after the test.
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
StreamsBuilder builder = new StreamsBuilder();
KStream<String, String> impressions = builder.stream(adImpressionsTopic);
KStream<String, String> clicks = builder.stream(adClicksTopic);
// In this example, we opt to perform an OUTER JOIN between the two streams. We picked this
// join type to show how the Streams API will send further join updates downstream whenever,
// for the same join key (e.g. "newspaper-advertisement"), we receive an update from either of
// the two joined streams during the defined join window.
KStream<String, String> impressionsAndClicks = impressions.outerJoin(
    clicks,
    (impressionValue, clickValue) -> impressionValue + "/" + clickValue,
    // KStream-KStream joins are always windowed joins, hence we must provide a join window.
    JoinWindows.of(TimeUnit.SECONDS.toMillis(5)));
// Write the results to the output topic.
impressionsAndClicks.to(outputTopic);
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Publish ad impressions.
//
Properties impressionsProducerConfig = new Properties();
impressionsProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
impressionsProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
impressionsProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
impressionsProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
impressionsProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
IntegrationTestUtils.produceKeyValuesSynchronously(adImpressionsTopic, inputAdImpressions, impressionsProducerConfig);
//
// Step 3: Publish ad clicks.
//
Properties clicksProducerConfig = new Properties();
clicksProducerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
clicksProducerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
clicksProducerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
clicksProducerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
clicksProducerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
IntegrationTestUtils.produceKeyValuesSynchronously(adClicksTopic, inputAdClicks, clicksProducerConfig);
//
// Step 4: Verify the application's output data.
//
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "stream-stream-join-lambda-integration-test-standard-consumer");
consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
List<KeyValue<String, String>> actualResults = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(consumerConfig, outputTopic, expectedResults.size());
streams.close();
assertThat(actualResults).containsExactlyElementsOf(expectedResults);
}
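A note on the windowing API: JoinWindows.of(long) used here, like SessionWindows.with(long) in the first example, takes a raw milliseconds value. Kafka Streams 2.1 and later deprecate these overloads in favor of java.time.Duration arguments; the same window on a newer release would read as follows (a sketch, assuming Kafka Streams 2.1+):

// Same five-second join window, expressed with the Duration-based API.
JoinWindows window = JoinWindows.of(Duration.ofSeconds(5));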
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class SumLambdaIntegrationTest, method shouldSumEvenNumbers.
@Test
public void shouldSumEvenNumbers() throws Exception {
List<Integer> inputValues = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
List<Integer> expectedValues = Collections.singletonList(30);
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "sum-lambda-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass().getName());
// The commit interval for flushing records to state stores and downstream must be lower than
// this integration test's timeout (30 secs) to ensure we observe the expected processing results.
streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
// Use a temporary directory for storing state, which will be automatically removed after the test.
streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
KStream<Integer, Integer> input = builder.stream(inputTopic);
KTable<Integer, Integer> sumOfEvenNumbers = input
    .filter((k, v) -> v % 2 == 0)
    .selectKey((k, v) -> 1)
    .groupByKey()
    .reduce((v1, v2) -> v1 + v2);
sumOfEvenNumbers.toStream().to(outputTopic);
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class);
IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
//
// Step 3: Verify the application's output data.
//
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "sum-lambda-integration-test-standard-consumer");
consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, IntegerDeserializer.class);
List<Integer> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
streams.close();
assertThat(actualValues).isEqualTo(expectedValues);
}
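Because selectKey changes the key, the subsequent groupByKey repartitions the stream through an internal topic. The same result can be expressed with groupBy, which re-keys and marks the stream for repartitioning in one step. An equivalent sketch (not from the example project):

// Equivalent aggregation: groupBy re-keys and repartitions in one step.
KTable<Integer, Integer> sumOfEvenNumbers = input
    .filter((k, v) -> v % 2 == 0)
    .groupBy((k, v) -> 1)   // route every even number to the same key
    .reduce(Integer::sum);  // same as (v1, v2) -> v1 + v2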
Use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.
The class FanoutLambdaIntegrationTest, method shouldFanoutTheInput.
@Test
public void shouldFanoutTheInput() throws Exception {
List<String> inputValues = Arrays.asList("Hello", "World");
List<String> expectedValuesForB = inputValues.stream().map(String::toUpperCase).collect(Collectors.toList());
List<String> expectedValuesForC = inputValues.stream().map(String::toLowerCase).collect(Collectors.toList());
//
// Step 1: Configure and start the processor topology.
//
StreamsBuilder builder = new StreamsBuilder();
Properties streamsConfiguration = new Properties();
streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "fanout-lambda-integration-test");
streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
KStream<byte[], String> stream1 = builder.stream(inputTopicA);
KStream<byte[], String> stream2 = stream1.mapValues(String::toUpperCase);
KStream<byte[], String> stream3 = stream1.mapValues(String::toLowerCase);
stream2.to(outputTopicB);
stream3.to(outputTopicC);
KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
streams.start();
//
// Step 2: Produce some input data to the input topic.
//
Properties producerConfig = new Properties();
producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
IntegrationTestUtils.produceValuesSynchronously(inputTopicA, inputValues, producerConfig);
//
// Step 3: Verify the application's output data.
//
// Verify output topic B
Properties consumerConfigB = new Properties();
consumerConfigB.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfigB.put(ConsumerConfig.GROUP_ID_CONFIG, "fanout-lambda-integration-test-standard-consumer-topicB");
consumerConfigB.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfigB.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
consumerConfigB.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
List<String> actualValuesForB = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfigB, outputTopicB, inputValues.size());
assertThat(actualValuesForB).isEqualTo(expectedValuesForB);
// Verify output topic C
Properties consumerConfigC = new Properties();
consumerConfigC.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
consumerConfigC.put(ConsumerConfig.GROUP_ID_CONFIG, "fanout-lambda-integration-test-standard-consumer-topicC");
consumerConfigC.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
consumerConfigC.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
consumerConfigC.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
List<String> actualValuesForC = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfigC, outputTopicC, inputValues.size());
streams.close();
assertThat(actualValuesForC).isEqualTo(expectedValuesForC);
}
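The fan-out above duplicates every record into both derived streams, because the two mapValues calls operate independently on the same parent stream. When each record should instead be routed to exactly one output by a predicate, branch() applies (newer Kafka Streams releases replace it with split()). A minimal sketch, with illustrative predicates that are not part of this test:

// Predicate-based routing: each record goes to exactly one branch,
// in contrast to the duplicate-to-all fan-out above.
KStream<byte[], String>[] branches = stream1.branch(
    (k, v) -> v.startsWith("H"),  // branch 0: values starting with "H"
    (k, v) -> true);              // branch 1: everything else
branches[0].to(outputTopicB);
branches[1].to(outputTopicC);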