
Example 86 with StreamsBuilder

use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

the class TopArticlesLambdaExample method buildTopArticlesStream.

static KafkaStreams buildTopArticlesStream(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) throws IOException {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "top-articles-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "top-articles-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, GenericAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    // Serdes used in this example
    final Serde<String> stringSerde = Serdes.String();
    final Map<String, String> serdeConfig = Collections.singletonMap(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    final Serde<GenericRecord> keyAvroSerde = new GenericAvroSerde();
    keyAvroSerde.configure(serdeConfig, true);
    final Serde<GenericRecord> valueAvroSerde = new GenericAvroSerde();
    valueAvroSerde.configure(serdeConfig, false);
    final Serde<Windowed<String>> windowedStringSerde = new WindowedSerde<>(stringSerde);
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<byte[], GenericRecord> views = builder.stream(PAGE_VIEWS);
    final InputStream statsSchema = TopArticlesLambdaExample.class.getClassLoader().getResourceAsStream("avro/io/confluent/examples/streams/pageviewstats.avsc");
    final Schema schema = new Schema.Parser().parse(statsSchema);
    final KStream<GenericRecord, GenericRecord> articleViews = views.filter((dummy, record) -> isArticle(record)).map((dummy, article) -> {
        final GenericRecord clone = new GenericData.Record(article.getSchema());
        clone.put("user", "user");
        clone.put("page", article.get("page"));
        clone.put("industry", article.get("industry"));
        return new KeyValue<>(clone, clone);
    });
    final KTable<Windowed<GenericRecord>, Long> viewCounts = articleViews.groupByKey(Serialized.with(keyAvroSerde, valueAvroSerde)).windowedBy(TimeWindows.of(TimeUnit.MINUTES.toMillis(60))).count();
    final Comparator<GenericRecord> comparator = (o1, o2) -> (int) ((Long) o2.get("count") - (Long) o1.get("count"));
    final KTable<Windowed<String>, PriorityQueue<GenericRecord>> allViewCounts = viewCounts.groupBy(// the selector
    (windowedArticle, count) -> {
        // project onto the industry field for the key
        Windowed<String> windowedIndustry = new Windowed<>(windowedArticle.key().get("industry").toString(), windowedArticle.window());
        // add the page into the value
        GenericRecord viewStats = new GenericData.Record(schema);
        viewStats.put("page", windowedArticle.key().get("page"));
        viewStats.put("user", "user");
        viewStats.put("industry", windowedArticle.key().get("industry"));
        viewStats.put("count", count);
        return new KeyValue<>(windowedIndustry, viewStats);
    }, Serialized.with(windowedStringSerde, valueAvroSerde)).aggregate(// the initializer
    () -> new PriorityQueue<>(comparator), // the "add" aggregator
    (windowedIndustry, record, queue) -> {
        queue.add(record);
        return queue;
    }, // the "remove" aggregator
    (windowedIndustry, record, queue) -> {
        queue.remove(record);
        return queue;
    }, Materialized.with(windowedStringSerde, new PriorityQueueSerde<>(comparator, valueAvroSerde)));
    final int topN = 100;
    final KTable<Windowed<String>, String> topViewCounts = allViewCounts.mapValues(queue -> {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < topN; i++) {
            final GenericRecord record = queue.poll();
            if (record == null) {
                break;
            }
            sb.append(record.get("page").toString());
            sb.append("\n");
        }
        return sb.toString();
    });
    topViewCounts.toStream().to(TOP_NEWS_PER_INDUSTRY_TOPIC, Produced.with(windowedStringSerde, stringSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
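
For reference, a minimal sketch of launching the KafkaStreams instance returned by buildTopArticlesStream (the bootstrap/schema-registry defaults and the state directory below are assumptions for local illustration, not part of the example above):

public static void main(final String[] args) throws IOException {
    // Assumed defaults for a local setup; adjust to your environment.
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final String schemaRegistryUrl = args.length > 1 ? args[1] : "http://localhost:8081";
    final KafkaStreams streams = buildTopArticlesStream(bootstrapServers, schemaRegistryUrl, "/tmp/kafka-streams");
    // Unconditionally reset local state, which makes re-running the example easier.
    streams.cleanUp();
    streams.start();
    // Close the topology gracefully on SIGTERM.
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}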
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Produced(org.apache.kafka.streams.kstream.Produced) Serialized(org.apache.kafka.streams.kstream.Serialized) PriorityQueue(java.util.PriorityQueue) KStream(org.apache.kafka.streams.kstream.KStream) GenericAvroSerde(io.confluent.kafka.streams.serdes.avro.GenericAvroSerde) GenericData(org.apache.avro.generic.GenericData) Windowed(org.apache.kafka.streams.kstream.Windowed) Serde(org.apache.kafka.common.serialization.Serde) Map(java.util.Map) Serdes(org.apache.kafka.common.serialization.Serdes) WindowedSerde(io.confluent.examples.streams.utils.WindowedSerde) Utf8(org.apache.avro.util.Utf8) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) GenericRecord(org.apache.avro.generic.GenericRecord) KTable(org.apache.kafka.streams.kstream.KTable) Schema(org.apache.avro.Schema) Properties(java.util.Properties) PriorityQueueSerde(io.confluent.examples.streams.utils.PriorityQueueSerde) KeyValue(org.apache.kafka.streams.KeyValue) IOException(java.io.IOException) AbstractKafkaAvroSerDeConfig(io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig) TimeUnit(java.util.concurrent.TimeUnit) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) Materialized(org.apache.kafka.streams.kstream.Materialized) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream)

Example 87 with StreamsBuilder

use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

the class WikipediaFeedAvroExample method buildWikipediaFeed.

static KafkaStreams buildWikipediaFeed(final String bootstrapServers, final String schemaRegistryUrl, final String stateDir) {
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-avro-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-avro-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Where to find the Confluent schema registry instance(s)
    streamsConfiguration.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, SpecificAvroSerde.class);
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, stateDir);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final StreamsBuilder builder = new StreamsBuilder();
    // read the source stream
    final KStream<String, WikiFeed> feeds = builder.stream(WIKIPEDIA_FEED);
    // aggregate the counts of new feeds by user
    final KTable<String, Long> aggregated = feeds.filter(new Predicate<String, WikiFeed>() {

        @Override
        public boolean test(final String dummy, final WikiFeed value) {
            return value.getIsNew();
        }
    }).map(new KeyValueMapper<String, WikiFeed, KeyValue<String, WikiFeed>>() {

        @Override
        public KeyValue<String, WikiFeed> apply(final String key, final WikiFeed value) {
            return new KeyValue<>(value.getUser(), value);
        }
    }).groupByKey().count();
    // write to the result topic; we need to override the default serdes here
    aggregated.toStream().to(WIKIPEDIA_STATS, Produced.with(stringSerde, longSerde));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
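
Because this class predates the lambda style used elsewhere on this page, here is a hedged sketch of the same filter/map/count chain rewritten with lambdas; it is equivalent to the anonymous Predicate and KeyValueMapper above and assumes the same feeds stream:

// Lambda-style equivalent of the anonymous classes in buildWikipediaFeed.
final KTable<String, Long> aggregatedWithLambdas = feeds
    // keep only feeds flagged as new
    .filter((dummy, value) -> value.getIsNew())
    // re-key by user so counting groups per user
    .map((key, value) -> new KeyValue<>(value.getUser(), value))
    .groupByKey()
    .count();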
Also used : KafkaStreams(org.apache.kafka.streams.KafkaStreams) KeyValue(org.apache.kafka.streams.KeyValue) Properties(java.util.Properties) Predicate(org.apache.kafka.streams.kstream.Predicate) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) WikiFeed(io.confluent.examples.streams.avro.WikiFeed)

Example 88 with StreamsBuilder

use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

the class WordCountLambdaExample method main.

public static void main(final String[] args) throws Exception {
    final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092";
    final Properties streamsConfiguration = new Properties();
    // Give the Streams application a unique name.  The name must be unique in the Kafka cluster
    // against which the application is run.
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-lambda-example");
    streamsConfiguration.put(StreamsConfig.CLIENT_ID_CONFIG, "wordcount-lambda-example-client");
    // Where to find Kafka broker(s).
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    // Specify default (de)serializers for record keys and for record values.
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // Records should be flushed every 10 seconds. This is less than the default
    // in order to keep this example interactive.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000);
    // For illustrative purposes we disable record caches
    streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    // Set up serializers and deserializers, which we will use for overriding the default serdes
    // specified above.
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    // In the subsequent lines we define the processing topology of the Streams application.
    final StreamsBuilder builder = new StreamsBuilder();
    // Construct a `KStream` from the input topic "streams-plaintext-input", where message values
    // represent lines of text (for the sake of this example, we ignore whatever may be stored
    // in the message keys).
    // 
    // Note: We could also just call `builder.stream("streams-plaintext-input")` if we wanted to leverage
    // the default serdes specified in the Streams configuration above, because these defaults
    // match what's in the actual topic.  However we explicitly set the deserializers in the
    // call to `stream()` below in order to show how that's done, too.
    final KStream<String, String> textLines = builder.stream("streams-plaintext-input");
    final Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);
    final KTable<String, Long> wordCounts = textLines.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase()))).groupBy((key, word) -> word).count();
    // Write the `KTable<String, Long>` to the output topic.
    wordCounts.toStream().to("streams-wordcount-output", Produced.with(stringSerde, longSerde));
    // Now that we have finished the definition of the processing topology we can actually run
    // it via `start()`.  The Streams application as a whole can be launched just like any
    // normal Java application that has a `main()` method.
    final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    // Always (and unconditionally) clean local state prior to starting the processing topology.
    // We opt for this unconditional call here because this will make it easier for you to play around with the example
    // when resetting the application for doing a re-run (via the Application Reset Tool,
    // http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
    // 
    // The drawback of cleaning up local state up front is that your app must rebuild its local state from scratch,
    // which will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
    // Thus, in a production scenario you typically do not want to always clean up as we do here, but rather only when
    // it is truly needed, i.e., only under certain conditions (e.g., the presence of a command-line flag for your app).
    // See `ApplicationResetExample.java` for a production-like example.
    streams.cleanUp();
    streams.start();
    // Add shutdown hook to respond to SIGTERM and gracefully close Kafka Streams
    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
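
A hedged sketch of exercising this word-count topology without a broker, using TopologyTestDriver from the kafka-streams-test-utils artifact; it assumes that artifact is on the classpath, the pre-2.4 ConsumerRecordFactory API, and that `topology` is the builder.build() result and `streamsConfiguration` the Properties from the example above:

import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.test.ConsumerRecordFactory;

final TopologyTestDriver driver = new TopologyTestDriver(topology, streamsConfiguration);
final ConsumerRecordFactory<String, String> factory =
    new ConsumerRecordFactory<>("streams-plaintext-input", new StringSerializer(), new StringSerializer());
// Pipe one line of text into the input topic.
driver.pipeInput(factory.create("streams-plaintext-input", null, "hello kafka streams"));
// Read the first count update from the output topic; with record caching disabled, every word emits an update.
final ProducerRecord<String, Long> output =
    driver.readOutput("streams-wordcount-output", new StringDeserializer(), new LongDeserializer());
// output.key() == "hello", output.value() == 1L; further readOutput calls return the remaining words.
driver.close();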
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) KTable(org.apache.kafka.streams.kstream.KTable) Arrays(java.util.Arrays) Properties(java.util.Properties) Produced(org.apache.kafka.streams.kstream.Produced) Serde(org.apache.kafka.common.serialization.Serde) Serdes(org.apache.kafka.common.serialization.Serdes) KafkaStreams(org.apache.kafka.streams.KafkaStreams) KStream(org.apache.kafka.streams.kstream.KStream) Pattern(java.util.regex.Pattern)

Example 89 with StreamsBuilder

use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

the class EventDeduplicationLambdaIntegrationTest method shouldRemoveDuplicatesFromTheInput.

@Test
public void shouldRemoveDuplicatesFromTheInput() throws Exception {
    // e.g. "4ff3cb44-abcb-46e3-8f9a-afb7cc74fbb8"
    String firstId = UUID.randomUUID().toString();
    String secondId = UUID.randomUUID().toString();
    String thirdId = UUID.randomUUID().toString();
    List<String> inputValues = Arrays.asList(firstId, secondId, firstId, firstId, secondId, thirdId, thirdId, firstId, secondId);
    List<String> expectedValues = Arrays.asList(firstId, secondId, thirdId);
    // 
    // Step 1: Configure and start the processor topology.
    // 
    StreamsBuilder builder = new StreamsBuilder();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "deduplication-lambda-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    // The commit interval for flushing records to state stores and downstream must be lower than
    // this integration test's timeout (30 secs) to ensure we observe the expected processing results.
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, TimeUnit.SECONDS.toMillis(10));
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
    // How long we "remember" an event.  During this time, any incoming duplicates of the event
    // will be, well, dropped, thereby de-duplicating the input data.
    // 
    // The actual value depends on your use case.  To reduce memory and disk usage, you could
    // decrease the size to purge old windows more frequently at the cost of potentially missing out
    // on de-duplicating late-arriving records.
    long maintainDurationPerEventInMs = TimeUnit.MINUTES.toMillis(10);
    // The number of segments has no impact on "correctness".
    // Using more segments implies larger overhead but allows for more fine-grained record expiration.
    // Note: the specified retention time is a _minimum_ time span, not a strict upper time bound.
    int numberOfSegments = 3;
    // retention period must be at least window size -- for this use case, we don't need a longer retention period
    // and thus just use the window size as retention time
    long retentionPeriod = maintainDurationPerEventInMs;
    StoreBuilder<WindowStore<String, Long>> dedupStoreBuilder = Stores.windowStoreBuilder(Stores.persistentWindowStore(storeName, retentionPeriod, numberOfSegments, maintainDurationPerEventInMs, false), Serdes.String(), Serdes.Long());
    builder.addStateStore(dedupStoreBuilder);
    KStream<byte[], String> input = builder.stream(inputTopic);
    KStream<byte[], String> deduplicated = input.transform(// The record value itself is used as the event ID here; adapt this ID-extractor function as needed.
    () -> new DeduplicationTransformer<>(maintainDurationPerEventInMs, (key, value) -> value), storeName);
    deduplicated.to(outputTopic);
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Produce some input data to the input topic.
    // 
    Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
    // 
    // Step 3: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "deduplication-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    List<String> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
    streams.close();
    assertThat(actualValues).containsExactlyElementsOf(expectedValues);
}
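
The DeduplicationTransformer referenced above is defined elsewhere in the test class and is not shown on this page. As a hedged illustration of the idea (names and method shape are hypothetical, not the repository's actual implementation), the core check against the WindowStore registered via builder.addStateStore(dedupStoreBuilder) could look like this:

// Hypothetical helper: returns true if eventId was already seen within the maintain duration.
private static boolean isDuplicate(final WindowStore<String, Long> eventIdStore,
                                   final String eventId,
                                   final long nowMs,
                                   final long maintainDurationMs) {
    // Look for an earlier occurrence of this event ID within +/- the maintain duration.
    try (final WindowStoreIterator<Long> timestamps =
             eventIdStore.fetch(eventId, nowMs - maintainDurationMs, nowMs + maintainDurationMs)) {
        final boolean seenBefore = timestamps.hasNext();
        if (!seenBefore) {
            // First sighting: remember it so later duplicates inside the window are dropped.
            eventIdStore.put(eventId, nowMs, nowMs);
        }
        return seenBefore;
    }
}

In the transformer, transform(key, value) would call such a helper with the extracted event ID and return null for duplicates (records for which transform returns null are dropped) and a KeyValue for first sightings.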
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) WindowStore(org.apache.kafka.streams.state.WindowStore) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Stores(org.apache.kafka.streams.state.Stores) KStream(org.apache.kafka.streams.kstream.KStream) ByteArraySerializer(org.apache.kafka.common.serialization.ByteArraySerializer) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) ByteArrayDeserializer(org.apache.kafka.common.serialization.ByteArrayDeserializer) KeyValueMapper(org.apache.kafka.streams.kstream.KeyValueMapper) Properties(java.util.Properties) TestUtils(org.apache.kafka.test.TestUtils) Transformer(org.apache.kafka.streams.kstream.Transformer) KeyValue(org.apache.kafka.streams.KeyValue) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) UUID(java.util.UUID) StoreBuilder(org.apache.kafka.streams.state.StoreBuilder) TimeUnit(java.util.concurrent.TimeUnit) ProcessorContext(org.apache.kafka.streams.processor.ProcessorContext) List(java.util.List) WindowStoreIterator(org.apache.kafka.streams.state.WindowStoreIterator) KafkaStreams(org.apache.kafka.streams.KafkaStreams)

Example 90 with StreamsBuilder

use of org.apache.kafka.streams.StreamsBuilder in project kafka-streams-examples by confluentinc.

the class HandlingCorruptedInputRecordsIntegrationTest method shouldIgnoreCorruptInputRecords.

@Test
public void shouldIgnoreCorruptInputRecords() throws Exception {
    List<Long> inputValues = Arrays.asList(1L, 2L, 3L);
    List<Long> expectedValues = inputValues.stream().map(x -> 2 * x).collect(Collectors.toList());
    // 
    // Step 1: Configure and start the processor topology.
    // 
    StreamsBuilder builder = new StreamsBuilder();
    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "failure-handling-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.ByteArray().getClass().getName());
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    Serde<String> stringSerde = Serdes.String();
    Serde<Long> longSerde = Serdes.Long();
    KStream<byte[], byte[]> input = builder.stream(inputTopic);
    // Note how the returned stream is of type `KStream<String, Long>`.
    KStream<String, Long> doubled = input.flatMap((k, v) -> {
        try {
            // Attempt deserialization
            String key = stringSerde.deserializer().deserialize("input-topic", k);
            long value = longSerde.deserializer().deserialize("input-topic", v);
            // The record deserialized cleanly, so it is valid; process it (here: double the value).
            return Collections.singletonList(KeyValue.pair(key, 2 * value));
        } catch (SerializationException e) {
            // Ignore/skip the corrupted record by catching the exception.
            // Optionally, we can log the fact that we did so:
            System.err.println("Could not deserialize record: " + e.getMessage());
        }
        return Collections.emptyList();
    });
    // Write the processing results (which were generated from valid records only) to Kafka.
    doubled.to(outputTopic, Produced.with(stringSerde, longSerde));
    KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration);
    streams.start();
    // 
    // Step 2: Produce some corrupt input data to the input topic.
    // 
    Properties producerConfigForCorruptRecords = new Properties();
    producerConfigForCorruptRecords.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfigForCorruptRecords.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfigForCorruptRecords.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfigForCorruptRecords.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
    producerConfigForCorruptRecords.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, Collections.singletonList("corrupt"), producerConfigForCorruptRecords);
    // 
    // Step 3: Produce some (valid) input data to the input topic.
    // 
    Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig);
    // 
    // Step 4: Verify the application's output data.
    // 
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "map-function-lambda-integration-test-standard-consumer");
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
    consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
    List<Long> actualValues = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfig, outputTopic, expectedValues.size());
    streams.close();
    assertThat(actualValues).isEqualTo(expectedValues);
}
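
As an alternative to catching SerializationException by hand, Kafka Streams (since 1.0) can skip records that fail deserialization via a configured handler; note this only applies when the configured serdes do the deserialization, not when records are decoded manually inside the topology as in this test. A minimal sketch:

import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;

// Log corrupted records and keep processing instead of failing the application.
streamsConfiguration.put(
    StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
    LogAndContinueExceptionHandler.class);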
Also used : StreamsConfig(org.apache.kafka.streams.StreamsConfig) Arrays(java.util.Arrays) BeforeClass(org.junit.BeforeClass) Produced(org.apache.kafka.streams.kstream.Produced) SerializationException(org.apache.kafka.common.errors.SerializationException) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) KStream(org.apache.kafka.streams.kstream.KStream) ByteArraySerializer(org.apache.kafka.common.serialization.ByteArraySerializer) Serde(org.apache.kafka.common.serialization.Serde) EmbeddedSingleNodeKafkaCluster(io.confluent.examples.streams.kafka.EmbeddedSingleNodeKafkaCluster) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ClassRule(org.junit.ClassRule) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) ByteArrayDeserializer(org.apache.kafka.common.serialization.ByteArrayDeserializer) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Properties(java.util.Properties) KeyValue(org.apache.kafka.streams.KeyValue) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) Collectors(java.util.stream.Collectors) List(java.util.List) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Collections(java.util.Collections)

Aggregations

StreamsBuilder (org.apache.kafka.streams.StreamsBuilder) 189
Test (org.junit.Test) 121
KafkaStreams (org.apache.kafka.streams.KafkaStreams) 72
Properties (java.util.Properties) 61
KeyValue (org.apache.kafka.streams.KeyValue) 42
MockProcessorSupplier (org.apache.kafka.test.MockProcessorSupplier) 30
StreamsBuilderTest (org.apache.kafka.streams.StreamsBuilderTest) 27
Serdes (org.apache.kafka.common.serialization.Serdes) 21
KeyValueMapper (org.apache.kafka.streams.kstream.KeyValueMapper) 21
Before (org.junit.Before) 19
StreamsConfig (org.apache.kafka.streams.StreamsConfig) 18
KStream (org.apache.kafka.streams.kstream.KStream) 18
Predicate (org.apache.kafka.streams.kstream.Predicate) 18
IntegrationTest (org.apache.kafka.test.IntegrationTest) 18
Bytes (org.apache.kafka.common.utils.Bytes) 16
HashSet (java.util.HashSet) 15
ValueMapper (org.apache.kafka.streams.kstream.ValueMapper) 14
HashMap (java.util.HashMap) 13
KTable (org.apache.kafka.streams.kstream.KTable) 13
Produced (org.apache.kafka.streams.kstream.Produced) 13