
Example 1 with KafkaStreams

Use of org.apache.kafka.streams.KafkaStreams in project kafka by apache.

The main method of class PageViewTypedDemo:

public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-typed");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    KStreamBuilder builder = new KStreamBuilder();
    // TODO: the following can be removed with a serialization factory
    Map<String, Object> serdeProps = new HashMap<>();
    final Serializer<PageView> pageViewSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", PageView.class);
    pageViewSerializer.configure(serdeProps, false);
    final Deserializer<PageView> pageViewDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", PageView.class);
    pageViewDeserializer.configure(serdeProps, false);
    final Serde<PageView> pageViewSerde = Serdes.serdeFrom(pageViewSerializer, pageViewDeserializer);
    final Serializer<UserProfile> userProfileSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", UserProfile.class);
    userProfileSerializer.configure(serdeProps, false);
    final Deserializer<UserProfile> userProfileDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", UserProfile.class);
    userProfileDeserializer.configure(serdeProps, false);
    final Serde<UserProfile> userProfileSerde = Serdes.serdeFrom(userProfileSerializer, userProfileDeserializer);
    final Serializer<WindowedPageViewByRegion> wPageViewByRegionSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", WindowedPageViewByRegion.class);
    wPageViewByRegionSerializer.configure(serdeProps, false);
    final Deserializer<WindowedPageViewByRegion> wPageViewByRegionDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", WindowedPageViewByRegion.class);
    wPageViewByRegionDeserializer.configure(serdeProps, false);
    final Serde<WindowedPageViewByRegion> wPageViewByRegionSerde = Serdes.serdeFrom(wPageViewByRegionSerializer, wPageViewByRegionDeserializer);
    final Serializer<RegionCount> regionCountSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", RegionCount.class);
    regionCountSerializer.configure(serdeProps, false);
    final Deserializer<RegionCount> regionCountDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", RegionCount.class);
    regionCountDeserializer.configure(serdeProps, false);
    final Serde<RegionCount> regionCountSerde = Serdes.serdeFrom(regionCountSerializer, regionCountDeserializer);
    final Serializer<PageViewByRegion> pageViewByRegionSerializer = new JsonPOJOSerializer<>();
    serdeProps.put("JsonPOJOClass", PageViewByRegion.class);
    pageViewByRegionSerializer.configure(serdeProps, false);
    final Deserializer<PageViewByRegion> pageViewByRegionDeserializer = new JsonPOJODeserializer<>();
    serdeProps.put("JsonPOJOClass", PageViewByRegion.class);
    pageViewByRegionDeserializer.configure(serdeProps, false);
    final Serde<PageViewByRegion> pageViewByRegionSerde = Serdes.serdeFrom(pageViewByRegionSerializer, pageViewByRegionDeserializer);
    KStream<String, PageView> views = builder.stream(Serdes.String(), pageViewSerde, "streams-pageview-input");
    KTable<String, UserProfile> users = builder.table(Serdes.String(), userProfileSerde, "streams-userprofile-input", "streams-userprofile-store-name");
    KStream<WindowedPageViewByRegion, RegionCount> regionCount = views.leftJoin(users, new ValueJoiner<PageView, UserProfile, PageViewByRegion>() {

        @Override
        public PageViewByRegion apply(PageView view, UserProfile profile) {
            PageViewByRegion viewByRegion = new PageViewByRegion();
            viewByRegion.user = view.user;
            viewByRegion.page = view.page;
            if (profile != null) {
                viewByRegion.region = profile.region;
            } else {
                viewByRegion.region = "UNKNOWN";
            }
            return viewByRegion;
        }
    }).map(new KeyValueMapper<String, PageViewByRegion, KeyValue<String, PageViewByRegion>>() {

        @Override
        public KeyValue<String, PageViewByRegion> apply(String user, PageViewByRegion viewRegion) {
            return new KeyValue<>(viewRegion.region, viewRegion);
        }
    }).groupByKey(Serdes.String(), pageViewByRegionSerde)
      .count(TimeWindows.of(7 * 24 * 60 * 60 * 1000L).advanceBy(1000), "RollingSevenDaysOfPageViewsByRegion")
      .toStream()
      .map(new KeyValueMapper<Windowed<String>, Long, KeyValue<WindowedPageViewByRegion, RegionCount>>() {

        @Override
        public KeyValue<WindowedPageViewByRegion, RegionCount> apply(Windowed<String> key, Long value) {
            WindowedPageViewByRegion wViewByRegion = new WindowedPageViewByRegion();
            wViewByRegion.windowStart = key.window().start();
            wViewByRegion.region = key.key();
            RegionCount rCount = new RegionCount();
            rCount.region = key.key();
            rCount.count = value;
            return new KeyValue<>(wViewByRegion, rCount);
        }
    });
    // write to the result topic
    regionCount.to(wPageViewByRegionSerde, regionCountSerde, "streams-pageviewstats-typed-output");
    KafkaStreams streams = new KafkaStreams(builder, props);
    streams.start();
    // Usually the streams application would run forever;
    // in this example we let it run for a while and then stop, since the input data is finite.
    Thread.sleep(5000L);
    streams.close();
}
Also used: KeyValue (org.apache.kafka.streams.KeyValue), HashMap (java.util.HashMap), Properties (java.util.Properties), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder), KafkaStreams (org.apache.kafka.streams.KafkaStreams), Windowed (org.apache.kafka.streams.kstream.Windowed)
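
The demo configures its serializers and deserializers through the ad-hoc "JsonPOJOClass" property, but JsonPOJOSerializer and JsonPOJODeserializer themselves are not shown above. A minimal Jackson-based sketch of how such a pair could look; treat everything beyond the configure(serdeProps, false) contract visible in the demo as an assumption:

// Sketch only: each class would live in its own file.
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;

public class JsonPOJOSerializer<T> implements Serializer<T> {
    private final ObjectMapper objectMapper = new ObjectMapper();

    @Override
    public void configure(final Map<String, ?> props, final boolean isKey) {
        // nothing to pick up for serialization
    }

    @Override
    public byte[] serialize(final String topic, final T data) {
        if (data == null)
            return null;
        try {
            return objectMapper.writeValueAsBytes(data);
        } catch (final Exception e) {
            throw new SerializationException("Error serializing JSON message", e);
        }
    }

    @Override
    public void close() {
    }
}

public class JsonPOJODeserializer<T> implements Deserializer<T> {
    private final ObjectMapper objectMapper = new ObjectMapper();
    private Class<T> tClass;

    @SuppressWarnings("unchecked")
    @Override
    public void configure(final Map<String, ?> props, final boolean isKey) {
        // the "JsonPOJOClass" entry set in the demo tells the deserializer which POJO to bind to
        tClass = (Class<T>) props.get("JsonPOJOClass");
    }

    @Override
    public T deserialize(final String topic, final byte[] bytes) {
        if (bytes == null)
            return null;
        try {
            return objectMapper.readValue(bytes, tClass);
        } catch (final Exception e) {
            throw new SerializationException(e);
        }
    }

    @Override
    public void close() {
    }
}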

Example 2 with KafkaStreams

Use of org.apache.kafka.streams.KafkaStreams in project kafka by apache.

The main method of class PageViewUntypedDemo:

public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-untyped");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    KStreamBuilder builder = new KStreamBuilder();
    final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
    final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();
    final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);
    KStream<String, JsonNode> views = builder.stream(Serdes.String(), jsonSerde, "streams-pageview-input");
    KTable<String, JsonNode> users = builder.table(Serdes.String(), jsonSerde, "streams-userprofile-input", "streams-userprofile-store-name");
    KTable<String, String> userRegions = users.mapValues(new ValueMapper<JsonNode, String>() {

        @Override
        public String apply(JsonNode record) {
            return record.get("region").textValue();
        }
    });
    KStream<JsonNode, JsonNode> regionCount = views.leftJoin(userRegions, new ValueJoiner<JsonNode, String, JsonNode>() {

        @Override
        public JsonNode apply(JsonNode view, String region) {
            ObjectNode jNode = JsonNodeFactory.instance.objectNode();
            return jNode.put("user", view.get("user").textValue()).put("page", view.get("page").textValue()).put("region", region == null ? "UNKNOWN" : region);
        }
    }).map(new KeyValueMapper<String, JsonNode, KeyValue<String, JsonNode>>() {

        @Override
        public KeyValue<String, JsonNode> apply(String user, JsonNode viewRegion) {
            return new KeyValue<>(viewRegion.get("region").textValue(), viewRegion);
        }
    }).groupByKey(Serdes.String(), jsonSerde)
      .count(TimeWindows.of(7 * 24 * 60 * 60 * 1000L).advanceBy(1000), "RollingSevenDaysOfPageViewsByRegion")
      .toStream()
      .map(new KeyValueMapper<Windowed<String>, Long, KeyValue<JsonNode, JsonNode>>() {

        @Override
        public KeyValue<JsonNode, JsonNode> apply(Windowed<String> key, Long value) {
            ObjectNode keyNode = JsonNodeFactory.instance.objectNode();
            keyNode.put("window-start", key.window().start()).put("region", key.key());
            ObjectNode valueNode = JsonNodeFactory.instance.objectNode();
            valueNode.put("count", value);
            return new KeyValue<>((JsonNode) keyNode, (JsonNode) valueNode);
        }
    });
    // write to the result topic
    regionCount.to(jsonSerde, jsonSerde, "streams-pageviewstats-untyped-output");
    KafkaStreams streams = new KafkaStreams(builder, props);
    streams.start();
    // Usually the streams application would run forever;
    // in this example we let it run for a while and then stop, since the input data is finite.
    Thread.sleep(5000L);
    streams.close();
}
Also used: KeyValue (org.apache.kafka.streams.KeyValue), JsonNode (com.fasterxml.jackson.databind.JsonNode), JsonSerializer (org.apache.kafka.connect.json.JsonSerializer), Properties (java.util.Properties), JsonDeserializer (org.apache.kafka.connect.json.JsonDeserializer), ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner), KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder), KafkaStreams (org.apache.kafka.streams.KafkaStreams), ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode), Windowed (org.apache.kafka.streams.kstream.Windowed)
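
Both pageview demos install a custom JsonTimestampExtractor via TIMESTAMP_EXTRACTOR_CLASS_CONFIG, but its source is not shown. A minimal sketch of what it could look like, assuming the two-argument TimestampExtractor interface of this API generation and a "timestamp" field in the JSON payload (both are assumptions, not taken from the snippets above):

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.TimestampExtractor;

public class JsonTimestampExtractor implements TimestampExtractor {

    @Override
    public long extract(final ConsumerRecord<Object, Object> record, final long previousTimestamp) {
        // assumption: the JSON payload carries an event-time "timestamp" field
        if (record.value() instanceof JsonNode) {
            return ((JsonNode) record.value()).get("timestamp").longValue();
        }
        // fall back to the timestamp embedded in the record itself
        return record.timestamp();
    }
}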

Example 3 with KafkaStreams

Use of org.apache.kafka.streams.KafkaStreams in project kafka by apache.

The main method of class WordCountProcessorDemo:

public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount-processor");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    TopologyBuilder builder = new TopologyBuilder();
    builder.addSource("Source", "streams-file-input");
    builder.addProcessor("Process", new MyProcessorSupplier(), "Source");
    builder.addStateStore(Stores.create("Counts").withStringKeys().withIntegerValues().inMemory().build(), "Process");
    builder.addSink("Sink", "streams-wordcount-processor-output", "Process");
    KafkaStreams streams = new KafkaStreams(builder, props);
    streams.start();
    // Usually the streams application would run forever;
    // in this example we let it run for a while and then stop, since the input data is finite.
    Thread.sleep(5000L);
    streams.close();
}
Also used : KafkaStreams(org.apache.kafka.streams.KafkaStreams) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) Properties(java.util.Properties)

Example 4 with KafkaStreams

Use of org.apache.kafka.streams.KafkaStreams in project kafka by apache.

The shouldFanoutTheInput test method of class FanoutIntegrationTest:

@Test
public void shouldFanoutTheInput() throws Exception {
    final List<String> inputValues = Arrays.asList("Hello", "World");
    final List<String> expectedValuesForB = new ArrayList<>();
    final List<String> expectedValuesForC = new ArrayList<>();
    for (final String input : inputValues) {
        expectedValuesForB.add(input.toUpperCase(Locale.getDefault()));
        expectedValuesForC.add(input.toLowerCase(Locale.getDefault()));
    }
    //
    // Step 1: Configure and start the processor topology.
    //
    final KStreamBuilder builder = new KStreamBuilder();
    final Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "fanout-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, COMMIT_INTERVAL_MS);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, cacheSizeBytes);
    final KStream<byte[], String> stream1 = builder.stream(INPUT_TOPIC_A);
    final KStream<byte[], String> stream2 = stream1.mapValues(new ValueMapper<String, String>() {

        @Override
        public String apply(final String value) {
            return value.toUpperCase(Locale.getDefault());
        }
    });
    final KStream<byte[], String> stream3 = stream1.mapValues(new ValueMapper<String, String>() {

        @Override
        public String apply(final String value) {
            return value.toLowerCase(Locale.getDefault());
        }
    });
    stream2.to(OUTPUT_TOPIC_B);
    stream3.to(OUTPUT_TOPIC_C);
    final KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration);
    streams.start();
    //
    // Step 2: Produce some input data to the input topic.
    //
    final Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
    producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0);
    producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
    producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    IntegrationTestUtils.produceValuesSynchronously(INPUT_TOPIC_A, inputValues, producerConfig, mockTime);
    //
    // Step 3: Verify the application's output data.
    //
    // Verify output topic B
    final Properties consumerConfigB = new Properties();
    consumerConfigB.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfigB.put(ConsumerConfig.GROUP_ID_CONFIG, "fanout-integration-test-standard-consumer-topicB");
    consumerConfigB.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfigB.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
    consumerConfigB.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    final List<String> actualValuesForB = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfigB, OUTPUT_TOPIC_B, inputValues.size());
    assertThat(actualValuesForB, equalTo(expectedValuesForB));
    // Verify output topic C
    final Properties consumerConfigC = new Properties();
    consumerConfigC.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    consumerConfigC.put(ConsumerConfig.GROUP_ID_CONFIG, "fanout-integration-test-standard-consumer-topicC");
    consumerConfigC.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    consumerConfigC.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
    consumerConfigC.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    final List<String> actualValuesForC = IntegrationTestUtils.waitUntilMinValuesRecordsReceived(consumerConfigC, OUTPUT_TOPIC_C, inputValues.size());
    streams.close();
    assertThat(actualValuesForC, equalTo(expectedValuesForC));
}
Also used: KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder), KafkaStreams (org.apache.kafka.streams.KafkaStreams), ArrayList (java.util.ArrayList), Properties (java.util.Properties), Test (org.junit.Test)
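
The examples above stop the application with Thread.sleep or an explicit close() because their input is finite. In a long-running deployment the usual pattern is to close the KafkaStreams instance from a JVM shutdown hook instead; a minimal sketch, written in the pre-lambda style of the surrounding code:

final KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration);
streams.start();
// close cleanly on SIGTERM/Ctrl-C instead of sleeping for a fixed time
Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
    @Override
    public void run() {
        streams.close();
    }
}));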

Example 5 with KafkaStreams

Use of org.apache.kafka.streams.KafkaStreams in project kafka by apache.

The shouldCompactTopicsForStateChangelogs test method of class InternalTopicIntegrationTest:

@Test
public void shouldCompactTopicsForStateChangelogs() throws Exception {
    //
    // Step 1: Configure and start a simple word count topology
    //
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    final Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "compact-topics-integration-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
    streamsConfiguration.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    final KStreamBuilder builder = new KStreamBuilder();
    final KStream<String, String> textLines = builder.stream(DEFAULT_INPUT_TOPIC);
    final KStream<String, Long> wordCounts = textLines.flatMapValues(new ValueMapper<String, Iterable<String>>() {

        @Override
        public Iterable<String> apply(final String value) {
            return Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+"));
        }
    }).groupBy(MockKeyValueMapper.<String, String>SelectValueMapper()).count("Counts").toStream();
    wordCounts.to(stringSerde, longSerde, DEFAULT_OUTPUT_TOPIC);
    // Remove any state from previous test runs
    IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration);
    final KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration);
    streams.start();
    //
    // Step 2: Produce some input data to the input topic.
    //
    produceData(Arrays.asList("hello", "world", "world", "hello world"));
    //
    // Step 3: Verify the state changelog topics are compact
    //
    streams.close();
    final Properties properties = getTopicConfigProperties(ProcessorStateManager.storeChangelogTopic(applicationId, "Counts"));
    assertEquals(LogConfig.Compact(), properties.getProperty(LogConfig.CleanupPolicyProp()));
}
Also used: KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder), KafkaStreams (org.apache.kafka.streams.KafkaStreams), MockKeyValueMapper (org.apache.kafka.test.MockKeyValueMapper), ValueMapper (org.apache.kafka.streams.kstream.ValueMapper), Properties (java.util.Properties), Test (org.junit.Test)
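
Kafka Streams names changelog topics by the convention "<application.id>-<store name>-changelog". Assuming the test's applicationId field holds the same value passed to APPLICATION_ID_CONFIG above, ProcessorStateManager.storeChangelogTopic(applicationId, "Counts") would resolve to:

// "<application.id>-<store name>-changelog"
final String changelogTopic = "compact-topics-integration-test-Counts-changelog";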

Aggregations

Classes most often used together with KafkaStreams across the 40 examples, with usage counts:

KafkaStreams (org.apache.kafka.streams.KafkaStreams): 40
Properties (java.util.Properties): 24
KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder): 23
Test (org.junit.Test): 15
KeyValue (org.apache.kafka.streams.KeyValue): 9
CountDownLatch (java.util.concurrent.CountDownLatch): 8
TestCondition (org.apache.kafka.test.TestCondition): 5
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 4
ValueJoiner (org.apache.kafka.streams.kstream.ValueJoiner): 4
ValueMapper (org.apache.kafka.streams.kstream.ValueMapper): 4
Field (java.lang.reflect.Field): 3
ArrayList (java.util.ArrayList): 3
Metrics (org.apache.kafka.common.metrics.Metrics): 3
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 3
DefaultKafkaClientSupplier (org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier): 3
StreamThread (org.apache.kafka.streams.processor.internals.StreamThread): 3
MockKeyValueMapper (org.apache.kafka.test.MockKeyValueMapper): 3
KafkaProducer (org.apache.kafka.clients.producer.KafkaProducer): 2
KafkaStreamsTest (org.apache.kafka.streams.KafkaStreamsTest): 2
Windowed (org.apache.kafka.streams.kstream.Windowed): 2