Example 16 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the Apache Kafka project.

From the class KGroupedTableImplTest, method shouldReduce:

@Test
public void shouldReduce() {
    final KeyValueMapper<String, Number, KeyValue<String, Integer>> intProjection =
        (key, value) -> KeyValue.pair(key, value.intValue());
    final KTable<String, Integer> reduced = builder
        .table(
            topic,
            Consumed.with(Serdes.String(), Serdes.Double()),
            Materialized.<String, Double, KeyValueStore<Bytes, byte[]>>as("store")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Double()))
        .groupBy(intProjection)
        .reduce(MockReducer.INTEGER_ADDER, MockReducer.INTEGER_SUBTRACTOR, Materialized.as("reduced"));
    final MockApiProcessorSupplier<String, Integer, Void, Void> supplier = getReducedResults(reduced);
    try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
        assertReduced(supplier.theCapturedProcessor().lastValueAndTimestampPerKey(), topic, driver);
        assertEquals("reduced", reduced.queryableStoreName());
    }
}
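
The helpers getReducedResults and assertReduced are defined elsewhere in KGroupedTableImplTest and are not shown here; before the assertions fire, records have to be piped into the source topic. A minimal sketch of what that piping could look like with TestInputTopic (the topic name and serdes follow the Consumed above; the records themselves are invented for illustration):

final TestInputTopic<String, Double> inputTopic =
    driver.createInputTopic(topic, new StringSerializer(), new DoubleSerializer());
// First value for key "A": intProjection truncates 1.1 to the int 1, which the adder applies.
inputTopic.pipeInput("A", 1.1);
// Update for key "A": the subtractor removes the old projected value, the adder applies the new one.
inputTopic.pipeInput("A", 2.2);
inputTopic.pipeInput("B", 3.3);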

Example 17 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the Apache Kafka project.

From the class SmokeTestClient, method getTopology:

public Topology getTopology() {
    final StreamsBuilder builder = new StreamsBuilder();
    final Consumed<String, Integer> stringIntConsumed = Consumed.with(stringSerde, intSerde);
    final KStream<String, Integer> source = builder.stream("data", stringIntConsumed);
    source.filterNot((k, v) -> k.equals("flush")).to("echo", Produced.with(stringSerde, intSerde));
    final KStream<String, Integer> data = source.filter((key, value) -> value == null || value != END);
    data.process(SmokeTestUtil.printProcessorSupplier("data", name));
    // min
    final KGroupedStream<String, Integer> groupedData = data.groupByKey(Grouped.with(stringSerde, intSerde));
    final KTable<Windowed<String>, Integer> minAggregation = groupedData
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(1), Duration.ofMinutes(1)))
        .aggregate(
            () -> Integer.MAX_VALUE,
            (aggKey, value, aggregate) -> (value < aggregate) ? value : aggregate,
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-min")
                .withValueSerde(intSerde)
                .withRetention(Duration.ofHours(25)));
    streamify(minAggregation, "min-raw");
    streamify(minAggregation.suppress(untilWindowCloses(BufferConfig.unbounded())), "min-suppressed");
    minAggregation
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("min", Produced.with(stringSerde, intSerde));
    final KTable<Windowed<String>, Integer> smallWindowSum = groupedData
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofSeconds(2), Duration.ofSeconds(30))
            .advanceBy(Duration.ofSeconds(1)))
        .reduce(Integer::sum);
    streamify(smallWindowSum, "sws-raw");
    streamify(smallWindowSum.suppress(untilWindowCloses(BufferConfig.unbounded())), "sws-suppressed");
    final KTable<String, Integer> minTable = builder.table("min", Consumed.with(stringSerde, intSerde), Materialized.as("minStoreName"));
    minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min", name));
    // max
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> Integer.MIN_VALUE,
            (aggKey, value, aggregate) -> (value > aggregate) ? value : aggregate,
            Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-max").withValueSerde(intSerde))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("max", Produced.with(stringSerde, intSerde));
    final KTable<String, Integer> maxTable = builder.table("max", Consumed.with(stringSerde, intSerde), Materialized.as("maxStoreName"));
    maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max", name));
    // sum
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .aggregate(
            () -> 0L,
            (aggKey, value, aggregate) -> (long) value + aggregate,
            Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("win-sum").withValueSerde(longSerde))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("sum", Produced.with(stringSerde, longSerde));
    final Consumed<String, Long> stringLongConsumed = Consumed.with(stringSerde, longSerde);
    final KTable<String, Long> sumTable = builder.table("sum", stringLongConsumed);
    sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum", name));
    // cnt
    groupedData
        .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
        .count(Materialized.as("uwin-cnt"))
        .toStream(new Unwindow<>())
        .filterNot((k, v) -> k.equals("flush"))
        .to("cnt", Produced.with(stringSerde, longSerde));
    final KTable<String, Long> cntTable = builder.table("cnt", Consumed.with(stringSerde, longSerde), Materialized.as("cntStoreName"));
    cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt", name));
    // dif
    maxTable
        .join(minTable, (value1, value2) -> value1 - value2)
        .toStream()
        .filterNot((k, v) -> k.equals("flush"))
        .to("dif", Produced.with(stringSerde, intSerde));
    // avg
    sumTable
        .join(cntTable, (value1, value2) -> (double) value1 / (double) value2)
        .toStream()
        .filterNot((k, v) -> k.equals("flush"))
        .to("avg", Produced.with(stringSerde, doubleSerde));
    // test repartition
    final Agg agg = new Agg();
    cntTable
        .groupBy(agg.selector(), Grouped.with(stringSerde, longSerde))
        .aggregate(
            agg.init(),
            agg.adder(),
            agg.remover(),
            Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("cntByCnt"))
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long()))
        .toStream()
        .to("tagg", Produced.with(stringSerde, longSerde));
    return builder.build();
}
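
The streamify helper and the Unwindow mapper come from the smoke-test utilities and are not reproduced on this page. Judging from the surrounding calls, Unwindow simply strips the window from the key; a minimal sketch under that assumption:

// Sketch: a mapper for toStream(...) that replaces the Windowed<K> key with its inner key.
public static class Unwindow<K, V> implements KeyValueMapper<Windowed<K>, V, K> {
    @Override
    public K apply(final Windowed<K> windowedKey, final V value) {
        return windowedKey.key();
    }
}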

Example 18 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the Apache Kafka project.

From the class PageViewTypedDemo, method main:

public static void main(final String[] args) {
    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-typed");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, JSONSerde.class);
    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, JSONSerde.class);
    props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, PageView> views = builder.stream("streams-pageview-input", Consumed.with(Serdes.String(), new JSONSerde<>()));
    final KTable<String, UserProfile> users = builder.table("streams-userprofile-input", Consumed.with(Serdes.String(), new JSONSerde<>()));
    final Duration duration24Hours = Duration.ofHours(24);
    final KStream<WindowedPageViewByRegion, RegionCount> regionCount = views.leftJoin(users, (view, profile) -> {
        final PageViewByRegion viewByRegion = new PageViewByRegion();
        viewByRegion.user = view.user;
        viewByRegion.page = view.page;
        if (profile != null) {
            viewByRegion.region = profile.region;
        } else {
            viewByRegion.region = "UNKNOWN";
        }
        return viewByRegion;
    }).map((user, viewRegion) -> new KeyValue<>(viewRegion.region, viewRegion))
        .groupByKey(Grouped.with(Serdes.String(), new JSONSerde<>()))
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(7), duration24Hours).advanceBy(Duration.ofSeconds(1)))
        .count()
        .toStream()
        .map((key, value) -> {
        final WindowedPageViewByRegion wViewByRegion = new WindowedPageViewByRegion();
        wViewByRegion.windowStart = key.window().start();
        wViewByRegion.region = key.key();
        final RegionCount rCount = new RegionCount();
        rCount.region = key.key();
        rCount.count = value;
        return new KeyValue<>(wViewByRegion, rCount);
    });
    // write to the result topic
    regionCount.to("streams-pageviewstats-typed-output", Produced.with(new JSONSerde<>(), new JSONSerde<>()));
    final KafkaStreams streams = new KafkaStreams(builder.build(), props);
    final CountDownLatch latch = new CountDownLatch(1);
    // attach shutdown handler to catch control-c
    Runtime.getRuntime().addShutdownHook(new Thread("streams-pipe-shutdown-hook") {

        @Override
        public void run() {
            streams.close();
            latch.countDown();
        }
    });
    try {
        streams.start();
        latch.await();
    } catch (final Throwable e) {
        e.printStackTrace();
        System.exit(1);
    }
    System.exit(0);
}
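
JSONSerde is declared inside PageViewTypedDemo and is not reproduced on this page. Judging from the imports (ObjectMapper, SerializationException, JsonTypeInfo/JsonSubTypes), it is a Jackson-backed serde that doubles as its own serializer and deserializer. A condensed sketch along those lines, where JSONSerdeCompatible stands for the demo's polymorphic base type carrying the Jackson type annotations:

@SuppressWarnings("unchecked")
public static class JSONSerde<T extends JSONSerdeCompatible> implements Serializer<T>, Deserializer<T>, Serde<T> {
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    @Override
    public byte[] serialize(final String topic, final T data) {
        if (data == null) {
            return null;
        }
        try {
            return OBJECT_MAPPER.writeValueAsBytes(data);
        } catch (final Exception e) {
            throw new SerializationException("Error serializing JSON message", e);
        }
    }

    @Override
    public T deserialize(final String topic, final byte[] data) {
        if (data == null) {
            return null;
        }
        try {
            // The type tag written via @JsonTypeInfo selects the concrete subtype on read.
            return (T) OBJECT_MAPPER.readValue(data, JSONSerdeCompatible.class);
        } catch (final IOException e) {
            throw new SerializationException(e);
        }
    }

    @Override
    public Serializer<T> serializer() {
        return this;
    }

    @Override
    public Deserializer<T> deserializer() {
        return this;
    }
}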

Example 19 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the Apache Kafka project.

From the class StreamsPartitionAssignorTest, method shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount. The windowed aggregation and the join below create internal topics whose partition counts the assignor must derive from the source topics; the deliberately corrupted topology builder makes that derivation impossible, so the assignor reports AssignorError.ASSIGNMENT_ERROR instead of producing a valid assignment:

@Test
public void shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount() {
    builder = new CorruptedInternalTopologyBuilder();
    topologyMetadata = new TopologyMetadata(builder, new StreamsConfig(configProps()));
    final InternalStreamsBuilder streamsBuilder = new InternalStreamsBuilder(builder);
    final KStream<String, String> inputTopic = streamsBuilder.stream(singleton("topic1"), new ConsumedInternal<>());
    final KTable<String, String> inputTable = streamsBuilder.table("topic2", new ConsumedInternal<>(), new MaterializedInternal<>(Materialized.as("store")));
    inputTopic
        .groupBy((k, v) -> k, Grouped.with("GroupName", Serdes.String(), Serdes.String()))
        .windowedBy(TimeWindows.of(Duration.ofMinutes(10)))
        .aggregate(() -> "", (k, v, a) -> a + k)
        .leftJoin(inputTable, v -> v, (x, y) -> x + y);
    streamsBuilder.buildAndOptimizeTopology();
    configureDefault();
    subscriptions.put("consumer", new Subscription(singletonList("topic"), defaultSubscriptionInfo.encode()));
    final Map<String, Assignment> assignments = partitionAssignor.assign(metadata, new GroupSubscription(subscriptions)).groupAssignment();
    assertThat(AssignmentInfo.decode(assignments.get("consumer").userData()).errCode(), equalTo(AssignorError.ASSIGNMENT_ERROR.code()));
}
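
For readers more used to the public API: the internal wrappers above (ConsumedInternal, MaterializedInternal) correspond directly to Consumed and Materialized, so the topology under test is roughly equivalent to this sketch (default serdes assumed where none are given):

final StreamsBuilder publicBuilder = new StreamsBuilder();
final KStream<String, String> stream = publicBuilder.stream("topic1");
final KTable<String, String> table = publicBuilder.table("topic2", Materialized.as("store"));
stream
    .groupBy((k, v) -> k, Grouped.with("GroupName", Serdes.String(), Serdes.String()))
    .windowedBy(TimeWindows.of(Duration.ofMinutes(10)))
    .aggregate(() -> "", (k, v, a) -> a + k)
    .leftJoin(table, v -> v, (x, y) -> x + y);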

Example 20 with KTable

Use of org.apache.kafka.streams.kstream.KTable in the Apache Kafka project.

From the class PageViewUntypedDemo, method main:

public static void main(final String[] args) throws Exception {
    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-untyped");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
    props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    final StreamsBuilder builder = new StreamsBuilder();
    final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
    final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();
    final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);
    final Consumed<String, JsonNode> consumed = Consumed.with(Serdes.String(), jsonSerde);
    final KStream<String, JsonNode> views = builder.stream("streams-pageview-input", consumed);
    final KTable<String, JsonNode> users = builder.table("streams-userprofile-input", consumed);
    final KTable<String, String> userRegions = users.mapValues(record -> record.get("region").textValue());
    final Duration duration24Hours = Duration.ofHours(24);
    final KStream<JsonNode, JsonNode> regionCount = views.leftJoin(userRegions, (view, region) -> {
        final ObjectNode jNode = JsonNodeFactory.instance.objectNode();
        return (JsonNode) jNode
            .put("user", view.get("user").textValue())
            .put("page", view.get("page").textValue())
            .put("region", region == null ? "UNKNOWN" : region);
    }).map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").textValue(), viewRegion))
        .groupByKey(Grouped.with(Serdes.String(), jsonSerde))
        .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(7), duration24Hours).advanceBy(Duration.ofSeconds(1)))
        .count()
        .toStream()
        .map((key, value) -> {
        final ObjectNode keyNode = JsonNodeFactory.instance.objectNode();
        keyNode.put("window-start", key.window().start()).put("region", key.key());
        final ObjectNode valueNode = JsonNodeFactory.instance.objectNode();
        valueNode.put("count", value);
        return new KeyValue<>((JsonNode) keyNode, (JsonNode) valueNode);
    });
    // write to the result topic
    regionCount.to("streams-pageviewstats-untyped-output", Produced.with(jsonSerde, jsonSerde));
    final KafkaStreams streams = new KafkaStreams(builder.build(), props);
    streams.start();
    // Usually the streams application would run forever. In this example,
    // we let it run for a while and then stop, since the input data is finite.
    Thread.sleep(5000L);
    streams.close();
}
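
The untyped demo expects page-view records carrying "user" and "page" fields and user-profile records carrying a "region" field, both keyed by user. A hypothetical pair of inputs built with the same Jackson node factory used above (the field values are invented for illustration):

// Hypothetical input records (not part of the original demo):
final ObjectNode view = JsonNodeFactory.instance.objectNode();
view.put("user", "alice").put("page", "index.html");
// produce to "streams-pageview-input" with key "alice"

final ObjectNode profile = JsonNodeFactory.instance.objectNode();
profile.put("region", "europe");
// produce to "streams-userprofile-input" with key "alice"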

Aggregations

Classes most often used together with KTable across the 56 examples on this page:

KTable (org.apache.kafka.streams.kstream.KTable): 56
Serdes (org.apache.kafka.common.serialization.Serdes): 51
Properties (java.util.Properties): 50
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder): 49
KeyValue (org.apache.kafka.streams.KeyValue): 41
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 41
Test (org.junit.Test): 40
KStream (org.apache.kafka.streams.kstream.KStream): 39
Materialized (org.apache.kafka.streams.kstream.Materialized): 35
StringSerializer (org.apache.kafka.common.serialization.StringSerializer): 32
Consumed (org.apache.kafka.streams.kstream.Consumed): 32
Produced (org.apache.kafka.streams.kstream.Produced): 28
Bytes (org.apache.kafka.common.utils.Bytes): 27
KeyValueStore (org.apache.kafka.streams.state.KeyValueStore): 27
MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 27
Grouped (org.apache.kafka.streams.kstream.Grouped): 25
List (java.util.List): 24
Serde (org.apache.kafka.common.serialization.Serde): 24
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer): 24
KafkaStreams (org.apache.kafka.streams.KafkaStreams): 24