
Example 86 with Topology

Use of org.apache.kafka.streams.Topology in project kafka by apache.

The class HighAvailabilityTaskAssignorIntegrationTest, method shouldScaleOutWithWarmupTasks.

private void shouldScaleOutWithWarmupTasks(final Function<String, Materialized<Object, Object, KeyValueStore<Bytes, byte[]>>> materializedFunction) throws InterruptedException {
    final String testId = safeUniqueTestName(getClass(), testName);
    final String appId = "appId_" + System.currentTimeMillis() + "_" + testId;
    final String inputTopic = "input" + testId;
    final Set<TopicPartition> inputTopicPartitions = mkSet(new TopicPartition(inputTopic, 0), new TopicPartition(inputTopic, 1));
    final String storeName = "store" + testId;
    final String storeChangelog = appId + "-store" + testId + "-changelog";
    final Set<TopicPartition> changelogTopicPartitions = mkSet(new TopicPartition(storeChangelog, 0), new TopicPartition(storeChangelog, 1));
    IntegrationTestUtils.cleanStateBeforeTest(CLUSTER, 2, inputTopic, storeChangelog);
    final ReentrantLock assignmentLock = new ReentrantLock();
    final AtomicInteger assignmentsCompleted = new AtomicInteger(0);
    final Map<Integer, Boolean> assignmentsStable = new ConcurrentHashMap<>();
    final AtomicBoolean assignmentStable = new AtomicBoolean(false);
    final AssignmentListener assignmentListener = stable -> {
        assignmentLock.lock();
        try {
            final int thisAssignmentIndex = assignmentsCompleted.incrementAndGet();
            assignmentsStable.put(thisAssignmentIndex, stable);
            assignmentStable.set(stable);
        } finally {
            assignmentLock.unlock();
        }
    };
    final StreamsBuilder builder = new StreamsBuilder();
    builder.table(inputTopic, materializedFunction.apply(storeName));
    final Topology topology = builder.build();
    final int numberOfRecords = 500;
    produceTestData(inputTopic, numberOfRecords);
    try (final KafkaStreams kafkaStreams0 = new KafkaStreams(topology, streamsProperties(appId, assignmentListener));
        final KafkaStreams kafkaStreams1 = new KafkaStreams(topology, streamsProperties(appId, assignmentListener));
        final Consumer<String, String> consumer = new KafkaConsumer<>(getConsumerProperties())) {
        kafkaStreams0.start();
        // sanity check: just make sure we actually wrote all the input records
        TestUtils.waitForCondition(
            () -> getEndOffsetSum(inputTopicPartitions, consumer) == numberOfRecords,
            120_000L,
            () -> "Input records haven't all been written to the input topic: " + getEndOffsetSum(inputTopicPartitions, consumer));
        // wait until all the input records are in the changelog
        TestUtils.waitForCondition(
            () -> getEndOffsetSum(changelogTopicPartitions, consumer) == numberOfRecords,
            120_000L,
            () -> "Input records haven't all been written to the changelog: " + getEndOffsetSum(changelogTopicPartitions, consumer));
        final AtomicLong instance1TotalRestored = new AtomicLong(-1);
        final AtomicLong instance1NumRestored = new AtomicLong(-1);
        final CountDownLatch restoreCompleteLatch = new CountDownLatch(1);
        kafkaStreams1.setGlobalStateRestoreListener(new StateRestoreListener() {

            @Override
            public void onRestoreStart(final TopicPartition topicPartition, final String storeName, final long startingOffset, final long endingOffset) {
            }

            @Override
            public void onBatchRestored(final TopicPartition topicPartition, final String storeName, final long batchEndOffset, final long numRestored) {
                instance1NumRestored.accumulateAndGet(numRestored, (prev, restored) -> prev == -1 ? restored : prev + restored);
            }

            @Override
            public void onRestoreEnd(final TopicPartition topicPartition, final String storeName, final long totalRestored) {
                instance1TotalRestored.accumulateAndGet(totalRestored, (prev, restored) -> prev == -1 ? restored : prev + restored);
                restoreCompleteLatch.countDown();
            }
        });
        final int assignmentsBeforeScaleOut = assignmentsCompleted.get();
        kafkaStreams1.start();
        TestUtils.waitForCondition(() -> {
            assignmentLock.lock();
            try {
                if (assignmentsCompleted.get() > assignmentsBeforeScaleOut) {
                    assertFalseNoRetry(assignmentsStable.get(assignmentsBeforeScaleOut + 1), "the first assignment after adding a node should be unstable while we warm up the state.");
                    return true;
                } else {
                    return false;
                }
            } finally {
                assignmentLock.unlock();
            }
        }, 120_000L, "Never saw a first assignment after scale out: " + assignmentsCompleted.get());
        TestUtils.waitForCondition(
            assignmentStable::get,
            120_000L,
            "Assignment hasn't become stable: " + assignmentsCompleted.get()
                + " Note, if this does fail, check and see if the new instance just failed to catch up within"
                + " the probing rebalance interval. A full minute should be long enough to read ~500 records"
                + " in any test environment, but you never know...");
        restoreCompleteLatch.await();
        // We should finalize the restoration without having restored any records (because they're
        // already in the store). Otherwise, we failed to properly re-use the state from the standby.
        assertThat(instance1TotalRestored.get(), is(0L));
        // Belt-and-suspenders check that we never even attempt to restore any records.
        assertThat(instance1NumRestored.get(), is(-1L));
    }
}
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) Stores(org.apache.kafka.streams.state.Stores) Utils.mkProperties(org.apache.kafka.common.utils.Utils.mkProperties) Utils.mkMap(org.apache.kafka.common.utils.Utils.mkMap) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) IntegrationTestUtils.safeUniqueTestName(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Map(java.util.Map) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) Consumer(org.apache.kafka.clients.consumer.Consumer) TopicPartition(org.apache.kafka.common.TopicPartition) AfterClass(org.junit.AfterClass) TestUtils(org.apache.kafka.test.TestUtils) Utils.mkObjectProperties(org.apache.kafka.common.utils.Utils.mkObjectProperties) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Utils.mkSet(org.apache.kafka.common.utils.Utils.mkSet) Set(java.util.Set) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Category(org.junit.experimental.categories.Category) StateRestoreListener(org.apache.kafka.streams.processor.StateRestoreListener) Bytes(org.apache.kafka.common.utils.Bytes) IntegrationTestUtils(org.apache.kafka.streams.integration.utils.IntegrationTestUtils) CountDownLatch(java.util.concurrent.CountDownLatch) Utils.mkEntry(org.apache.kafka.common.utils.Utils.mkEntry) Materialized(org.apache.kafka.streams.kstream.Materialized) Matchers.is(org.hamcrest.Matchers.is) Topology(org.apache.kafka.streams.Topology) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) StreamsConfig(org.apache.kafka.streams.StreamsConfig) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) BeforeClass(org.junit.BeforeClass) IntegrationTest(org.apache.kafka.test.IntegrationTest) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AssignmentListener(org.apache.kafka.streams.processor.internals.assignment.AssignorConfiguration.AssignmentListener) Function(java.util.function.Function) NoRetryException(org.apache.kafka.test.NoRetryException) KafkaProducer(org.apache.kafka.clients.producer.KafkaProducer) EmbeddedKafkaCluster(org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster) TestName(org.junit.rules.TestName) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) HighAvailabilityTaskAssignor(org.apache.kafka.streams.processor.internals.assignment.HighAvailabilityTaskAssignor) StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Properties(java.util.Properties) Producer(org.apache.kafka.clients.producer.Producer) Test(org.junit.Test) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Rule(org.junit.Rule) KafkaStreams(org.apache.kafka.streams.KafkaStreams)
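
Both waitForCondition calls above poll a getEndOffsetSum helper that this excerpt does not show. A minimal sketch of such a helper, assuming only the Consumer#endOffsets API (the real implementation lives elsewhere in the test class):

private static long getEndOffsetSum(final Set<TopicPartition> partitions, final Consumer<String, String> consumer) {
    long sum = 0;
    // endOffsets() returns the current log-end offset per partition without seeking the consumer
    for (final long endOffset : consumer.endOffsets(partitions).values()) {
        sum += endOffset;
    }
    return sum;
}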

Example 87 with Topology

Use of org.apache.kafka.streams.Topology in project kafka by apache.

The class KTableKTableForeignKeyJoinMaterializationIntegrationTest, method shouldEmitTombstoneWhenDeletingNonJoiningRecords.

@Test
public void shouldEmitTombstoneWhenDeletingNonJoiningRecords() {
    final Topology topology = getTopology(streamsConfig, "store");
    try (final TopologyTestDriver driver = new TopologyTestDriver(topology, streamsConfig)) {
        final TestInputTopic<String, String> left = driver.createInputTopic(LEFT_TABLE, new StringSerializer(), new StringSerializer());
        final TestOutputTopic<String, String> outputTopic = driver.createOutputTopic(OUTPUT, new StringDeserializer(), new StringDeserializer());
        final KeyValueStore<String, String> store = driver.getKeyValueStore("store");
        left.pipeInput("lhs1", "lhsValue1|rhs1");
        assertThat(outputTopic.readKeyValuesToMap(), is(emptyMap()));
        if (materialized && queryable) {
            assertThat(asMap(store), is(emptyMap()));
        }
        // Deleting a non-joining record produces an unnecessary tombstone for inner joins, because
        // it's not possible to know whether a result was previously emitted.
        left.pipeInput("lhs1", (String) null);
        {
            if (materialized && queryable) {
                // In this specific case only, the record cache will actually be activated and
                // suppress the unnecessary tombstone. This is because the cache can determine for
                // sure that there has never been a previous result: the "old" and "new" values are
                // both null, and the underlying store is also missing the record in question.
                assertThat(outputTopic.readKeyValuesToMap(), is(emptyMap()));
                assertThat(asMap(store), is(emptyMap()));
            } else {
                assertThat(outputTopic.readKeyValuesToMap(), is(mkMap(mkEntry("lhs1", null))));
            }
        }
        // Deleting a non-existing record is idempotent
        left.pipeInput("lhs1", (String) null);
        {
            assertThat(outputTopic.readKeyValuesToMap(), is(emptyMap()));
            if (materialized && queryable) {
                assertThat(asMap(store), is(emptyMap()));
            }
        }
    }
}
Also used : StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) Topology(org.apache.kafka.streams.Topology) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) Test(org.junit.Test)
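
The getTopology(streamsConfig, "store") call is not shown in this excerpt. A minimal sketch of the kind of foreign-key join it builds, assuming a RIGHT_TABLE constant alongside the LEFT_TABLE and OUTPUT topics above and the materialized-and-queryable variant; the left value encodes its foreign key after a '|' separator, matching the "lhsValue1|rhs1" input:

private Topology getTopology(final Properties streamsConfig, final String storeName) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KTable<String, String> left = builder.table(LEFT_TABLE, Consumed.with(Serdes.String(), Serdes.String()));
    final KTable<String, String> right = builder.table(RIGHT_TABLE, Consumed.with(Serdes.String(), Serdes.String()));
    left.join(
            right,
            value -> value.split("\\|")[1],  // foreign-key extractor: "rhs1" from "lhsValue1|rhs1"
            (leftValue, rightValue) -> "(" + leftValue + "," + rightValue + ")",
            Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeName))
        .toStream()
        .to(OUTPUT, Produced.with(Serdes.String(), Serdes.String()));
    return builder.build(streamsConfig);
}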

Example 88 with Topology

Use of org.apache.kafka.streams.Topology in project kafka by apache.

The class AbstractResetIntegrationTest, method setupTopologyWithIntermediateTopic.

@SuppressWarnings("deprecation")
private Topology setupTopologyWithIntermediateTopic(final boolean useRepartitioned, final String outputTopic2) {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Long, String> input = builder.stream(INPUT_TOPIC);
    // use map to trigger internal re-partitioning before groupByKey
    input.map(KeyValue::new)
        .groupByKey()
        .count()
        .toStream()
        .to(OUTPUT_TOPIC, Produced.with(Serdes.Long(), Serdes.Long()));
    final KStream<Long, String> stream;
    if (useRepartitioned) {
        stream = input.repartition();
    } else {
        input.to(INTERMEDIATE_USER_TOPIC);
        stream = builder.stream(INTERMEDIATE_USER_TOPIC);
    }
    stream.groupByKey()
        .windowedBy(TimeWindows.of(ofMillis(35)).advanceBy(ofMillis(10)))
        .count()
        .toStream()
        .map((key, value) -> new KeyValue<>(key.window().start() + key.window().end(), value))
        .to(outputTopic2, Produced.with(Serdes.Long(), Serdes.Long()));
    return builder.build();
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Password(org.apache.kafka.common.config.types.Password) StreamsConfig(org.apache.kafka.streams.StreamsConfig) MockTime(org.apache.kafka.common.utils.MockTime) Arrays(java.util.Arrays) Produced(org.apache.kafka.streams.kstream.Produced) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) IntegrationTest(org.apache.kafka.test.IntegrationTest) KStream(org.apache.kafka.streams.kstream.KStream) ArrayList(java.util.ArrayList) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) EmbeddedKafkaCluster(org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster) TestName(org.junit.rules.TestName) Duration(java.time.Duration) Map(java.util.Map) StreamsResetter(kafka.tools.StreamsResetter) Admin(org.apache.kafka.clients.admin.Admin) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) SslConfigs(org.apache.kafka.common.config.SslConfigs) Topic(org.apache.kafka.common.internals.Topic) CommonClientConfigs(org.apache.kafka.clients.CommonClientConfigs) AfterClass(org.junit.AfterClass) Properties(java.util.Properties) IntegrationTestUtils.waitForEmptyConsumerGroup(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForEmptyConsumerGroup) TestUtils(org.apache.kafka.test.TestUtils) BufferedWriter(java.io.BufferedWriter) KeyValue(org.apache.kafka.streams.KeyValue) FileWriter(java.io.FileWriter) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) File(java.io.File) IntegrationTestUtils(org.apache.kafka.streams.integration.utils.IntegrationTestUtils) List(java.util.List) Rule(org.junit.Rule) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Assert(org.junit.Assert) Collections(java.util.Collections) Duration.ofMillis(java.time.Duration.ofMillis) Topology(org.apache.kafka.streams.Topology) TemporaryFolder(org.junit.rules.TemporaryFolder)
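
The @SuppressWarnings("deprecation") on this method covers TimeWindows.of(...), which newer Kafka releases deprecate in favor of factories with an explicit grace period. A sketch of the equivalent windowing step on Kafka 3.0+ (the deprecated factory implied a default grace period of roughly 24 hours, which the replacement makes explicit):

    stream.groupByKey()
        .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(35), Duration.ofHours(24)).advanceBy(ofMillis(10)))
        .count();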

Example 89 with Topology

Use of org.apache.kafka.streams.Topology in project kafka by apache.

The class AbstractResetIntegrationTest, method setupTopologyWithoutIntermediateUserTopic.

protected Topology setupTopologyWithoutIntermediateUserTopic() {
    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<Long, String> input = builder.stream(INPUT_TOPIC);
    // map the value to the key (unlike the sibling topology above, no grouping follows)
    input.map((key, value) -> new KeyValue<>(key, key)).to(OUTPUT_TOPIC, Produced.with(Serdes.Long(), Serdes.Long()));
    return builder.build();
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) Password(org.apache.kafka.common.config.types.Password) StreamsConfig(org.apache.kafka.streams.StreamsConfig) MockTime(org.apache.kafka.common.utils.MockTime) Arrays(java.util.Arrays) Produced(org.apache.kafka.streams.kstream.Produced) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) IntegrationTest(org.apache.kafka.test.IntegrationTest) KStream(org.apache.kafka.streams.kstream.KStream) ArrayList(java.util.ArrayList) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) EmbeddedKafkaCluster(org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster) TestName(org.junit.rules.TestName) Duration(java.time.Duration) Map(java.util.Map) StreamsResetter(kafka.tools.StreamsResetter) Admin(org.apache.kafka.clients.admin.Admin) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ProducerConfig(org.apache.kafka.clients.producer.ProducerConfig) SslConfigs(org.apache.kafka.common.config.SslConfigs) Topic(org.apache.kafka.common.internals.Topic) CommonClientConfigs(org.apache.kafka.clients.CommonClientConfigs) AfterClass(org.junit.AfterClass) Properties(java.util.Properties) IntegrationTestUtils.waitForEmptyConsumerGroup(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForEmptyConsumerGroup) TestUtils(org.apache.kafka.test.TestUtils) BufferedWriter(java.io.BufferedWriter) KeyValue(org.apache.kafka.streams.KeyValue) FileWriter(java.io.FileWriter) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) Test(org.junit.Test) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) File(java.io.File) IntegrationTestUtils(org.apache.kafka.streams.integration.utils.IntegrationTestUtils) List(java.util.List) Rule(org.junit.Rule) TimeWindows(org.apache.kafka.streams.kstream.TimeWindows) KafkaStreams(org.apache.kafka.streams.KafkaStreams) Assert(org.junit.Assert) Collections(java.util.Collections) Duration.ofMillis(java.time.Duration.ofMillis) Topology(org.apache.kafka.streams.Topology) TemporaryFolder(org.junit.rules.TemporaryFolder)
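
A quick, illustrative way to see what this topology does is to drive it with TopologyTestDriver. The driver setup and assertions below are a sketch, not part of the original test, and assume dummy application/bootstrap settings plus default Long/String serdes:

final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "reset-topology-sketch");  // hypothetical id
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");          // never contacted by the driver
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Long().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
try (final TopologyTestDriver driver = new TopologyTestDriver(setupTopologyWithoutIntermediateUserTopic(), props)) {
    final TestInputTopic<Long, String> input =
        driver.createInputTopic(INPUT_TOPIC, new LongSerializer(), new StringSerializer());
    final TestOutputTopic<Long, Long> output =
        driver.createOutputTopic(OUTPUT_TOPIC, new LongDeserializer(), new LongDeserializer());
    input.pipeInput(1L, "some-value");
    // the map step rewrites each record's value to its key, so key and value match on output
    assertThat(output.readKeyValue(), equalTo(new KeyValue<>(1L, 1L)));
}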

Example 90 with Topology

Use of org.apache.kafka.streams.Topology in project kafka by apache.

The class KStreamKTableJoinTest, method shouldCreateRepartitionTopicsWithUserProvidedName.

@Test
public void shouldCreateRepartitionTopicsWithUserProvidedName() {
    final StreamsBuilder builder = new StreamsBuilder();
    final Properties props = new Properties();
    props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.NO_OPTIMIZATION);
    final KStream<String, String> streamA = builder.stream("topic", Consumed.with(Serdes.String(), Serdes.String()));
    final KTable<String, String> tableB = builder.table("topic2", Consumed.with(Serdes.String(), Serdes.String()));
    final KTable<String, String> tableC = builder.table("topic3", Consumed.with(Serdes.String(), Serdes.String()));
    final KStream<String, String> rekeyedStream = streamA.map((k, v) -> new KeyValue<>(v, k));
    rekeyedStream.join(tableB, (value1, value2) -> value1 + value2, Joined.with(Serdes.String(), Serdes.String(), Serdes.String(), "first-join")).to("out-one");
    rekeyedStream.join(tableC, (value1, value2) -> value1 + value2, Joined.with(Serdes.String(), Serdes.String(), Serdes.String(), "second-join")).to("out-two");
    final Topology topology = builder.build(props);
    System.out.println(topology.describe().toString());
    assertEquals(expectedTopologyWithUserProvidedRepartitionTopicNames, topology.describe().toString());
}
Also used : StreamsBuilder(org.apache.kafka.streams.StreamsBuilder) StreamsConfig(org.apache.kafka.streams.StreamsConfig) CoreMatchers.hasItem(org.hamcrest.CoreMatchers.hasItem) Arrays(java.util.Arrays) Random(java.util.Random) KStream(org.apache.kafka.streams.kstream.KStream) Joined(org.apache.kafka.streams.kstream.Joined) MockApiProcessor(org.apache.kafka.test.MockApiProcessor) HashSet(java.util.HashSet) MockApiProcessorSupplier(org.apache.kafka.test.MockApiProcessorSupplier) Duration(java.time.Duration) TopologyWrapper(org.apache.kafka.streams.TopologyWrapper) After(org.junit.After) IntegerSerializer(org.apache.kafka.common.serialization.IntegerSerializer) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) TopologyTestDriver(org.apache.kafka.streams.TopologyTestDriver) MockValueJoiner(org.apache.kafka.test.MockValueJoiner) KTable(org.apache.kafka.streams.kstream.KTable) Properties(java.util.Properties) Consumed(org.apache.kafka.streams.kstream.Consumed) Collection(java.util.Collection) KeyValue(org.apache.kafka.streams.KeyValue) Set(java.util.Set) Test(org.junit.Test) Instant(java.time.Instant) KeyValueTimestamp(org.apache.kafka.streams.KeyValueTimestamp) LogCaptureAppender(org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender) TestInputTopic(org.apache.kafka.streams.TestInputTopic) StreamsTestUtils(org.apache.kafka.test.StreamsTestUtils) Assert.assertEquals(org.junit.Assert.assertEquals) Topology(org.apache.kafka.streams.Topology)
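
Comparing the entire describe() output string is brittle; the TopologyDescription API (org.apache.kafka.streams.TopologyDescription) can assert just the sink topics instead. A sketch, assuming the user-named join repartition topics follow the <name>-repartition convention:

final Set<String> sinkTopics = new HashSet<>();
for (final TopologyDescription.Subtopology subtopology : topology.describe().subtopologies()) {
    for (final TopologyDescription.Node node : subtopology.nodes()) {
        if (node instanceof TopologyDescription.Sink) {
            // collect every topic written by a sink node, including internal repartition topics
            sinkTopics.add(((TopologyDescription.Sink) node).topic());
        }
    }
}
assertThat(sinkTopics, hasItem("first-join-repartition"));
assertThat(sinkTopics, hasItem("second-join-repartition"));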

Aggregations

Topology (org.apache.kafka.streams.Topology) 127
Test (org.junit.Test) 106
StreamsBuilder (org.apache.kafka.streams.StreamsBuilder) 93
KafkaStreams (org.apache.kafka.streams.KafkaStreams) 53
TopologyTestDriver (org.apache.kafka.streams.TopologyTestDriver) 53
Properties (java.util.Properties) 47
StringSerializer (org.apache.kafka.common.serialization.StringSerializer) 46
KeyValue (org.apache.kafka.streams.KeyValue) 40
Serdes (org.apache.kafka.common.serialization.Serdes) 39
StreamsConfig (org.apache.kafka.streams.StreamsConfig) 33
List (java.util.List) 29
MutableSpan (brave.handler.MutableSpan) 28
Consumed (org.apache.kafka.streams.kstream.Consumed) 28
Produced (org.apache.kafka.streams.kstream.Produced) 26
Arrays (java.util.Arrays) 25
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer) 25
ArrayList (java.util.ArrayList) 23
ProcessorContext (org.apache.kafka.streams.processor.ProcessorContext) 23
Duration (java.time.Duration) 22
KStream (org.apache.kafka.streams.kstream.KStream) 22