
Example 1 with State

Use of org.apache.kafka.streams.KafkaStreams.State in the Apache Kafka project.

From the class KafkaStreamsTest, method testStateThreadClose.

@Test
public void testStateThreadClose() throws Exception {
    // make sure we have the global state thread running too
    final StreamsBuilder builder = getBuilderWithSource();
    builder.globalTable("anyTopic");
    try (final KafkaStreams streams = new KafkaStreams(builder.build(), props, supplier, time)) {
        assertEquals(NUM_THREADS, streams.threads.size());
        assertEquals(streams.state(), KafkaStreams.State.CREATED);
        streams.start();
        waitForCondition(() -> streams.state() == KafkaStreams.State.RUNNING, "Streams never started.");
        for (int i = 0; i < NUM_THREADS; i++) {
            final StreamThread tmpThread = streams.threads.get(i);
            tmpThread.shutdown();
            waitForCondition(() -> tmpThread.state() == StreamThread.State.DEAD, "Thread never stopped.");
            streams.threads.get(i).join();
        }
        waitForCondition(() -> streams.metadataForLocalThreads().stream().allMatch(t -> t.threadState().equals("DEAD")), "Streams never stopped");
        streams.close();
        waitForCondition(() -> streams.state() == KafkaStreams.State.NOT_RUNNING, "Streams never stopped.");
        assertNull(streams.globalStreamThread);
    }
}
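
The test above forces state transitions by shutting down the stream threads directly. In application code, the same KafkaStreams.State transitions are normally observed through the public setStateListener API. A minimal sketch, assuming placeholder topic names, application id, and broker address:

import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Produced;

public class StateListenerExample {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        // placeholder configuration values
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "state-listener-demo");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");

        // placeholder topology: copy one topic to another
        final StreamsBuilder builder = new StreamsBuilder();
        builder.stream("anyInputTopic", Consumed.with(Serdes.String(), Serdes.String()))
               .to("anyOutputTopic", Produced.with(Serdes.String(), Serdes.String()));

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        // register the listener before start() so CREATED -> REBALANCING -> RUNNING is not missed
        streams.setStateListener((newState, oldState) ->
            System.out.println("state changed: " + oldState + " -> " + newState));
        streams.start();
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
}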

Example 2 with State

Use of org.apache.kafka.streams.KafkaStreams.State in the Apache Kafka project.

From the class QueryableStateIntegrationTest, method createCountStream.

/**
 * Creates a typical word count topology
 */
private KafkaStreams createCountStream(final String inputTopic, final String outputTopic, final String windowOutputTopic, final String storeName, final String windowStoreName, final Properties streamsConfiguration) {
    final StreamsBuilder builder = new StreamsBuilder();
    final Serde<String> stringSerde = Serdes.String();
    final KStream<String, String> textLines = builder.stream(inputTopic, Consumed.with(stringSerde, stringSerde));
    final KGroupedStream<String, String> groupedByWord = textLines.flatMapValues((ValueMapper<String, Iterable<String>>) value -> Arrays.asList(value.split("\\W+"))).groupBy(MockMapper.selectValueMapper());
    // Create a State Store for the all time word count
    groupedByWord.count(Materialized.as(storeName + "-" + inputTopic)).toStream().to(outputTopic, Produced.with(Serdes.String(), Serdes.Long()));
    // Create a Windowed State Store that contains the word count for every 1 minute
    groupedByWord.windowedBy(TimeWindows.of(ofMillis(WINDOW_SIZE))).count(Materialized.as(windowStoreName + "-" + inputTopic)).toStream((key, value) -> key.key()).to(windowOutputTopic, Produced.with(Serdes.String(), Serdes.Long()));
    return new KafkaStreams(builder.build(), streamsConfiguration);
}
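
Outside the integration-test harness, a topology like the one built by createCountStream only needs a handful of StreamsConfig properties before it can be started. A minimal sketch of such a runner method, assumed to live in the same class as createCountStream; the broker address, topic names, and store names are placeholders:

// Hypothetical runner method (uses java.util.Properties, StreamsConfig, Serdes, KafkaStreams).
private KafkaStreams startCountStreamLocally() {
    final Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "queryable-state-demo");   // placeholder
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");      // placeholder
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    final KafkaStreams streams = createCountStream(
        "words-input", "word-counts", "windowed-word-counts",          // placeholder topics
        "word-count-store", "windowed-word-count-store",               // placeholder store names
        streamsConfiguration);
    streams.start();
    return streams;
}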

Example 3 with State

Use of org.apache.kafka.streams.KafkaStreams.State in the Apache Kafka project.

From the class IntegrationTestUtils, method startApplicationAndWaitUntilRunning.

/**
 * Starts the given {@link KafkaStreams} instances and waits for all of them to reach the
 * {@link State#RUNNING} state at the same time. Note that states may change between the time
 * that this method returns and the calling function executes its next statement.<p>
 *
 * If the application is already started, use {@link #waitForApplicationState(List, State, Duration)}
 * to wait for instances to reach {@link State#RUNNING} state.
 *
 * @param streamsList the list of streams instances to run.
 * @param timeout the time to wait for the streams to all be in {@link State#RUNNING} state.
 */
public static void startApplicationAndWaitUntilRunning(final List<KafkaStreams> streamsList, final Duration timeout) throws Exception {
    final Lock stateLock = new ReentrantLock();
    final Condition stateUpdate = stateLock.newCondition();
    final Map<KafkaStreams, State> stateMap = new HashMap<>();
    for (final KafkaStreams streams : streamsList) {
        stateMap.put(streams, streams.state());
        final StateListener prevStateListener = getStateListener(streams);
        final StateListener newStateListener = (newState, oldState) -> {
            stateLock.lock();
            try {
                stateMap.put(streams, newState);
                if (newState == State.RUNNING) {
                    if (stateMap.values().stream().allMatch(state -> state == State.RUNNING)) {
                        stateUpdate.signalAll();
                    }
                }
            } finally {
                stateLock.unlock();
            }
        };
        streams.setStateListener(prevStateListener != null ? new CompositeStateListener(prevStateListener, newStateListener) : newStateListener);
    }
    for (final KafkaStreams streams : streamsList) {
        streams.start();
    }
    final long expectedEnd = System.currentTimeMillis() + timeout.toMillis();
    stateLock.lock();
    try {
        // loop until every instance reports RUNNING, or fail once the timeout has expired
        while (true) {
            final Map<KafkaStreams, State> nonRunningStreams = new HashMap<>();
            for (final Entry<KafkaStreams, State> entry : stateMap.entrySet()) {
                if (entry.getValue() != State.RUNNING) {
                    nonRunningStreams.put(entry.getKey(), entry.getValue());
                }
            }
            if (nonRunningStreams.isEmpty()) {
                return;
            }
            final long millisRemaining = expectedEnd - System.currentTimeMillis();
            if (millisRemaining <= 0) {
                fail("Application did not reach a RUNNING state for all streams instances. " + "Non-running instances: " + nonRunningStreams);
            }
            stateUpdate.await(millisRemaining, TimeUnit.MILLISECONDS);
        }
    } finally {
        stateLock.unlock();
    }
}
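
A typical call site builds all KafkaStreams instances first and then hands them to this helper, which registers its own state listeners, starts every instance, and blocks until all of them report State.RUNNING or the timeout elapses. A minimal usage sketch, assuming streamsOne and streamsTwo are already constructed but not yet started:

// streamsOne and streamsTwo share the same application.id and have not been started yet
final List<KafkaStreams> streamsList = Arrays.asList(streamsOne, streamsTwo);

// blocks until both instances are RUNNING at the same time, or fails the test after 60 seconds
IntegrationTestUtils.startApplicationAndWaitUntilRunning(streamsList, Duration.ofSeconds(60));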

Example 4 with State

Use of org.apache.kafka.streams.KafkaStreams.State in the Apache Kafka project.

From the class EosV2UpgradeIntegrationTest, method shouldUpgradeFromEosAlphaToEosV2.

@SuppressWarnings("deprecation")
@Test
public void shouldUpgradeFromEosAlphaToEosV2() throws Exception {
    // We use two KafkaStreams clients that we upgrade from eos-alpha to eos-V2. During the upgrade,
    // we ensure that there are pending transactions and verify that data is processed correctly.
    // 
    // We either close clients cleanly (`injectError = false`) or let them crash (`injectError = true`) during
    // the upgrade. For both cases, EOS should not be violated.
    // 
    // Additionally, we inject errors while one client is on eos-alpha and the other client is on eos-V2:
    // For this case, we inject the error during task commit phase, i.e., after offsets are appended to a TX,
    // and before the TX is committed. The goal is to verify that the written but uncommitted offsets are not
    // picked up, i.e., GroupCoordinator fencing works correctly.
    // 
    // The commit interval is set to MAX_VALUE and the `Processor` we use requests commits manually, so we have full
    // control when a commit actually happens. We use an input topic with 4 partitions and each task will request
    // a commit after processing 10 records.
    // 
    // 1.  start both clients and wait until rebalance stabilizes
    // 2.  write 10 records per input topic partition and verify that the result was committed
    // 3.  write 5 records per input topic partition to get pending transactions (verified via "read_uncommitted" mode)
    // - all 4 pending transactions are based on task producers
    // - we will get only 4 pending writes for one partition for the crash case as we crash processing the 5th record
    // 4.  stop/crash the first client, wait until rebalance stabilizes:
    // - stop case:
    // * verify that the stopped client did commit its pending transaction during shutdown
    // * the second client will still have two pending transactions
    // - crash case:
    // * the pending transactions of the crashed client got aborted
    // * the second client will have four pending transactions
    // 5.  restart the first client with eos-V2 enabled and wait until rebalance stabilizes
    // - the rebalance should result in a commit of all tasks
    // 6.  write 5 records per input topic partition
    // - stop case:
    // * verify that the result was committed
    // - crash case:
    // * fail the second (i.e., eos-alpha) client during commit
    // * the eos-V2 client should not pickup the pending offsets
    // * verify uncommitted and committed result
    // 7.  only for crash case:
    // 7a. restart the second client in eos-alpha mode and wait until rebalance stabilizes
    // 7b. write 10 records per input topic partition
    // * fail the first (i.e., eos-V2) client during commit
    // * the eos-alpha client should not pickup the pending offsets
    // * verify uncommitted and committed result
    // 7c. restart the first client in eos-V2 mode and wait until rebalance stabilizes
    // 8.  write 5 records per input topic partition to get pending transactions (verified via "read_uncommitted" mode)
    // - 2 transactions are based on a task producer; one transaction is based on a thread producer
    // - we will get 4 pending writes for the crash case as we crash processing the 5th record
    // 9.  stop/crash the second client and wait until rebalance stabilizes:
    // - stop only:
    // * verify that the stopped client did commit its pending transaction during shutdown
    // * the first client will still have one pending transaction
    // - crash case:
    // * the pending transactions of the crashed client got aborted
    // * the first client will have one pending transaction
    // 10. restart the second client with eos-V2 enabled and wait until rebalance stabilizes
    // - the rebalance should result in a commit of all tasks
    // 11. write 5 records per input topic partition and verify that the result was committed
    final List<KeyValue<KafkaStreams.State, KafkaStreams.State>> stateTransitions1 = new LinkedList<>();
    KafkaStreams streams1Alpha = null;
    KafkaStreams streams1V2 = null;
    KafkaStreams streams1V2Two = null;
    final List<KeyValue<KafkaStreams.State, KafkaStreams.State>> stateTransitions2 = new LinkedList<>();
    KafkaStreams streams2Alpha = null;
    KafkaStreams streams2AlphaTwo = null;
    KafkaStreams streams2V2 = null;
    try {
        // phase 1: start both clients
        streams1Alpha = getKafkaStreams(APP_DIR_1, StreamsConfig.EXACTLY_ONCE);
        streams1Alpha.setStateListener((newState, oldState) -> stateTransitions1.add(KeyValue.pair(oldState, newState)));
        assignmentListener.prepareForRebalance();
        streams1Alpha.cleanUp();
        streams1Alpha.start();
        assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
        waitForRunning(stateTransitions1);
        streams2Alpha = getKafkaStreams(APP_DIR_2, StreamsConfig.EXACTLY_ONCE);
        streams2Alpha.setStateListener((newState, oldState) -> stateTransitions2.add(KeyValue.pair(oldState, newState)));
        stateTransitions1.clear();
        assignmentListener.prepareForRebalance();
        streams2Alpha.cleanUp();
        streams2Alpha.start();
        assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
        waitForRunning(stateTransitions1);
        waitForRunning(stateTransitions2);
        // in all phases, we write comments that assume that p-0/p-1 are assigned to the first client
        // and p-2/p-3 are assigned to the second client (in reality the assignment might be different though)
        // phase 2: (write first batch of data)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // p-0: ---> 10 rec + C
        // p-1: ---> 10 rec + C
        // p-2: ---> 10 rec + C
        // p-3: ---> 10 rec + C
        final List<KeyValue<Long, Long>> committedInputDataBeforeUpgrade = prepareData(0L, 10L, 0L, 1L, 2L, 3L);
        writeInputData(committedInputDataBeforeUpgrade);
        waitForCondition(() -> commitRequested.get() == 4, MAX_WAIT_TIME_MS, "StreamsTasks did not request commit.");
        final Map<Long, Long> committedState = new HashMap<>();
        final List<KeyValue<Long, Long>> expectedUncommittedResult = computeExpectedResult(committedInputDataBeforeUpgrade, committedState);
        verifyCommitted(expectedUncommittedResult);
        // phase 3: (write partial second batch of data)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case:
        // p-0: 10 rec + C ---> 5 rec (pending)
        // p-1: 10 rec + C ---> 5 rec (pending)
        // p-2: 10 rec + C ---> 5 rec (pending)
        // p-3: 10 rec + C ---> 5 rec (pending)
        // crash case: (we just assume that we inject the error for p-0; in reality it might be a different partition)
        // (we don't crash right away and write one record less)
        // p-0: 10 rec + C ---> 4 rec (pending)
        // p-1: 10 rec + C ---> 5 rec (pending)
        // p-2: 10 rec + C ---> 5 rec (pending)
        // p-3: 10 rec + C ---> 5 rec (pending)
        final Set<Long> cleanKeys = mkSet(0L, 1L, 2L, 3L);
        final Set<Long> keysFirstClientAlpha = keysFromInstance(streams1Alpha);
        final long firstFailingKeyForCrashCase = keysFirstClientAlpha.iterator().next();
        cleanKeys.remove(firstFailingKeyForCrashCase);
        final List<KeyValue<Long, Long>> uncommittedInputDataBeforeFirstUpgrade = new LinkedList<>();
        final HashMap<Long, Long> uncommittedState = new HashMap<>(committedState);
        if (!injectError) {
            uncommittedInputDataBeforeFirstUpgrade.addAll(prepareData(10L, 15L, 0L, 1L, 2L, 3L));
            writeInputData(uncommittedInputDataBeforeFirstUpgrade);
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataBeforeFirstUpgrade, uncommittedState));
            verifyUncommitted(expectedUncommittedResult);
        } else {
            final List<KeyValue<Long, Long>> uncommittedInputDataWithoutFailingKey = new LinkedList<>();
            for (final long key : cleanKeys) {
                uncommittedInputDataWithoutFailingKey.addAll(prepareData(10L, 15L, key));
            }
            uncommittedInputDataWithoutFailingKey.addAll(prepareData(10L, 14L, firstFailingKeyForCrashCase));
            uncommittedInputDataBeforeFirstUpgrade.addAll(uncommittedInputDataWithoutFailingKey);
            writeInputData(uncommittedInputDataWithoutFailingKey);
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataWithoutFailingKey, new HashMap<>(committedState)));
            verifyUncommitted(expectedUncommittedResult);
        }
        // phase 4: (stop first client)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case: (client 1 will commit its two tasks on close())
        // p-0: 10 rec + C   +   5 rec ---> C
        // p-1: 10 rec + C   +   5 rec ---> C
        // p-2: 10 rec + C   +   5 rec (pending)
        // p-3: 10 rec + C   +   5 rec (pending)
        // crash case: (we write the last record that will trigger the crash; both TX from client 1 will be aborted
        // during fail over by client 2 and retried)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec (pending)
        // p-1: 10 rec + C   +   5 rec + A + 5 rec (pending)
        // p-2: 10 rec + C   +   5 rec (pending)
        // p-3: 10 rec + C   +   5 rec (pending)
        stateTransitions2.clear();
        assignmentListener.prepareForRebalance();
        if (!injectError) {
            stateTransitions1.clear();
            streams1Alpha.close();
            waitForStateTransition(stateTransitions1, CLOSE);
        } else {
            errorInjectedClient1.set(true);
            final List<KeyValue<Long, Long>> dataPotentiallyFirstFailingKey = prepareData(14L, 15L, firstFailingKeyForCrashCase);
            uncommittedInputDataBeforeFirstUpgrade.addAll(dataPotentiallyFirstFailingKey);
            writeInputData(dataPotentiallyFirstFailingKey);
        }
        assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
        waitForRunning(stateTransitions2);
        if (!injectError) {
            final List<KeyValue<Long, Long>> committedInputDataDuringFirstUpgrade = uncommittedInputDataBeforeFirstUpgrade.stream().filter(pair -> keysFirstClientAlpha.contains(pair.key)).collect(Collectors.toList());
            final List<KeyValue<Long, Long>> expectedCommittedResult = computeExpectedResult(committedInputDataDuringFirstUpgrade, committedState);
            verifyCommitted(expectedCommittedResult);
        } else {
            // retrying TX
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataBeforeFirstUpgrade.stream().filter(pair -> keysFirstClientAlpha.contains(pair.key)).collect(Collectors.toList()), new HashMap<>(committedState)));
            verifyUncommitted(expectedUncommittedResult);
            waitForStateTransitionContains(stateTransitions1, CRASH);
            errorInjectedClient1.set(false);
            stateTransitions1.clear();
            streams1Alpha.close();
            assertFalse(UNEXPECTED_EXCEPTION_MSG, hasUnexpectedError);
        }
        // phase 5: (restart first client)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case: (client 2 (alpha) will commit the two revoked tasks that migrate back to client 1)
        // (note: we may or may not get newly committed data, depending on whether the already committed tasks
        // migrate back to client 1, or different tasks)
        // (below we show the case for which we don't get newly committed data)
        // p-0: 10 rec + C   +   5 rec ---> C
        // p-1: 10 rec + C   +   5 rec ---> C
        // p-2: 10 rec + C   +   5 rec (pending)
        // p-3: 10 rec + C   +   5 rec (pending)
        // crash case: (client 2 (alpha) will commit all tasks even though only two tasks are revoked and migrate back to client 1)
        // (note: because nothing was committed originally, we always get newly committed data)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec ---> C
        // p-1: 10 rec + C   +   5 rec + A + 5 rec ---> C
        // p-2: 10 rec + C   +   5 rec ---> C
        // p-3: 10 rec + C   +   5 rec ---> C
        commitRequested.set(0);
        stateTransitions1.clear();
        stateTransitions2.clear();
        streams1V2 = getKafkaStreams(APP_DIR_1, StreamsConfig.EXACTLY_ONCE_V2);
        streams1V2.setStateListener((newState, oldState) -> stateTransitions1.add(KeyValue.pair(oldState, newState)));
        assignmentListener.prepareForRebalance();
        streams1V2.start();
        assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
        waitForRunning(stateTransitions1);
        waitForRunning(stateTransitions2);
        final Set<Long> newlyCommittedKeys;
        if (!injectError) {
            newlyCommittedKeys = keysFromInstance(streams1V2);
            newlyCommittedKeys.removeAll(keysFirstClientAlpha);
        } else {
            newlyCommittedKeys = mkSet(0L, 1L, 2L, 3L);
        }
        final List<KeyValue<Long, Long>> expectedCommittedResultAfterRestartFirstClient = computeExpectedResult(uncommittedInputDataBeforeFirstUpgrade.stream().filter(pair -> newlyCommittedKeys.contains(pair.key)).collect(Collectors.toList()), committedState);
        verifyCommitted(expectedCommittedResultAfterRestartFirstClient);
        // phase 6: (complete second batch of data; crash: let second client fail on commit)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case: (both clients commit regularly)
        // (depending on the task movement in phase 5, we may or may not get newly committed data;
        // we show the case for which p-2 and p-3 are newly committed below)
        // p-0: 10 rec + C   +   5 rec + C ---> 5 rec + C
        // p-1: 10 rec + C   +   5 rec + C ---> 5 rec + C
        // p-2: 10 rec + C   +   5 rec     ---> 5 rec + C
        // p-3: 10 rec + C   +   5 rec     ---> 5 rec + C
        // crash case: (second/alpha client fails and both TX are aborted)
        // (first/V2 client reprocesses the 10 records and commits the TX)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec + C ---> 5 rec + C
        // p-1: 10 rec + C   +   5 rec + A + 5 rec + C ---> 5 rec + C
        // p-2: 10 rec + C   +   5 rec + C             ---> 5 rec + A + 5 rec + C
        // p-3: 10 rec + C   +   5 rec + C             ---> 5 rec + A + 5 rec + C
        commitCounterClient1.set(0);
        if (!injectError) {
            final List<KeyValue<Long, Long>> finishSecondBatch = prepareData(15L, 20L, 0L, 1L, 2L, 3L);
            writeInputData(finishSecondBatch);
            final List<KeyValue<Long, Long>> committedInputDataDuringUpgrade = uncommittedInputDataBeforeFirstUpgrade.stream().filter(pair -> !keysFirstClientAlpha.contains(pair.key)).filter(pair -> !newlyCommittedKeys.contains(pair.key)).collect(Collectors.toList());
            committedInputDataDuringUpgrade.addAll(finishSecondBatch);
            expectedUncommittedResult.addAll(computeExpectedResult(finishSecondBatch, uncommittedState));
            final List<KeyValue<Long, Long>> expectedCommittedResult = computeExpectedResult(committedInputDataDuringUpgrade, committedState);
            verifyCommitted(expectedCommittedResult);
        } else {
            final Set<Long> keysFirstClientV2 = keysFromInstance(streams1V2);
            final Set<Long> keysSecondClientAlpha = keysFromInstance(streams2Alpha);
            final List<KeyValue<Long, Long>> committedInputDataAfterFirstUpgrade = prepareData(15L, 20L, keysFirstClientV2.toArray(new Long[0]));
            writeInputData(committedInputDataAfterFirstUpgrade);
            final List<KeyValue<Long, Long>> expectedCommittedResultBeforeFailure = computeExpectedResult(committedInputDataAfterFirstUpgrade, committedState);
            verifyCommitted(expectedCommittedResultBeforeFailure);
            expectedUncommittedResult.addAll(expectedCommittedResultBeforeFailure);
            commitCounterClient2.set(0);
            final Iterator<Long> it = keysSecondClientAlpha.iterator();
            final Long otherKey = it.next();
            final Long failingKey = it.next();
            final List<KeyValue<Long, Long>> uncommittedInputDataAfterFirstUpgrade = prepareData(15L, 19L, keysSecondClientAlpha.toArray(new Long[0]));
            uncommittedInputDataAfterFirstUpgrade.addAll(prepareData(19L, 20L, otherKey));
            writeInputData(uncommittedInputDataAfterFirstUpgrade);
            uncommittedState.putAll(committedState);
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataAfterFirstUpgrade, uncommittedState));
            verifyUncommitted(expectedUncommittedResult);
            stateTransitions1.clear();
            stateTransitions2.clear();
            assignmentListener.prepareForRebalance();
            commitCounterClient1.set(0);
            commitErrorInjectedClient2.set(true);
            final List<KeyValue<Long, Long>> dataFailingKey = prepareData(19L, 20L, failingKey);
            uncommittedInputDataAfterFirstUpgrade.addAll(dataFailingKey);
            writeInputData(dataFailingKey);
            expectedUncommittedResult.addAll(computeExpectedResult(dataFailingKey, uncommittedState));
            verifyUncommitted(expectedUncommittedResult);
            assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
            waitForStateTransitionContains(stateTransitions2, CRASH);
            commitErrorInjectedClient2.set(false);
            stateTransitions2.clear();
            streams2Alpha.close();
            assertFalse(UNEXPECTED_EXCEPTION_MSG, hasUnexpectedError);
            final List<KeyValue<Long, Long>> expectedCommittedResultAfterFailure = computeExpectedResult(uncommittedInputDataAfterFirstUpgrade, committedState);
            verifyCommitted(expectedCommittedResultAfterFailure);
            expectedUncommittedResult.addAll(expectedCommittedResultAfterFailure);
        }
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C ---> 10 rec + C
        if (!injectError) {
            streams2AlphaTwo = streams2Alpha;
        } else {
            // 7a restart the second client in eos-alpha mode and wait until rebalance stabilizes
            commitCounterClient1.set(0);
            commitCounterClient2.set(-1);
            stateTransitions1.clear();
            stateTransitions2.clear();
            streams2AlphaTwo = getKafkaStreams(APP_DIR_2, StreamsConfig.EXACTLY_ONCE);
            streams2AlphaTwo.setStateListener((newState, oldState) -> stateTransitions2.add(KeyValue.pair(oldState, newState)));
            assignmentListener.prepareForRebalance();
            streams2AlphaTwo.start();
            assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
            waitForRunning(stateTransitions1);
            waitForRunning(stateTransitions2);
            // 7b. write third batch of input data
            final Set<Long> keysFirstClientV2 = keysFromInstance(streams1V2);
            final Set<Long> keysSecondClientAlphaTwo = keysFromInstance(streams2AlphaTwo);
            final List<KeyValue<Long, Long>> committedInputDataBetweenUpgrades = prepareData(20L, 30L, keysSecondClientAlphaTwo.toArray(new Long[0]));
            writeInputData(committedInputDataBetweenUpgrades);
            final List<KeyValue<Long, Long>> expectedCommittedResultBeforeFailure = computeExpectedResult(committedInputDataBetweenUpgrades, committedState);
            verifyCommitted(expectedCommittedResultBeforeFailure);
            expectedUncommittedResult.addAll(expectedCommittedResultBeforeFailure);
            commitCounterClient2.set(0);
            final Iterator<Long> it = keysFirstClientV2.iterator();
            final Long otherKey = it.next();
            final Long failingKey = it.next();
            final List<KeyValue<Long, Long>> uncommittedInputDataBetweenUpgrade = prepareData(20L, 29L, keysFirstClientV2.toArray(new Long[0]));
            uncommittedInputDataBetweenUpgrade.addAll(prepareData(29L, 30L, otherKey));
            writeInputData(uncommittedInputDataBetweenUpgrade);
            uncommittedState.putAll(committedState);
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataBetweenUpgrade, uncommittedState));
            verifyUncommitted(expectedUncommittedResult);
            stateTransitions1.clear();
            stateTransitions2.clear();
            assignmentListener.prepareForRebalance();
            commitCounterClient2.set(0);
            commitErrorInjectedClient1.set(true);
            final List<KeyValue<Long, Long>> dataFailingKey = prepareData(29L, 30L, failingKey);
            uncommittedInputDataBetweenUpgrade.addAll(dataFailingKey);
            writeInputData(dataFailingKey);
            expectedUncommittedResult.addAll(computeExpectedResult(dataFailingKey, uncommittedState));
            verifyUncommitted(expectedUncommittedResult);
            assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
            waitForStateTransitionContains(stateTransitions1, CRASH);
            commitErrorInjectedClient1.set(false);
            stateTransitions1.clear();
            streams1V2.close();
            assertFalse(UNEXPECTED_EXCEPTION_MSG, hasUnexpectedError);
            final List<KeyValue<Long, Long>> expectedCommittedResultAfterFailure = computeExpectedResult(uncommittedInputDataBetweenUpgrade, committedState);
            verifyCommitted(expectedCommittedResultAfterFailure);
            expectedUncommittedResult.addAll(expectedCommittedResultAfterFailure);
            // 7c. restart the first client in eos-V2 mode and wait until rebalance stabilizes
            stateTransitions1.clear();
            stateTransitions2.clear();
            streams1V2Two = getKafkaStreams(APP_DIR_1, StreamsConfig.EXACTLY_ONCE_V2);
            streams1V2Two.setStateListener((newState, oldState) -> stateTransitions1.add(KeyValue.pair(oldState, newState)));
            assignmentListener.prepareForRebalance();
            streams1V2Two.start();
            assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
            waitForRunning(stateTransitions1);
            waitForRunning(stateTransitions2);
        }
        // phase 8: (write partial last batch of data)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case:
        // p-0: 10 rec + C   +   5 rec + C + 5 rec + C ---> 5 rec (pending)
        // p-1: 10 rec + C   +   5 rec + C + 5 rec + C ---> 5 rec (pending)
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + C ---> 5 rec (pending)
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + C ---> 5 rec (pending)
        // crash case: (we just assume that we inject the error for p-2; in reality it might be a different partition)
        // (we don't crash right away and write one record less)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C ---> 5 rec (pending)
        // p-1: 10 rec + C   +   5 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C ---> 5 rec (pending)
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C              ---> 4 rec (pending)
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C              ---> 5 rec (pending)
        cleanKeys.addAll(mkSet(0L, 1L, 2L, 3L));
        final Set<Long> keysSecondClientAlphaTwo = keysFromInstance(streams2AlphaTwo);
        final long secondFailingKeyForCrashCase = keysSecondClientAlphaTwo.iterator().next();
        cleanKeys.remove(secondFailingKeyForCrashCase);
        final List<KeyValue<Long, Long>> uncommittedInputDataBeforeSecondUpgrade = new LinkedList<>();
        if (!injectError) {
            uncommittedInputDataBeforeSecondUpgrade.addAll(prepareData(30L, 35L, 0L, 1L, 2L, 3L));
            writeInputData(uncommittedInputDataBeforeSecondUpgrade);
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataBeforeSecondUpgrade, new HashMap<>(committedState)));
            verifyUncommitted(expectedUncommittedResult);
        } else {
            final List<KeyValue<Long, Long>> uncommittedInputDataWithoutFailingKey = new LinkedList<>();
            for (final long key : cleanKeys) {
                uncommittedInputDataWithoutFailingKey.addAll(prepareData(30L, 35L, key));
            }
            uncommittedInputDataWithoutFailingKey.addAll(prepareData(30L, 34L, secondFailingKeyForCrashCase));
            uncommittedInputDataBeforeSecondUpgrade.addAll(uncommittedInputDataWithoutFailingKey);
            writeInputData(uncommittedInputDataWithoutFailingKey);
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataWithoutFailingKey, new HashMap<>(committedState)));
            verifyUncommitted(expectedUncommittedResult);
        }
        // phase 9: (stop/crash second client)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case: (client 2 (alpha) will commit its two tasks on close())
        // p-0: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec (pending)
        // p-1: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec (pending)
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec ---> C
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec ---> C
        // crash case: (we write the last record that will trigger the crash; both TX from client 2 will be aborted
        // during fail over by client 1 and retried)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C   +   5 rec (pending)
        // p-1: 10 rec + C   +   5 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C   +   5 rec (pending)
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C                +   4 rec ---> A + 5 rec (pending)
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C                +   5 rec ---> A + 5 rec (pending)
        stateTransitions1.clear();
        assignmentListener.prepareForRebalance();
        if (!injectError) {
            stateTransitions2.clear();
            streams2AlphaTwo.close();
            waitForStateTransition(stateTransitions2, CLOSE);
        } else {
            errorInjectedClient2.set(true);
            final List<KeyValue<Long, Long>> dataPotentiallySecondFailingKey = prepareData(34L, 35L, secondFailingKeyForCrashCase);
            uncommittedInputDataBeforeSecondUpgrade.addAll(dataPotentiallySecondFailingKey);
            writeInputData(dataPotentiallySecondFailingKey);
        }
        assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
        waitForRunning(stateTransitions1);
        if (!injectError) {
            final List<KeyValue<Long, Long>> committedInputDataDuringSecondUpgrade = uncommittedInputDataBeforeSecondUpgrade.stream().filter(pair -> keysSecondClientAlphaTwo.contains(pair.key)).collect(Collectors.toList());
            final List<KeyValue<Long, Long>> expectedCommittedResult = computeExpectedResult(committedInputDataDuringSecondUpgrade, committedState);
            verifyCommitted(expectedCommittedResult);
        } else {
            // retrying TX
            expectedUncommittedResult.addAll(computeExpectedResult(uncommittedInputDataBeforeSecondUpgrade.stream().filter(pair -> keysSecondClientAlphaTwo.contains(pair.key)).collect(Collectors.toList()), new HashMap<>(committedState)));
            verifyUncommitted(expectedUncommittedResult);
            waitForStateTransitionContains(stateTransitions2, CRASH);
            errorInjectedClient2.set(false);
            stateTransitions2.clear();
            streams2AlphaTwo.close();
            assertFalse(UNEXPECTED_EXCEPTION_MSG, hasUnexpectedError);
        }
        // phase 10: (restart second client)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // the state below indicates the case for which the "original" tasks of client2 are migrated back to client2
        // if a task "switch" happens, we might get additional commits (omitted in the comment for brevity)
        // 
        // stop case: (client 1 (V2) will commit all four tasks if at least one revoked and migrated task needs committing back to client 2)
        // p-0: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec ---> C
        // p-1: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec ---> C
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec + C
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec + C
        // crash case: (client 1 (V2) will commit all four tasks even though only two migrate back to client 2)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C   +   5 rec ---> C
        // p-1: 10 rec + C   +   5 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C   +   5 rec ---> C
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C                +   4 rec + A + 5 rec ---> C
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C                +   5 rec + A + 5 rec ---> C
        commitRequested.set(0);
        stateTransitions1.clear();
        stateTransitions2.clear();
        streams2V2 = getKafkaStreams(APP_DIR_1, StreamsConfig.EXACTLY_ONCE_V2);
        streams2V2.setStateListener((newState, oldState) -> stateTransitions2.add(KeyValue.pair(oldState, newState)));
        assignmentListener.prepareForRebalance();
        streams2V2.start();
        assignmentListener.waitForNextStableAssignment(MAX_WAIT_TIME_MS);
        waitForRunning(stateTransitions1);
        waitForRunning(stateTransitions2);
        newlyCommittedKeys.clear();
        if (!injectError) {
            newlyCommittedKeys.addAll(keysFromInstance(streams2V2));
            newlyCommittedKeys.removeAll(keysSecondClientAlphaTwo);
        } else {
            newlyCommittedKeys.addAll(mkSet(0L, 1L, 2L, 3L));
        }
        final List<KeyValue<Long, Long>> expectedCommittedResultAfterRestartSecondClient = computeExpectedResult(uncommittedInputDataBeforeSecondUpgrade.stream().filter(pair -> newlyCommittedKeys.contains(pair.key)).collect(Collectors.toList()), committedState);
        verifyCommitted(expectedCommittedResultAfterRestartSecondClient);
        // phase 11: (complete fourth batch of data)
        // expected end state per output partition (C == COMMIT; A == ABORT; ---> indicate the changes):
        // 
        // stop case:
        // p-0: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec + C ---> 5 rec + C
        // p-1: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec + C ---> 5 rec + C
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec + C ---> 5 rec + C
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + C   +   5 rec + C ---> 5 rec + C
        // crash case: (we just assume that we inject the error for p-2; in reality it might be a different partition)
        // p-0: 10 rec + C   +   4 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C   +   5 rec + C             ---> 5 rec + C
        // p-1: 10 rec + C   +   5 rec + A + 5 rec + C + 5 rec + C   +   10 rec + A + 10 rec + C   +   5 rec + C             ---> 5 rec + C
        // p-2: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C                +   4 rec + A + 5 rec + C ---> 5 rec + C
        // p-3: 10 rec + C   +   5 rec + C + 5 rec + A + 5 rec + C   +   10 rec + C                +   5 rec + A + 5 rec + C ---> 5 rec + C
        commitCounterClient1.set(-1);
        commitCounterClient2.set(-1);
        final List<KeyValue<Long, Long>> finishLastBatch = prepareData(35L, 40L, 0L, 1L, 2L, 3L);
        writeInputData(finishLastBatch);
        final Set<Long> uncommittedKeys = mkSet(0L, 1L, 2L, 3L);
        uncommittedKeys.removeAll(keysSecondClientAlphaTwo);
        uncommittedKeys.removeAll(newlyCommittedKeys);
        final List<KeyValue<Long, Long>> committedInputDataDuringUpgrade = uncommittedInputDataBeforeSecondUpgrade.stream().filter(pair -> uncommittedKeys.contains(pair.key)).collect(Collectors.toList());
        committedInputDataDuringUpgrade.addAll(finishLastBatch);
        final List<KeyValue<Long, Long>> expectedCommittedResult = computeExpectedResult(committedInputDataDuringUpgrade, committedState);
        verifyCommitted(expectedCommittedResult);
    } finally {
        if (streams1Alpha != null) {
            streams1Alpha.close();
        }
        if (streams1V2 != null) {
            streams1V2.close();
        }
        if (streams1V2Two != null) {
            streams1V2Two.close();
        }
        if (streams2Alpha != null) {
            streams2Alpha.close();
        }
        if (streams2AlphaTwo != null) {
            streams2AlphaTwo.close();
        }
        if (streams2V2 != null) {
            streams2V2.close();
        }
    }
}
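
The upgrade this test exercises comes down to restarting each client with a different processing.guarantee. A minimal sketch of the configuration side of getKafkaStreams(appDir, guarantee), with the application id and broker address as placeholders (EXACTLY_ONCE is the deprecated eos-alpha setting, which is why the test carries @SuppressWarnings("deprecation")):

// Hypothetical configuration helper mirroring the guarantee switch used above.
static Properties eosProperties(final String processingGuarantee) {
    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "eos-upgrade-demo");     // placeholder
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");    // placeholder
    // before the upgrade: StreamsConfig.EXACTLY_ONCE   (eos-alpha, deprecated)
    // after the upgrade:  StreamsConfig.EXACTLY_ONCE_V2
    props.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, processingGuarantee);
    return props;
}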

Example 5 with State

use of org.apache.kafka.streams.KafkaStreams.State in project kafka by apache.

the class NamedTopologyIntegrationTest method shouldPrefixAllInternalTopicNamesWithNamedTopology.

@Test
public void shouldPrefixAllInternalTopicNamesWithNamedTopology() throws Exception {
    final String countTopologyName = "count-topology";
    final String fkjTopologyName = "FKJ-topology";
    // First named topology: a simple count, which creates a repartition and a changelog topic.
    final NamedTopologyBuilder countBuilder = streams.newNamedTopologyBuilder(countTopologyName);
    countBuilder.stream(INPUT_STREAM_1).groupBy((k, v) -> k).count();
    // Second named topology: a foreign-key join, which creates subscription and changelog topics.
    final NamedTopologyBuilder fkjBuilder = streams.newNamedTopologyBuilder(fkjTopologyName);
    final UniqueTopicSerdeScope serdeScope = new UniqueTopicSerdeScope();
    final KTable<String, Long> left = fkjBuilder.table(INPUT_STREAM_2, Consumed.with(serdeScope.decorateSerde(Serdes.String(), props, true), serdeScope.decorateSerde(Serdes.Long(), props, false)));
    final KTable<String, Long> right = fkjBuilder.table(INPUT_STREAM_3, Consumed.with(serdeScope.decorateSerde(Serdes.String(), props, true), serdeScope.decorateSerde(Serdes.Long(), props, false)));
    left.join(right, Object::toString, (value1, value2) -> String.valueOf(value1 + value2), Materialized.with(null, serdeScope.decorateSerde(Serdes.String(), props, false)));
    // Start both named topologies and block until the application reaches State.RUNNING
    // (a polling sketch of this kind of wait follows the example).
    streams.start(asList(fkjBuilder.build(), countBuilder.build()));
    waitForApplicationState(singletonList(streams), State.RUNNING, Duration.ofSeconds(60));
    // Every internal topic name must carry the "<TOPIC_PREFIX>-<topology name>" prefix.
    final String countTopicPrefix = TOPIC_PREFIX + "-" + countTopologyName;
    final String fkjTopicPrefix = TOPIC_PREFIX + "-" + fkjTopologyName;
    final Set<String> internalTopics = CLUSTER.getAllTopicsInCluster().stream()
        .filter(t -> t.contains(TOPIC_PREFIX))
        .filter(t -> t.endsWith("-repartition") || t.endsWith("-changelog") || t.endsWith("-topic"))
        .collect(Collectors.toSet());
    assertThat(internalTopics, is(mkSet(
        countTopicPrefix + "-KSTREAM-AGGREGATE-STATE-STORE-0000000002-repartition",
        countTopicPrefix + "-KSTREAM-AGGREGATE-STATE-STORE-0000000002-changelog",
        fkjTopicPrefix + "-KTABLE-FK-JOIN-SUBSCRIPTION-REGISTRATION-0000000006-topic",
        fkjTopicPrefix + "-KTABLE-FK-JOIN-SUBSCRIPTION-RESPONSE-0000000014-topic",
        fkjTopicPrefix + "-KTABLE-FK-JOIN-SUBSCRIPTION-STATE-STORE-0000000010-changelog",
        fkjTopicPrefix + "-" + INPUT_STREAM_2 + "-STATE-STORE-0000000000-changelog",
        fkjTopicPrefix + "-" + INPUT_STREAM_3 + "-STATE-STORE-0000000003-changelog")));
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) DefaultKafkaClientSupplier(org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier) KafkaStreamsNamedTopologyWrapper(org.apache.kafka.streams.processor.internals.namedtopology.KafkaStreamsNamedTopologyWrapper) Stores(org.apache.kafka.streams.state.Stores) StreamsException(org.apache.kafka.streams.errors.StreamsException) CoreMatchers.notNullValue(org.hamcrest.CoreMatchers.notNullValue) Collections.singletonList(java.util.Collections.singletonList) NamedTopologyBuilder(org.apache.kafka.streams.processor.internals.namedtopology.NamedTopologyBuilder) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) IntegrationTestUtils.safeUniqueTestName(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName) Collections.singleton(java.util.Collections.singleton) Arrays.asList(java.util.Arrays.asList) KeyValueStore(org.apache.kafka.streams.state.KeyValueStore) Map(java.util.Map) After(org.junit.After) Duration(java.time.Duration) Serdes(org.apache.kafka.common.serialization.Serdes) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) ClientUtils.extractThreadId(org.apache.kafka.streams.processor.internals.ClientUtils.extractThreadId) MissingSourceTopicException(org.apache.kafka.streams.errors.MissingSourceTopicException) TopicPartition(org.apache.kafka.common.TopicPartition) AfterClass(org.junit.AfterClass) TestUtils(org.apache.kafka.test.TestUtils) Collection(java.util.Collection) KeyValue(org.apache.kafka.streams.KeyValue) StreamsMetadata(org.apache.kafka.streams.StreamsMetadata) Utils.mkSet(org.apache.kafka.common.utils.Utils.mkSet) LongDeserializer(org.apache.kafka.common.serialization.LongDeserializer) Set(java.util.Set) ConsumerConfig(org.apache.kafka.clients.consumer.ConsumerConfig) KafkaClientSupplier(org.apache.kafka.streams.KafkaClientSupplier) LongSerializer(org.apache.kafka.common.serialization.LongSerializer) State(org.apache.kafka.streams.KafkaStreams.State) Collectors(java.util.stream.Collectors) Bytes(org.apache.kafka.common.utils.Bytes) QueryableStoreTypes(org.apache.kafka.streams.state.QueryableStoreTypes) IntegrationTestUtils(org.apache.kafka.streams.integration.utils.IntegrationTestUtils) List(java.util.List) Materialized(org.apache.kafka.streams.kstream.Materialized) Optional(java.util.Optional) AddNamedTopologyResult(org.apache.kafka.streams.processor.internals.namedtopology.AddNamedTopologyResult) Queue(java.util.Queue) Pattern(java.util.regex.Pattern) ReadOnlyKeyValueStore(org.apache.kafka.streams.state.ReadOnlyKeyValueStore) NamedTopology(org.apache.kafka.streams.processor.internals.namedtopology.NamedTopology) StreamsConfig(org.apache.kafka.streams.StreamsConfig) BeforeClass(org.junit.BeforeClass) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) CoreMatchers.not(org.hamcrest.CoreMatchers.not) NamedTopologyStoreQueryParameters(org.apache.kafka.streams.processor.internals.namedtopology.NamedTopologyStoreQueryParameters) HashMap(java.util.HashMap) KStream(org.apache.kafka.streams.kstream.KStream) TestUtils.retryOnExceptionWithTimeout(org.apache.kafka.test.TestUtils.retryOnExceptionWithTimeout) KeyValue.pair(org.apache.kafka.streams.KeyValue.pair) EmbeddedKafkaCluster(org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster) TestName(org.junit.rules.TestName) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) LinkedList(java.util.LinkedList) CoreMatchers.nullValue(org.hamcrest.CoreMatchers.nullValue) 
Before(org.junit.Before) IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived) KTable(org.apache.kafka.streams.kstream.KTable) IntegrationTestUtils.waitForApplicationState(org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForApplicationState) Properties(java.util.Properties) StreamsUncaughtExceptionHandler(org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler) Iterator(java.util.Iterator) Consumed(org.apache.kafka.streams.kstream.Consumed) StreamsMetadataImpl(org.apache.kafka.streams.state.internals.StreamsMetadataImpl) Test(org.junit.Test) RemoveNamedTopologyResult(org.apache.kafka.streams.processor.internals.namedtopology.RemoveNamedTopologyResult) NOT_AVAILABLE(org.apache.kafka.streams.KeyQueryMetadata.NOT_AVAILABLE) Rule(org.junit.Rule) KeyQueryMetadata(org.apache.kafka.streams.KeyQueryMetadata) LagInfo(org.apache.kafka.streams.LagInfo) UniqueTopicSerdeScope(org.apache.kafka.streams.utils.UniqueTopicSerdeScope) UniqueTopicSerdeScope(org.apache.kafka.streams.utils.UniqueTopicSerdeScope) NamedTopologyBuilder(org.apache.kafka.streams.processor.internals.namedtopology.NamedTopologyBuilder) Test(org.junit.Test)
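
The waitForApplicationState call in the example above comes from IntegrationTestUtils and blocks until every KafkaStreams instance reports the requested KafkaStreams.State. Its actual implementation is not reproduced on this page; the following is only a minimal polling sketch over KafkaStreams.state() under the assumption that the instances were created and started elsewhere (the names StateWaitSketch and awaitState are illustrative, not part of Kafka):

import java.time.Duration;
import java.util.List;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KafkaStreams.State;

public final class StateWaitSketch {

    // Poll each instance's state() until all of them reach the target state,
    // or fail once the timeout elapses.
    public static void awaitState(final List<KafkaStreams> instances,
                                  final State target,
                                  final Duration timeout) throws InterruptedException {
        final long deadline = System.currentTimeMillis() + timeout.toMillis();
        while (System.currentTimeMillis() < deadline) {
            if (instances.stream().allMatch(s -> s.state() == target)) {
                return;
            }
            Thread.sleep(100L);
        }
        throw new AssertionError("Timed out waiting for state " + target);
    }
}

Called as awaitState(singletonList(streams), State.RUNNING, Duration.ofSeconds(60)), this mirrors the shape of the wait in the test while trading the richer diagnostics of the real test utility for brevity.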

Aggregations

Duration (java.time.Duration): 5
List (java.util.List): 5
Map (java.util.Map): 5
Properties (java.util.Properties): 5
Set (java.util.Set): 5
State (org.apache.kafka.streams.KafkaStreams.State): 5
TestUtils (org.apache.kafka.test.TestUtils): 5
MatcherAssert.assertThat (org.hamcrest.MatcherAssert.assertThat): 5
Collections (java.util.Collections): 4
HashMap (java.util.HashMap): 4
ConsumerConfig (org.apache.kafka.clients.consumer.ConsumerConfig): 4
Serdes (org.apache.kafka.common.serialization.Serdes): 4
KeyValue (org.apache.kafka.streams.KeyValue): 4
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 4
KeyValueStore (org.apache.kafka.streams.state.KeyValueStore): 4
File (java.io.File): 3
IOException (java.io.IOException): 3
ArrayList (java.util.ArrayList): 3
TimeUnit (java.util.concurrent.TimeUnit): 3
TestName (org.junit.rules.TestName): 3
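
Since KafkaStreams.State appears in all of the aggregated examples, it is worth noting the other common way to observe it besides polling: a StateListener receives every transition (for example CREATED -> REBALANCING -> RUNNING). The sketch below is illustrative only; the topic names, application id, and bootstrap server are placeholders and are not taken from the examples above:

import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KafkaStreams.State;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;

public final class StateListenerSketch {
    public static void main(final String[] args) throws InterruptedException {
        final StreamsBuilder builder = new StreamsBuilder();
        // Placeholder topology: copy records from one topic to another.
        builder.stream("input-topic").to("output-topic");

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "state-listener-sketch"); // placeholder
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // placeholder
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.StringSerde.class);
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.StringSerde.class);

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        final CountDownLatch running = new CountDownLatch(1);
        // onChange is invoked with the new and old state on every transition.
        streams.setStateListener((newState, oldState) -> {
            if (newState == State.RUNNING) {
                running.countDown();
            }
        });
        streams.start();
        running.await();
        System.out.println("Streams application reached RUNNING");
        streams.close();
    }
}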