Example 1 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class FlinkKafkaConsumerBaseTest method testRescaling.

/**
 * Tests whether the Kafka consumer behaves correctly when scaling the parallelism up/down,
 * which means that operator state is being reshuffled.
 *
 * <p>This also verifies that a restoring source is always impervious to changes in the list of
 * topics fetched from Kafka.
 */
@SuppressWarnings("unchecked")
private void testRescaling(final int initialParallelism, final int numPartitions, final int restoredParallelism, final int restoredNumPartitions) throws Exception {
    Preconditions.checkArgument(restoredNumPartitions >= numPartitions, "invalid test case for Kafka repartitioning; Kafka only allows increasing partitions.");
    List<KafkaTopicPartition> mockFetchedPartitionsOnStartup = new ArrayList<>();
    for (int i = 0; i < numPartitions; i++) {
        mockFetchedPartitionsOnStartup.add(new KafkaTopicPartition("test-topic", i));
    }
    DummyFlinkKafkaConsumer<String>[] consumers = new DummyFlinkKafkaConsumer[initialParallelism];
    AbstractStreamOperatorTestHarness<String>[] testHarnesses = new AbstractStreamOperatorTestHarness[initialParallelism];
    List<String> testTopics = Collections.singletonList("test-topic");
    for (int i = 0; i < initialParallelism; i++) {
        TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(new KafkaTopicsDescriptor(testTopics, null), i, initialParallelism, TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics), TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsOnStartup));
        consumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
        testHarnesses[i] = createTestHarness(consumers[i], initialParallelism, i);
        // initializeState() is always called; initializeEmptyState() signals that there is no snapshot to restore from
        testHarnesses[i].initializeEmptyState();
        testHarnesses[i].open();
    }
    Map<KafkaTopicPartition, Long> globalSubscribedPartitions = new HashMap<>();
    for (int i = 0; i < initialParallelism; i++) {
        Map<KafkaTopicPartition, Long> subscribedPartitions = consumers[i].getSubscribedPartitionsToStartOffsets();
        // make sure that no one else is subscribed to these partitions
        for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
            assertThat(globalSubscribedPartitions, not(hasKey(partition)));
        }
        globalSubscribedPartitions.putAll(subscribedPartitions);
    }
    assertThat(globalSubscribedPartitions.values(), hasSize(numPartitions));
    assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(globalSubscribedPartitions.keySet())));
    OperatorSubtaskState[] state = new OperatorSubtaskState[initialParallelism];
    for (int i = 0; i < initialParallelism; i++) {
        state[i] = testHarnesses[i].snapshot(0, 0);
    }
    OperatorSubtaskState mergedState = AbstractStreamOperatorTestHarness.repackageState(state);
    // -----------------------------------------------------------------------------------------
    // restore
    List<KafkaTopicPartition> mockFetchedPartitionsAfterRestore = new ArrayList<>();
    for (int i = 0; i < restoredNumPartitions; i++) {
        mockFetchedPartitionsAfterRestore.add(new KafkaTopicPartition("test-topic", i));
    }
    DummyFlinkKafkaConsumer<String>[] restoredConsumers = new DummyFlinkKafkaConsumer[restoredParallelism];
    AbstractStreamOperatorTestHarness<String>[] restoredTestHarnesses = new AbstractStreamOperatorTestHarness[restoredParallelism];
    for (int i = 0; i < restoredParallelism; i++) {
        // maxParallelism is a constant defined on the enclosing test class
        OperatorSubtaskState initState = AbstractStreamOperatorTestHarness.repartitionOperatorState(mergedState, maxParallelism, initialParallelism, restoredParallelism, i);
        TestPartitionDiscoverer partitionDiscoverer = new TestPartitionDiscoverer(new KafkaTopicsDescriptor(testTopics, null), i, restoredParallelism, TestPartitionDiscoverer.createMockGetAllTopicsSequenceFromFixedReturn(testTopics), TestPartitionDiscoverer.createMockGetAllPartitionsFromTopicsSequenceFromFixedReturn(mockFetchedPartitionsAfterRestore));
        restoredConsumers[i] = new DummyFlinkKafkaConsumer<>(testTopics, partitionDiscoverer);
        restoredTestHarnesses[i] = createTestHarness(restoredConsumers[i], restoredParallelism, i);
        // restore from the repartitioned operator state
        restoredTestHarnesses[i].initializeState(initState);
        restoredTestHarnesses[i].open();
    }
    Map<KafkaTopicPartition, Long> restoredGlobalSubscribedPartitions = new HashMap<>();
    for (int i = 0; i < restoredParallelism; i++) {
        Map<KafkaTopicPartition, Long> subscribedPartitions = restoredConsumers[i].getSubscribedPartitionsToStartOffsets();
        // make sure that no one else is subscribed to these partitions
        for (KafkaTopicPartition partition : subscribedPartitions.keySet()) {
            assertThat(restoredGlobalSubscribedPartitions, not(hasKey(partition)));
        }
        restoredGlobalSubscribedPartitions.putAll(subscribedPartitions);
    }
    assertThat(restoredGlobalSubscribedPartitions.values(), hasSize(restoredNumPartitions));
    assertThat(mockFetchedPartitionsOnStartup, everyItem(isIn(restoredGlobalSubscribedPartitions.keySet())));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) TestPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.testutils.TestPartitionDiscoverer) OptionalLong(java.util.OptionalLong) KafkaTopicsDescriptor(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicsDescriptor)
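
For context, testRescaling is a parameterized helper driven by small @Test methods in the same class. A minimal sketch of such drivers (the parameter values below are illustrative assumptions, chosen to satisfy the restoredNumPartitions >= numPartitions precondition, not copied from the source):

@Test
public void testRescalingUp() throws Exception {
    // 5 subtasks over 2 partitions, restored as 8 subtasks over 30 partitions
    testRescaling(5, 2, 8, 30);
}

@Test
public void testRescalingDown() throws Exception {
    // 5 subtasks over 5 partitions, restored as 2 subtasks over 100 partitions
    testRescaling(5, 5, 2, 100);
}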

Example 2 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class FlinkKafkaConsumerBaseMigrationTest method writeSnapshot.

private void writeSnapshot(String path, HashMap<KafkaTopicPartition, Long> state) throws Exception {
    final OneShotLatch latch = new OneShotLatch();
    final AbstractFetcher<String, ?> fetcher = mock(AbstractFetcher.class);
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            latch.trigger();
            return null;
        }
    }).when(fetcher).runFetchLoop();
    when(fetcher.snapshotCurrentState()).thenReturn(state);
    final List<KafkaTopicPartition> partitions = new ArrayList<>(PARTITION_STATE.keySet());
    final DummyFlinkKafkaConsumer<String> consumerFunction = new DummyFlinkKafkaConsumer<>(fetcher, TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED);
    StreamSource<String, DummyFlinkKafkaConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.setup();
    testHarness.open();
    final Throwable[] error = new Throwable[1];
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                consumerFunction.run(new DummySourceContext() {

                    @Override
                    public void collect(String element) {
                    }
                });
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    final OperatorSubtaskState snapshot;
    synchronized (testHarness.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0L, 0L);
    }
    OperatorSnapshotUtil.writeStateHandle(snapshot, path);
    consumerOperator.close();
    runner.join();
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) InvocationOnMock(org.mockito.invocation.InvocationOnMock) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch)
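
The counterpart of OperatorSnapshotUtil.writeStateHandle is OperatorSnapshotUtil.readStateHandle, which migration tests use to load the file written above. A minimal sketch of the restore side, assuming `path` and a consumer operator set up as in the example (the restore path in the real FlinkKafkaConsumerBaseMigrationTest may differ in detail):

// read the snapshot back from disk and initialize a fresh harness with it
OperatorSubtaskState restoredState = OperatorSnapshotUtil.readStateHandle(path);
AbstractStreamOperatorTestHarness<String> restoreHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
restoreHarness.setup();
restoreHarness.initializeState(restoredState);
restoreHarness.open();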

Example 3 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class FlinkKafkaProducerITCase method testScaleUpAfterScalingDown.

/**
 * Each instance of FlinkKafkaProducer uses its own pool of transactional ids. After a
 * restore from checkpoint, the transactional ids are redistributed across the subtasks. In
 * case of scale-down, the surplus transactional ids are dropped; in case of scale-up, new
 * ones are generated for the new subtasks. This test makes sure that a sequence of scaling
 * down and then up again works correctly. In particular, it checks that the ids newly
 * generated during scale-up do not overlap with ids that were in use before the scale-down.
 * For example, we start with 4 ids at parallelism 4: [1], [2], [3], [4], one per subtask.
 * We scale down to parallelism 2: [1, 2], [3, 4], so the first subtask gets ids 1 and 2 and
 * the second gets ids 3 and 4; the surplus ids are dropped from the pools. We then scale up
 * to parallelism 3: [1 or 2], [3 or 4], [???]. The new subtask has to generate new ids, but
 * it cannot reuse ids that are potentially still in use, so it has to generate ids greater
 * than 4.
 */
@Test
public void testScaleUpAfterScalingDown() throws Exception {
    String topic = "scale-up-after-scaling-down";
    final int parallelism1 = 4;
    final int parallelism2 = 2;
    final int parallelism3 = 3;
    final int maxParallelism = Math.max(parallelism1, Math.max(parallelism2, parallelism3));
    OperatorSubtaskState operatorSubtaskState = repartitionAndExecute(topic, OperatorSubtaskState.builder().build(), parallelism1, parallelism1, maxParallelism, IntStream.range(0, parallelism1).boxed().iterator());
    operatorSubtaskState = repartitionAndExecute(topic, operatorSubtaskState, parallelism1, parallelism2, maxParallelism, IntStream.range(parallelism1, parallelism1 + parallelism2).boxed().iterator());
    operatorSubtaskState = repartitionAndExecute(topic, operatorSubtaskState, parallelism2, parallelism3, maxParallelism, IntStream.range(parallelism1 + parallelism2, parallelism1 + parallelism2 + parallelism3).boxed().iterator());
    // After each previous repartitionAndExecute call, we are left with some lingering
    // transactions that would not allow us to read all committed messages from the topic.
    // Thus we initialize the operators from the OperatorSubtaskState once more, but without
    // any new data. This should terminate all ongoing transactions.
    repartitionAndExecute(topic, operatorSubtaskState, parallelism3, 1, maxParallelism, Collections.emptyIterator());
    assertExactlyOnceForTopic(createProperties(), topic, IntStream.range(0, parallelism1 + parallelism2 + parallelism3).boxed().collect(Collectors.toList()));
    deleteTestTopic(topic);
    checkProducerLeak();
}
Also used : OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)

Example 4 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class FlinkKafkaProducerITCase method testFlinkKafkaProducerFailBeforeNotify.

/**
 * This test hangs when run from the IDE, which is why it is ignored.
 */
@Test
@Ignore
public void testFlinkKafkaProducerFailBeforeNotify() throws Exception {
    String topic = "flink-kafka-producer-fail-before-notify";
    OneInputStreamOperatorTestHarness<Integer, Object> testHarness = createTestHarness(topic);
    testHarness.setup();
    testHarness.open();
    testHarness.processElement(42, 0);
    testHarness.snapshot(0, 1);
    testHarness.processElement(43, 2);
    OperatorSubtaskState snapshot = testHarness.snapshot(1, 3);
    int leaderId = kafkaServer.getLeaderToShutDown(topic);
    failBroker(leaderId);
    try {
        testHarness.processElement(44, 4);
        testHarness.snapshot(2, 5);
        fail();
    } catch (Exception ex) {
        // expected
    }
    try {
        testHarness.close();
    } catch (Exception ex) {
        // ignore: closing can fail while the broker is still down
    }
    kafkaServer.restartBroker(leaderId);
    testHarness = createTestHarness(topic);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.close();
    assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 43));
    deleteTestTopic(topic);
    checkProducerLeak();
}
Also used : OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) ProducerFencedException(org.apache.kafka.common.errors.ProducerFencedException) Ignore(org.junit.Ignore) Test(org.junit.Test)
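
The createTestHarness(topic) helper is not shown in this excerpt. A hypothetical reconstruction, modeled on the harness construction in Example 5 below (the actual helper in FlinkKafkaProducerITCase may configure the producer differently):

private OneInputStreamOperatorTestHarness<Integer, Object> createTestHarness(String topic) throws Exception {
    // exactly-once producer wrapped in a StreamSink, as in Example 5
    FlinkKafkaProducer<Integer> kafkaProducer = new FlinkKafkaProducer<>(topic, integerKeyedSerializationSchema, createProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
    return new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kafkaProducer), IntSerializer.INSTANCE);
}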

Example 5 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class FlinkKafkaProducerITCase method testRestoreUsingDifferentTransactionalIdPrefix.

@Test
public void testRestoreUsingDifferentTransactionalIdPrefix() throws Exception {
    String topic = "testCustomizeTransactionalIdPrefix";
    Properties properties = createProperties();
    final String transactionalIdPrefix1 = "my-prefix1";
    FlinkKafkaProducer<Integer> kafkaProducer1 = new FlinkKafkaProducer<>(topic, integerKeyedSerializationSchema, properties, FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
    kafkaProducer1.setTransactionalIdPrefix(transactionalIdPrefix1);
    OperatorSubtaskState snapshot;
    try (OneInputStreamOperatorTestHarness<Integer, Object> testHarness1 = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kafkaProducer1), IntSerializer.INSTANCE)) {
        testHarness1.setup();
        testHarness1.open();
        testHarness1.processElement(42, 0);
        snapshot = testHarness1.snapshot(0, 1);
        testHarness1.processElement(43, 2);
    }
    final String transactionalIdPrefix2 = "my-prefix2";
    FlinkKafkaProducer<Integer> kafkaProducer2 = new FlinkKafkaProducer<>(topic, integerKeyedSerializationSchema, properties, FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
    kafkaProducer2.setTransactionalIdPrefix(transactionalIdPrefix2);
    try (OneInputStreamOperatorTestHarness<Integer, Object> testHarness2 = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kafkaProducer2), IntSerializer.INSTANCE)) {
        testHarness2.setup();
        // restore from the previous snapshot; the transaction containing record 43 should be aborted
        testHarness2.initializeState(snapshot);
        testHarness2.open();
        testHarness2.processElement(44, 3);
        testHarness2.snapshot(1, 4);
        testHarness2.processElement(45, 5);
        testHarness2.notifyOfCompletedCheckpoint(1);
        testHarness2.processElement(46, 6);
    }
    assertExactlyOnceForTopic(createProperties(), topic, Arrays.asList(42, 44));
    checkProducerLeak();
}
Also used : OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) Properties(java.util.Properties) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)
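
A note on the final assertion, reading the harness timeline above against the producer's two-phase-commit semantics: record 42 is pre-committed by snapshot(0, 1) and committed when testHarness2 restores from that snapshot; record 43 belongs to a transaction opened after the snapshot and is aborted on restore, even though the restored producer uses a different transactionalIdPrefix; record 44 is committed by notifyOfCompletedCheckpoint(1); records 45 and 46 sit in transactions that are never notified as complete. Hence only 42 and 44 are read back.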

Aggregations

OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)178
Test (org.junit.Test)142
Watermark (org.apache.flink.streaming.api.watermark.Watermark)52
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)37
RowData (org.apache.flink.table.data.RowData)31
ArrayList (java.util.ArrayList)28
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)25
Tuple2 (org.apache.flink.api.java.tuple.Tuple2)23
Map (java.util.Map)22
OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)21
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)19
HashMap (java.util.HashMap)18
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)18
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)16
Event (org.apache.flink.cep.Event)16
SubEvent (org.apache.flink.cep.SubEvent)16
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)15
GenericRowData (org.apache.flink.table.data.GenericRowData)15
Ignore (org.junit.Ignore)15
TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)14