
Example 6 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKafkaConsumerBaseMigrationTest method testRestoreFromEmptyStateWithPartitions.

/**
 * Test restoring from an empty state taken using a previous Flink version, when some partitions
 * could be found for topics.
 */
@Test
public void testRestoreFromEmptyStateWithPartitions() throws Exception {
    final List<KafkaTopicPartition> partitions = new ArrayList<>(PARTITION_STATE.keySet());
    final DummyFlinkKafkaConsumer<String> consumerFunction = new DummyFlinkKafkaConsumer<>(TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED);
    StreamSource<String, DummyFlinkKafkaConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.setup();
    // restore state from binary snapshot file
    testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kafka-consumer-migration-test-flink" + testMigrateVersion + "-empty-state-snapshot"));
    testHarness.open();
    // the expected state in "kafka-consumer-migration-test-flink1.x-snapshot-empty-state";
    // all new partitions after the snapshot are considered as partitions that were created
    // while the consumer wasn't running, and should start from the earliest offset.
    final HashMap<KafkaTopicPartition, Long> expectedSubscribedPartitionsWithStartOffsets = new HashMap<>();
    for (KafkaTopicPartition partition : PARTITION_STATE.keySet()) {
        expectedSubscribedPartitionsWithStartOffsets.put(partition, KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET);
    }
    // assert that there are partitions and that they are identical to the expected list
    assertNotNull(consumerFunction.getSubscribedPartitionsToStartOffsets());
    assertFalse(consumerFunction.getSubscribedPartitionsToStartOffsets().isEmpty());
    assertEquals(expectedSubscribedPartitionsWithStartOffsets, consumerFunction.getSubscribedPartitionsToStartOffsets());
    // the new partitions should have been considered as restored state
    assertNotNull(consumerFunction.getRestoredState());
    assertFalse(consumerFunction.getRestoredState().isEmpty());
    for (Map.Entry<KafkaTopicPartition, Long> expectedEntry : expectedSubscribedPartitionsWithStartOffsets.entrySet()) {
        assertEquals(expectedEntry.getValue(), consumerFunction.getRestoredState().get(expectedEntry.getKey()));
    }
    consumerOperator.close();
    consumerOperator.cancel();
}
Also used : HashMap(java.util.HashMap) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
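
The same restore flow works for any source operator, not just the Kafka consumer: wrap the function in a StreamSource, build the harness, point initializeState at the binary snapshot resource, and inspect the function's state after open(). Below is a minimal sketch of that flow; MyStatefulSource and the snapshot file name are hypothetical placeholders, and the only imports beyond the ones listed above are org.apache.flink.streaming.api.TimeCharacteristic and org.apache.flink.streaming.util.OperatorSnapshotUtil.

@Test
public void testRestoreFromSnapshotFileSketch() throws Exception {
    // MyStatefulSource is a hypothetical SourceFunction<String> that records the state it
    // restores, standing in for DummyFlinkKafkaConsumer in the example above.
    MyStatefulSource sourceFunction = new MyStatefulSource();
    StreamSource<String, MyStatefulSource> operator = new StreamSource<>(sourceFunction);
    // constructor arguments: maxParallelism = 1, parallelism = 1, subtaskIndex = 0
    AbstractStreamOperatorTestHarness<String> harness =
            new AbstractStreamOperatorTestHarness<>(operator, 1, 1, 0);
    harness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    harness.setup();
    // restore operator state from a binary snapshot stored as a test resource (hypothetical name)
    harness.initializeState(OperatorSnapshotUtil.getResourceFilename("my-operator-migration-snapshot"));
    harness.open();
    // assertions against sourceFunction's restored state would go here
    operator.close();
}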

Example 7 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKafkaConsumerBaseMigrationTest method writeSnapshot.

private void writeSnapshot(String path, HashMap<KafkaTopicPartition, Long> state) throws Exception {
    final OneShotLatch latch = new OneShotLatch();
    final AbstractFetcher<String, ?> fetcher = mock(AbstractFetcher.class);
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            latch.trigger();
            return null;
        }
    }).when(fetcher).runFetchLoop();
    when(fetcher.snapshotCurrentState()).thenReturn(state);
    final List<KafkaTopicPartition> partitions = new ArrayList<>(PARTITION_STATE.keySet());
    final DummyFlinkKafkaConsumer<String> consumerFunction = new DummyFlinkKafkaConsumer<>(fetcher, TOPICS, partitions, FlinkKafkaConsumerBase.PARTITION_DISCOVERY_DISABLED);
    StreamSource<String, DummyFlinkKafkaConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.setup();
    testHarness.open();
    final Throwable[] error = new Throwable[1];
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                consumerFunction.run(new DummySourceContext() {

                    @Override
                    public void collect(String element) {
                    }
                });
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    final OperatorSubtaskState snapshot;
    synchronized (testHarness.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0L, 0L);
    }
    OperatorSnapshotUtil.writeStateHandle(snapshot, path);
    consumerOperator.close();
    runner.join();
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) InvocationOnMock(org.mockito.invocation.InvocationOnMock) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch)
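
Writing the snapshot file is the mirror image of restoring it: open the harness, drive the source until it has state, then call snapshot() while holding the harness's checkpoint lock and persist the result with OperatorSnapshotUtil.writeStateHandle. A condensed sketch of just the snapshot-and-persist step, assuming harness is an already opened AbstractStreamOperatorTestHarness<String> and path is the target file:

private static void writeSnapshotSketch(
        AbstractStreamOperatorTestHarness<String> harness, String path) throws Exception {
    final OperatorSubtaskState snapshot;
    // snapshots must be taken under the harness's checkpoint lock, mirroring how the
    // runtime synchronizes checkpoints with the running source loop
    synchronized (harness.getCheckpointLock()) {
        // checkpointId = 0, timestamp = 0, as in writeSnapshot above
        snapshot = harness.snapshot(0L, 0L);
    }
    // serialize the subtask state to disk so a later Flink version can restore from it
    OperatorSnapshotUtil.writeStateHandle(snapshot, path);
}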

Example 8 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class RMQSourceTest method testCheckpointing.

@Test
public void testCheckpointing() throws Exception {
    source.autoAck = false;
    StreamSource<String, RMQSource<String>> src = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    sourceThread.start();
    Thread.sleep(5);
    final Random random = new Random(System.currentTimeMillis());
    int numSnapshots = 50;
    long previousSnapshotId;
    long lastSnapshotId = 0;
    long totalNumberOfAcks = 0;
    for (int i = 0; i < numSnapshots; i++) {
        long snapshotId = random.nextLong();
        OperatorSubtaskState data;
        synchronized (DummySourceContext.lock) {
            data = testHarness.snapshot(snapshotId, System.currentTimeMillis());
            previousSnapshotId = lastSnapshotId;
            lastSnapshotId = messageId;
        }
        // let some time pass
        Thread.sleep(5);
        // check if the correct number of messages have been snapshotted
        final long numIds = lastSnapshotId - previousSnapshotId;
        RMQTestSource sourceCopy = new RMQTestSource();
        StreamSource<String, RMQTestSource> srcCopy = new StreamSource<>(sourceCopy);
        AbstractStreamOperatorTestHarness<String> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
        testHarnessCopy.setup();
        testHarnessCopy.initializeState(data);
        testHarnessCopy.open();
        ArrayDeque<Tuple2<Long, Set<String>>> deque = sourceCopy.getRestoredState();
        Set<String> messageIds = deque.getLast().f1;
        assertEquals(numIds, messageIds.size());
        if (messageIds.size() > 0) {
            assertTrue(messageIds.contains(Long.toString(lastSnapshotId - 1)));
        }
        // check if the messages are being acknowledged and the transaction committed
        synchronized (DummySourceContext.lock) {
            source.notifyCheckpointComplete(snapshotId);
        }
        totalNumberOfAcks += numIds;
    }
    Mockito.verify(source.channel, Mockito.times((int) totalNumberOfAcks)).basicAck(Mockito.anyLong(), Mockito.eq(false));
    Mockito.verify(source.channel, Mockito.times(numSnapshots)).txCommit();
}
Also used : StreamSource(org.apache.flink.streaming.api.operators.StreamSource) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) Random(java.util.Random) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Test(org.junit.Test)
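
Example 8 also shows the purely in-memory roundtrip: the OperatorSubtaskState returned by snapshot() can be handed straight to a second harness's initializeState(), with no file involved. A stripped-down sketch of that pattern, where MyRestorableSource is a hypothetical SourceFunction that exposes its restored state, and runningHarness/snapshotId are assumed to come from the surrounding test:

// take a snapshot from the harness that is currently driving the source
OperatorSubtaskState data = runningHarness.snapshot(snapshotId, System.currentTimeMillis());

// restore it into a fresh copy of the source wrapped in its own harness
MyRestorableSource sourceCopy = new MyRestorableSource();   // hypothetical source function
StreamSource<String, MyRestorableSource> srcCopy = new StreamSource<>(sourceCopy);
AbstractStreamOperatorTestHarness<String> harnessCopy =
        new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
harnessCopy.setup();
harnessCopy.initializeState(data);   // accepts the OperatorSubtaskState directly
harnessCopy.open();
// sourceCopy now sees exactly the state captured at snapshot time, e.g. via getRestoredState()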

Example 9 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKinesisConsumerMigrationTest method testRestoreWithReshardedStream.

@Test
public void testRestoreWithReshardedStream() throws Exception {
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata shardMetadata : TEST_STATE.keySet()) {
        // setup the closed shard
        Shard closedShard = new Shard();
        closedShard.setShardId(shardMetadata.getShardId());
        SequenceNumberRange closedSequenceNumberRange = new SequenceNumberRange();
        closedSequenceNumberRange.withStartingSequenceNumber("1");
        closedSequenceNumberRange.withEndingSequenceNumber("1087654321"); // this represents a closed shard
        closedShard.setSequenceNumberRange(closedSequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), closedShard));
        // setup the new shards
        Shard newSplitShard1 = new Shard();
        newSplitShard1.setShardId(KinesisShardIdGenerator.generateFromShardOrder(1));
        SequenceNumberRange newSequenceNumberRange1 = new SequenceNumberRange();
        newSequenceNumberRange1.withStartingSequenceNumber("1087654322");
        newSplitShard1.setSequenceNumberRange(newSequenceNumberRange1);
        newSplitShard1.setParentShardId(TEST_SHARD_ID);
        Shard newSplitShard2 = new Shard();
        newSplitShard2.setShardId(KinesisShardIdGenerator.generateFromShardOrder(2));
        SequenceNumberRange newSequenceNumberRange2 = new SequenceNumberRange();
        newSequenceNumberRange2.withStartingSequenceNumber("2087654322");
        newSplitShard2.setSequenceNumberRange(newSequenceNumberRange2);
        newSplitShard2.setParentShardId(TEST_SHARD_ID);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), newSplitShard1));
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), newSplitShard2));
    }
    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    null,
                    initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setup();
    testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot"));
    testHarness.open();
    consumerFunction.run(new TestSourceContext<>());
    // assert that state is correctly restored
    assertNotEquals(null, consumerFunction.getRestoredState());
    assertEquals(1, consumerFunction.getRestoredState().size());
    assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));
    // assert that the fetcher is registered with all shards, including new shards
    assertEquals(3, fetcher.getSubscribedShardsState().size());
    KinesisStreamShardState restoredClosedShardState = fetcher.getSubscribedShardsState().get(0);
    assertEquals(TEST_STREAM_NAME, restoredClosedShardState.getStreamShardHandle().getStreamName());
    assertEquals(TEST_SHARD_ID, restoredClosedShardState.getStreamShardHandle().getShard().getShardId());
    assertTrue(restoredClosedShardState.getStreamShardHandle().isClosed());
    assertEquals(TEST_SEQUENCE_NUMBER, restoredClosedShardState.getLastProcessedSequenceNum());
    KinesisStreamShardState restoredNewSplitShard1 = fetcher.getSubscribedShardsState().get(1);
    assertEquals(TEST_STREAM_NAME, restoredNewSplitShard1.getStreamShardHandle().getStreamName());
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(1), restoredNewSplitShard1.getStreamShardHandle().getShard().getShardId());
    assertFalse(restoredNewSplitShard1.getStreamShardHandle().isClosed());
    // new shards should be consumed from the beginning
    assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredNewSplitShard1.getLastProcessedSequenceNum());
    KinesisStreamShardState restoredNewSplitShard2 = fetcher.getSubscribedShardsState().get(2);
    assertEquals(TEST_STREAM_NAME, restoredNewSplitShard2.getStreamShardHandle().getStreamName());
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(2), restoredNewSplitShard2.getStreamShardHandle().getShard().getShardId());
    assertFalse(restoredNewSplitShard2.getStreamShardHandle().isClosed());
    // new shards should be consumed from the beginning
    assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredNewSplitShard2.getLastProcessedSequenceNum());
    consumerOperator.close();
    consumerOperator.cancel();
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)

Example 10 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class ContinuousFileProcessingTest method testFunctionRestore.

@Test
public void testFunctionRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    org.apache.hadoop.fs.Path path = null;
    long fileModTime = Long.MIN_VALUE;
    for (int i = 0; i < 1; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        path = file.f0;
        fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
    }
    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);
    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction);
    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();
    final Throwable[] error = new Throwable[1];
    final OneShotLatch latch = new OneShotLatch();
    final DummySourceContext sourceContext = new DummySourceContext() {

        @Override
        public void collect(TimestampedFileInputSplit element) {
            latch.trigger();
        }
    };
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                monitoringFunction.run(sourceContext);
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();
    // first condition for the source to have updated its state: emit at least one element
    if (!latch.isTriggered()) {
        latch.await();
    }
    // second condition for the source to have updated its state: the source is no longer holding
    // the checkpoint lock, which means it has processed all the splits and updated its state.
    synchronized (sourceContext.getCheckpointLock()) {
    }
    OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    monitoringFunction.cancel();
    runner.join();
    testHarness.close();
    final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);
    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy = new StreamSource<>(monitoringFunctionCopy);
    AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();
    Assert.assertNull(error[0]);
    Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());
    hdfs.delete(path, false);
}
Also used : Path(org.apache.flink.core.fs.Path) TimestampedFileInputSplit(org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ContinuousFileMonitoringFunction(org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) TextInputFormat(org.apache.flink.api.java.io.TextInputFormat) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) Test(org.junit.Test)

Aggregations

AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness): 28
Test (org.junit.Test): 23
StreamSource (org.apache.flink.streaming.api.operators.StreamSource): 21
ArrayList (java.util.ArrayList): 17
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 11
HashMap (java.util.HashMap): 6
List (java.util.List): 6
OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 6
SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema): 5
SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange): 4
Shard (com.amazonaws.services.kinesis.model.Shard): 4
TextInputFormat (org.apache.flink.api.java.io.TextInputFormat): 4
Path (org.apache.flink.core.fs.Path): 4
ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction): 4
TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit): 4
KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition): 4
StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle): 4
StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata): 4
TestRuntimeContext (org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext): 4
HashSet (java.util.HashSet): 3