Search in sources :

Example 16 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKinesisConsumerMigrationTest method writeSnapshot.

// ------------------------------------------------------------------------
/**
 * Runs a dummy Kinesis consumer that is seeded with {@code state}, takes a snapshot of the
 * wrapping source operator while the consumer is running, and writes that snapshot to
 * {@code path}.
 *
 * @param path file the operator snapshot is written to
 * @param state shard metadata to sequence number mapping handed to the test fetcher; one
 *     discovered shard is created per key
 * @throws Exception if the harness, the snapshot or the write fails, or if the consumer's
 *     {@code run} call failed on the background thread
 */
@SuppressWarnings("unchecked")
private void writeSnapshot(String path, HashMap<StreamShardMetadata, SequenceNumber> state) throws Exception {
    // build one shard handle per state entry; these are passed to the fetcher as its
    // initially discovered shards
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(state.size());
    for (StreamShardMetadata shardMetadata : state.keySet()) {
        Shard shard = new Shard();
        shard.setShardId(shardMetadata.getShardId());
        SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
        sequenceNumberRange.withStartingSequenceNumber("1");
        shard.setSequenceNumberRange(sequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
    }
    final TestFetcher<String> fetcher = new TestFetcher<>(Collections.singletonList(TEST_STREAM_NAME), new TestSourceContext<>(), new TestRuntimeContext(true, 1, 0), TestUtils.getStandardProperties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), state, initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumer = new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumer);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.setup();
    testHarness.open();
    final AtomicReference<Throwable> error = new AtomicReference<>();
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                consumer.run(new TestSourceContext<>());
            } catch (Throwable t) {
                // printed immediately as well as recorded: if the failure happens before the
                // fetcher starts, the wait below may never return and this trace is the only hint
                t.printStackTrace();
                error.set(t);
            }
        }
    };
    runner.start();
    // do not snapshot before the fetcher has actually been started by the consumer
    fetcher.waitUntilRun();
    final OperatorSubtaskState snapshot;
    synchronized (testHarness.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0L, 0L);
    }
    OperatorSnapshotUtil.writeStateHandle(snapshot, path);
    consumerOperator.close();
    runner.join();
    // surface a failure of the background run instead of silently returning a snapshot
    // that was produced by a broken consumer
    final Throwable failure = error.get();
    if (failure != null) {
        throw new Exception("Kinesis consumer failed while the snapshot was being written", failure);
    }
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) AtomicReference(java.util.concurrent.atomic.AtomicReference) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Shard(com.amazonaws.services.kinesis.model.Shard)

Example 17 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKinesisConsumerMigrationTest method testRestoreWithEmptyState.

/**
 * Restores the consumer from the "empty" snapshot of the tested Flink version and checks that
 * no state is restored while the initially discovered shard is still subscribed, starting at
 * the earliest-sequence-number sentinel.
 */
@Test
public void testRestoreWithEmptyState() throws Exception {
    // one discovered shard per entry of the reference state
    final List<StreamShardHandle> discoveredShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata metadata : TEST_STATE.keySet()) {
        final Shard discovered = new Shard();
        discovered.setShardId(metadata.getShardId());
        discovered.setSequenceNumberRange(new SequenceNumberRange().withStartingSequenceNumber("1"));
        discoveredShards.add(new StreamShardHandle(metadata.getStreamName(), discovered));
    }

    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    null,
                    discoveredShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction =
            new DummyFlinkKinesisConsumer<>(
                    fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator =
            new StreamSource<>(consumerFunction);

    final AbstractStreamOperatorTestHarness<String> harness =
            new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    harness.setup();
    final String snapshotResource =
            OperatorSnapshotUtil.getResourceFilename(
                    "kinesis-consumer-migration-test-flink" + testMigrateVersion + "-empty-snapshot");
    harness.initializeState(snapshotResource);
    harness.open();

    consumerFunction.run(new TestSourceContext<>());

    // nothing must have been restored from the empty snapshot
    assertTrue(consumerFunction.getRestoredState().isEmpty());

    // Even with empty restored state the fetcher is expected to have registered the
    // initially discovered shard. That shard counts as one created while the job was
    // not running, so it has to be read from the earliest sequence number.
    final KinesisStreamShardState subscribed = fetcher.getSubscribedShardsState().get(0);
    final StreamShardHandle subscribedHandle = subscribed.getStreamShardHandle();
    assertEquals(TEST_STREAM_NAME, subscribedHandle.getStreamName());
    assertEquals(TEST_SHARD_ID, subscribedHandle.getShard().getShardId());
    assertFalse(subscribedHandle.isClosed());
    assertEquals(
            SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(),
            subscribed.getLastProcessedSequenceNum());

    consumerOperator.close();
    consumerOperator.cancel();
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)

Example 18 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKinesisConsumerMigrationTest method testRestore.

/**
 * Restores the consumer from the non-empty snapshot of the tested Flink version and checks that
 * both the consumer's restored state and the fetcher's subscribed shard match the reference
 * state.
 */
@Test
public void testRestore() throws Exception {
    // one discovered shard per entry of the reference state
    final List<StreamShardHandle> discoveredShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata metadata : TEST_STATE.keySet()) {
        final Shard discovered = new Shard();
        discovered.setShardId(metadata.getShardId());
        discovered.setSequenceNumberRange(new SequenceNumberRange().withStartingSequenceNumber("1"));
        discoveredShards.add(new StreamShardHandle(metadata.getStreamName(), discovered));
    }

    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    null,
                    discoveredShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction =
            new DummyFlinkKinesisConsumer<>(
                    fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator =
            new StreamSource<>(consumerFunction);

    final AbstractStreamOperatorTestHarness<String> harness =
            new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    harness.setup();
    final String snapshotResource =
            OperatorSnapshotUtil.getResourceFilename(
                    "kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot");
    harness.initializeState(snapshotResource);
    harness.open();

    consumerFunction.run(new TestSourceContext<>());

    // the consumer must report exactly the reference state as restored
    assertNotEquals(null, consumerFunction.getRestoredState());
    assertEquals(1, consumerFunction.getRestoredState().size());
    assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));

    // the fetcher must be subscribed to exactly that one shard, at the restored position
    assertEquals(1, fetcher.getSubscribedShardsState().size());
    final KinesisStreamShardState subscribed = fetcher.getSubscribedShardsState().get(0);
    assertEquals(TEST_SEQUENCE_NUMBER, subscribed.getLastProcessedSequenceNum());

    final StreamShardHandle subscribedHandle = subscribed.getStreamShardHandle();
    assertEquals(TEST_STREAM_NAME, subscribedHandle.getStreamName());
    assertEquals(TEST_SHARD_ID, subscribedHandle.getShard().getShardId());
    assertFalse(subscribedHandle.isClosed());
    assertEquals(TEST_SEQUENCE_NUMBER, subscribed.getLastProcessedSequenceNum());

    consumerOperator.close();
    consumerOperator.cancel();
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)

Example 19 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class RMQSourceTest method testRedeliveredSessionIDsAck.

/**
 * Verifies acknowledgement bookkeeping when the message id counter is reset mid-run to mimic
 * redelivery: after one checkpoint, the number of {@code basicAck} and {@code basicReject}
 * calls on the channel must match the collected and redelivered message counts.
 */
@Test
public void testRedeliveredSessionIDsAck() throws Exception {
    source.autoAck = false;
    StreamSource<String, RMQSource<String>> operator = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<String> harness =
            new AbstractStreamOperatorTestHarness<>(operator, 1, 1, 0);
    harness.open();
    sourceThread.start();

    // poll until a first batch of messages has gone through the source
    while (DummySourceContext.numElementsCollected < 10) {
        Thread.sleep(5);
    }

    // simulate redelivery: remember how many messages were seen, then restart the id counter
    final long collectedBeforeReset;
    synchronized (DummySourceContext.lock) {
        collectedBeforeReset = DummySourceContext.numElementsCollected;
        messageId = 0;
    }

    // poll until some messages have been delivered again after the reset
    while (DummySourceContext.numElementsCollected < collectedBeforeReset + 10) {
        Thread.sleep(5);
    }

    // taking a snapshot and completing it is what triggers the acknowledgements
    final long checkpointId = new Random(System.currentTimeMillis()).nextLong();
    final long idAtCheckpoint;
    synchronized (DummySourceContext.lock) {
        harness.snapshot(checkpointId, System.currentTimeMillis());
        source.notifyCheckpointComplete(checkpointId);
        idAtCheckpoint = messageId;
        // every message must have been collected and registered for acknowledgement
        assertEquals(idAtCheckpoint, DummySourceContext.numElementsCollected);
        assertEquals(collectedBeforeReset + idAtCheckpoint, ((RMQTestSource) source).addIdCalls);
    }

    // the channel must have received the matching number of acks and rejects
    final int expectedAcks = (int) idAtCheckpoint;
    final int expectedRejects = (int) collectedBeforeReset;
    Mockito.verify(source.channel, Mockito.times(expectedAcks))
            .basicAck(Mockito.anyLong(), Mockito.eq(false));
    Mockito.verify(source.channel, Mockito.times(expectedRejects))
            .basicReject(Mockito.anyLong(), Mockito.eq(false));
}
Also used : Random(java.util.Random) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) Test(org.junit.Test)

Example 20 with AbstractStreamOperatorTestHarness

use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.

the class FlinkKinesisConsumerTest method testPeriodicWatermark.

/**
 * Feeds records into two queue-backed fake shards of one stream and checks that each advance
 * of the harness processing time by the auto-watermark interval yields one watermark:
 * first -3, then 5 (minimum shard timestamp minus the 5 ms max out-of-orderness).
 */
@Test
public void testPeriodicWatermark() throws Exception {
    String streamName = "fakeStreamName";
    Time maxOutOfOrderness = Time.milliseconds(5);
    long autoWatermarkInterval = 1_000;
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    // one blocking queue per fake shard; the test pushes records into these queues below
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {

        @Override
        protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
            // the fetcher reads from the queues above through the fake Kinesis behaviour factory
            KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(streams, sourceContext, sourceContext.getCheckpointLock(), runtimeContext, configProps, deserializationSchema, getShardAssigner(), getPeriodicWatermarkAssigner(), null, new AtomicReference<>(), new ArrayList<>(), subscribedStreamsToLastDiscoveredShardIds, (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap), null) {
            };
            return fetcher;
        }
    };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shard ids look like shardId-000000000000; the numeric suffix is returned as the assignment
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    // watermarks are captured here instead of being forwarded, so they can be asserted on directly
    ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
    @SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {

        @Override
        public void emitWatermark(Watermark mark) {
            watermarks.add(mark);
        }

        @Override
        public void markAsTemporarilyIdle() {
        }
    };
    // run the source on its own thread; the test thread feeds records and advances time
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
        // expected on cancel
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).start();
    shard1.put("1");
    shard1.put("2");
    shard2.put("10");
    int recordCount = 3;
    int watermarkCount = 0;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, first watermark is -3
    // - Shard-1 @2
    // - Shard-2 @10
    // - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    // advance watermark
    shard1.put("10");
    recordCount++;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, second watermark is 5
    // - Shard-1 @10
    // - Shard-2 @10
    // - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    sourceFunc.cancel();
    testHarness.close();
    // all four records reached the output, and exactly the two expected watermarks were emitted
    assertEquals("record count", recordCount, testHarness.getOutput().size());
    assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
    assertEquals("watermark count", watermarkCount, watermarks.size());
}
Also used : HashMap(java.util.HashMap) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) ArrayList(java.util.ArrayList) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)28 Test (org.junit.Test)23 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)21 ArrayList (java.util.ArrayList)17 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)11 HashMap (java.util.HashMap)6 List (java.util.List)6 OneShotLatch (org.apache.flink.core.testutils.OneShotLatch)6 SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)5 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)4 Shard (com.amazonaws.services.kinesis.model.Shard)4 TextInputFormat (org.apache.flink.api.java.io.TextInputFormat)4 Path (org.apache.flink.core.fs.Path)4 ContinuousFileMonitoringFunction (org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction)4 TimestampedFileInputSplit (org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit)4 KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)4 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)4 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)4 TestRuntimeContext (org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext)4 HashSet (java.util.HashSet)3