Search in sources :

Example 6 with KinesisDataFetcher

use of org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher in project flink by apache.

the class FlinkKinesisConsumerTest method testPeriodicWatermark.

/**
 * Runs a FlinkKinesisConsumer against two in-memory "shards" (blocking queues) and checks the
 * watermarks emitted through the source context.
 *
 * <p>The source runs on its own thread; the test feeds records into the queues, waits for them
 * to show up in the harness output, and then advances processing time by the auto-watermark
 * interval. The watermarks captured by the source context are expected to be -3 and then 5.
 */
@Test
public void testPeriodicWatermark() throws Exception {
    String streamName = "fakeStreamName";
    Time maxOutOfOrderness = Time.milliseconds(5);
    long autoWatermarkInterval = 1_000;
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    // one blocking queue per fake shard; records put into a queue are served by the fake Kinesis below
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {

        @Override
        protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
            // the fetcher reads from the blocking queues instead of a real Kinesis stream
            KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(streams, sourceContext, sourceContext.getCheckpointLock(), runtimeContext, configProps, deserializationSchema, getShardAssigner(), getPeriodicWatermarkAssigner(), null, new AtomicReference<>(), new ArrayList<>(), subscribedStreamsToLastDiscoveredShardIds, (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap), null) {
            };
            return fetcher;
        }
    };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shard ids look like shardId-000000000000; the numeric suffix is returned as the assignment
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    // watermarks are captured separately so that the harness output only contains records
    ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
    @SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {

        @Override
        public void emitWatermark(Watermark mark) {
            watermarks.add(mark);
        }

        @Override
        public void markAsTemporarilyIdle() {
        }
    };
    // run the source on its own thread so the test thread can feed records and advance time
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            // NOTE(review): this is thrown on the source thread, not the test thread, so it
            // presumably does not fail the test by itself -- confirm
            throw new RuntimeException(e);
        }
    }).start();
    shard1.put("1");
    shard1.put("2");
    shard2.put("10");
    int recordCount = 3;
    int watermarkCount = 0;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, first watermark is -3
    // - Shard-1 @2
    // - Shard-2 @10
    // - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    // advance watermark
    shard1.put("10");
    recordCount++;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, second watermark is 5
    // - Shard-1 @10
    // - Shard-2 @10
    // - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    sourceFunc.cancel();
    testHarness.close();
    // the harness output holds only records because watermarks were diverted into the queue above
    assertEquals("record count", recordCount, testHarness.getOutput().size());
    assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
    assertEquals("watermark count", watermarkCount, watermarks.size());
}
Also used : HashMap(java.util.HashMap) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) ArrayList(java.util.ArrayList) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 7 with KinesisDataFetcher

use of org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher in project flink by apache.

the class FlinkKinesisConsumerTest method testFetcherShouldBeCorrectlySeededIfRestoringFromCheckpoint.

/**
 * Restores the consumer from a fake checkpoint and verifies that every restored shard is
 * registered with the (mocked) fetcher together with its restored sequence number.
 */
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededIfRestoringFromCheckpoint() throws Exception {
    // --- fake checkpoint contents: shard handle -> sequence number ---
    HashMap<StreamShardHandle, SequenceNumber> restoredSequenceNumbers = getFakeRestoredStore("all");

    // --- union list state served by the mocked operator state store ---
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> restoredListState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : restoredSequenceNumbers.entrySet()) {
        StreamShardMetadata shardMetadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        restoredListState.add(Tuple2.of(shardMetadata, entry.getValue()));
    }
    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(restoredListState);

    // --- initialization context that reports a restore ---
    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // --- mocked fetcher whose shard discovery returns exactly the restored shards ---
    KinesisDataFetcher fetcherMock = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>(restoredSequenceNumbers.keySet());
    when(fetcherMock.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // --- drive the consumer through its restore / open / run lifecycle ---
    TestableFlinkKinesisConsumer consumerUnderTest = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumerUnderTest.initializeState(restoreContext);
    consumerUnderTest.open(new Configuration());
    consumerUnderTest.run(Mockito.mock(SourceFunction.SourceContext.class));

    // --- every restored shard must have been registered with its restored sequence number ---
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : restoredSequenceNumbers.entrySet()) {
        StreamShardHandle shardHandle = entry.getKey();
        KinesisStreamShardState expectedState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                entry.getValue());
        Mockito.verify(fetcherMock).registerNewSubscribedShardState(expectedState);
    }
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Map(java.util.Map) HashMap(java.util.HashMap) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 8 with KinesisDataFetcher

use of org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher in project flink by apache.

the class FlinkKinesisConsumerTest method testSourceSynchronization.

/**
 * Exercises the consumer with a watermark tracker configured (source synchronization).
 *
 * <p>A single in-memory shard feeds the source, which runs on its own thread. The test
 * alternates between advancing processing time by the auto-watermark interval (watermark emit)
 * and by one further millisecond (watermark sync), checking the emitted records/watermarks and
 * the value held by {@code TestWatermarkTracker} after each step. Finally it makes
 * {@code collect()} throw and checks that the source thread terminates with an exception.
 */
@Test
public void testSourceSynchronization() throws Exception {
    final String streamName = "fakeStreamName";
    final Time maxOutOfOrderness = Time.milliseconds(5);
    final long autoWatermarkInterval = 1_000;
    // one millisecond longer than the emit interval: the test below advances processing time
    // by +1 after an emit step in order to trigger the sync
    final long watermarkSyncInterval = autoWatermarkInterval + 1;
    TestWatermarkTracker.WATERMARK.set(0);
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    final KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new OpenCheckingStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    props.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(watermarkSyncInterval));
    props.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(5));
    // single fake shard backed by a blocking queue
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Collections.singletonList(shard1));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {

        @Override
        protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceFunction.SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
            KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(streams, sourceContext, sourceContext.getCheckpointLock(), runtimeContext, configProps, deserializationSchema, getShardAssigner(), getPeriodicWatermarkAssigner(), getWatermarkTracker(), new AtomicReference<>(), new ArrayList<>(), subscribedStreamsToLastDiscoveredShardIds, (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap), null) {

                @Override
                protected void emitWatermark() {
                    // emit while holding the checkpoint lock; presumably needed so that the
                    // watermark state is updated before the watermark timer callback is
                    // triggered -- confirm against the fetcher implementation
                    synchronized (sourceContext.getCheckpointLock()) {
                        super.emitWatermark();
                    }
                }
            };
            return fetcher;
        }
    };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shard ids look like shardId-000000000000; the numeric suffix is returned as the assignment
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    sourceFunc.setWatermarkTracker(new TestWatermarkTracker());
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    // records and watermarks are both appended to this queue, so their relative order can be asserted
    final ConcurrentLinkedQueue<Object> results = testHarness.getOutput();
    // when set, collect() throws; used at the end to check exception propagation
    final AtomicBoolean throwOnCollect = new AtomicBoolean();
    @SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), results) {

        @Override
        public void markAsTemporarilyIdle() {
        }

        @Override
        public void collect(Serializable element) {
            if (throwOnCollect.get()) {
                throw new RuntimeException("expected");
            }
            super.collect(element);
        }

        @Override
        public void emitWatermark(Watermark mark) {
            results.add(mark);
        }
    };
    // holds whatever non-interrupt exception terminates sourceFunc.run()
    final AtomicReference<Exception> sourceThreadError = new AtomicReference<>();
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            sourceThreadError.set(e);
        }
    }).start();
    ArrayList<Object> expectedResults = new ArrayList<>();
    final long record1 = 1;
    shard1.put(Long.toString(record1));
    expectedResults.add(Long.toString(record1));
    awaitRecordCount(results, expectedResults.size());
    // at this point we know the fetcher was initialized
    final KinesisDataFetcher fetcher = org.powermock.reflect.Whitebox.getInternalState(sourceFunc, "fetcher");
    // trigger watermark emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    // -4 = record1 (1) - maxOutOfOrderness (5), assuming the extractor uses the record value as timestamp
    expectedResults.add(new Watermark(-4));
    // verify watermark
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(0, TestWatermarkTracker.WATERMARK.get());
    // trigger sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // record2 = 1 + 3 * 1001 + 1 = 3005
    final long record2 = record1 + (watermarkSyncInterval * 3) + 1;
    shard1.put(Long.toString(record2));
    // wait for the record to be buffered in the emitter
    final RecordEmitter<?> emitter = org.powermock.reflect.Whitebox.getInternalState(fetcher, "recordEmitter");
    RecordEmitter.RecordQueue emitterQueue = emitter.getQueue(0);
    // poll for up to 10 seconds until the emitter queue holds the record
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && emitterQueue.getSize() < 1) {
        Thread.sleep(10);
    }
    assertEquals("first record received", 1, emitterQueue.getSize());
    // Advance the watermark. Since the new record is past global watermark + threshold,
    // it won't be emitted and the watermark does not advance
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // 3000 = record2 (3005) - maxOutOfOrderness (5)
    assertEquals(3000L, (long) org.powermock.reflect.Whitebox.getInternalState(fetcher, "nextWatermark"));
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // Trigger global watermark sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    expectedResults.add(Long.toString(record2));
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    TestWatermarkTracker.assertGlobalWatermark(3000);
    // Trigger watermark update and emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(3000));
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // verify exception propagation
    Assert.assertNull(sourceThreadError.get());
    throwOnCollect.set(true);
    shard1.put(Long.toString(record2 + 1));
    // poll for up to 10 seconds until the source thread has recorded a failure
    deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && sourceThreadError.get() == null) {
        Thread.sleep(10);
    }
    Assert.assertNotNull(sourceThreadError.get());
    // NOTE(review): the first argument of assertNotNull is the failure message, so this only
    // checks that getMessage() is non-null; it does not compare the text against "expected".
    // If matching the message thrown by collect() was intended, an equality assertion is
    // needed -- confirm first whether the exception is wrapped before reaching this thread.
    Assert.assertNotNull("expected", sourceThreadError.get().getMessage());
    sourceFunc.cancel();
    testHarness.close();
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) ArrayList(java.util.ArrayList) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Deadline(org.apache.flink.api.common.time.Deadline) AtomicReference(java.util.concurrent.atomic.AtomicReference) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) RecordEmitter(org.apache.flink.streaming.connectors.kinesis.util.RecordEmitter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Watermark(org.apache.flink.streaming.api.watermark.Watermark) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 9 with KinesisDataFetcher

use of org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher in project flink by apache.

the class FlinkKinesisConsumerTest method testFetcherShouldBeCorrectlySeededOnlyItsOwnStates.

/**
 * Restores the consumer from union state that mixes entries created for "fakeStream1" and
 * "fakeStream2", with shard discovery returning only the "fakeStream1" entries. Verifies that
 * the (mocked) fetcher is seeded with those entries and never with the "fakeStream2" ones.
 */
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededOnlyItsOwnStates() throws Exception {
    // --- fake checkpoint contents: own entries and entries belonging to others ---
    HashMap<StreamShardHandle, SequenceNumber> ownRestoredState = getFakeRestoredStore("fakeStream1");
    HashMap<StreamShardHandle, SequenceNumber> foreignRestoredState = getFakeRestoredStore("fakeStream2");

    // --- union list state served by the mocked operator state store (own entries first) ---
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> restoredListState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : ownRestoredState.entrySet()) {
        StreamShardMetadata shardMetadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        restoredListState.add(Tuple2.of(shardMetadata, entry.getValue()));
    }
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : foreignRestoredState.entrySet()) {
        StreamShardMetadata shardMetadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        restoredListState.add(Tuple2.of(shardMetadata, entry.getValue()));
    }
    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(restoredListState);

    // --- initialization context that reports a restore ---
    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // --- mocked fetcher whose shard discovery returns only the own shards ---
    KinesisDataFetcher fetcherMock = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>(ownRestoredState.keySet());
    when(fetcherMock.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // --- drive the consumer through its restore / open / run lifecycle ---
    TestableFlinkKinesisConsumer consumerUnderTest = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumerUnderTest.initializeState(restoreContext);
    consumerUnderTest.open(new Configuration());
    consumerUnderTest.run(Mockito.mock(SourceFunction.SourceContext.class));

    // --- restored state not belonging to this consumer must never be registered ---
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : foreignRestoredState.entrySet()) {
        StreamShardHandle shardHandle = entry.getKey();
        KinesisStreamShardState foreignShardState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                entry.getValue());
        Mockito.verify(fetcherMock, never()).registerNewSubscribedShardState(foreignShardState);
    }

    // --- restored state belonging to this consumer must be registered ---
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : ownRestoredState.entrySet()) {
        StreamShardHandle shardHandle = entry.getKey();
        KinesisStreamShardState ownShardState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                entry.getValue());
        Mockito.verify(fetcherMock).registerNewSubscribedShardState(ownShardState);
    }
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Map(java.util.Map) HashMap(java.util.HashMap) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 10 with KinesisDataFetcher

use of org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher in project flink by apache.

the class FlinkKinesisConsumerTest method testFetcherShouldBeCorrectlySeededWithNewDiscoveredKinesisStreamShard.

/*
     * This tests that the consumer correctly picks up shards that were not discovered on the previous run.
     *
     * Case under test:
     *
     * If the original parallelism is 2 and states are:
     *   Consumer subtask 1:
     *     stream1, shard1, SequentialNumber(xxx)
     *   Consumer subtask 2:
     *     stream1, shard2, SequentialNumber(yyy)
     *
     * After discoverNewShardsToSubscribe() if there were two shards (shard3, shard4) created:
     *   Consumer subtask 1 (late for discoverNewShardsToSubscribe()):
     *     stream1, shard1, SequentialNumber(xxx)
     *   Consumer subtask 2:
     *     stream1, shard2, SequentialNumber(yyy)
     *     stream1, shard4, SequentialNumber(zzz)
     *
     * If snapshotState() occurs and parallelism is changed to 1:
     *   Union state will be:
     *     stream1, shard1, SequentialNumber(xxx)
     *     stream1, shard2, SequentialNumber(yyy)
     *     stream1, shard4, SequentialNumber(zzz)
     *   Fetcher should be seeded with:
     *     stream1, shard1, SequentialNumber(xxx)
     *     stream1, shard2, SequentialNumber(yyy)
     *     stream1, shard3, SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM
     *     stream1, shard4, SequentialNumber(zzz)
     */
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededWithNewDiscoveredKinesisStreamShard() throws Exception {
    // --- fake checkpoint contents: shard handle -> sequence number ---
    HashMap<StreamShardHandle, SequenceNumber> restoredSequenceNumbers = getFakeRestoredStore("all");

    // --- union list state served by the mocked operator state store ---
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> restoredListState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : restoredSequenceNumbers.entrySet()) {
        StreamShardMetadata shardMetadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        restoredListState.add(Tuple2.of(shardMetadata, entry.getValue()));
    }
    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(restoredListState);

    // --- initialization context that reports a restore ---
    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // --- mocked fetcher: discovery returns the restored shards plus one shard absent from the state ---
    KinesisDataFetcher fetcherMock = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>(restoredSequenceNumbers.keySet());
    StreamShardHandle newlyDiscoveredShard = new StreamShardHandle(
            "fakeStream2",
            new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(2)));
    discoveredShards.add(newlyDiscoveredShard);
    when(fetcherMock.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // --- drive the consumer through its restore / open / run lifecycle ---
    TestableFlinkKinesisConsumer consumerUnderTest = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumerUnderTest.initializeState(restoreContext);
    consumerUnderTest.open(new Configuration());
    consumerUnderTest.run(Mockito.mock(SourceFunction.SourceContext.class));

    // --- the shard missing from the restored state is expected at the earliest-sequence sentinel ---
    StreamShardHandle expectedNewShard = new StreamShardHandle(
            "fakeStream2",
            new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(2)));
    restoredSequenceNumbers.put(expectedNewShard, SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get());

    // --- every expected shard (restored + newly discovered) must have been registered ---
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : restoredSequenceNumbers.entrySet()) {
        StreamShardHandle shardHandle = entry.getKey();
        KinesisStreamShardState expectedState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                entry.getValue());
        Mockito.verify(fetcherMock).registerNewSubscribedShardState(expectedState);
    }
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) KinesisStreamShard(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard) Map(java.util.Map) HashMap(java.util.HashMap) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

KinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher)10 Properties (java.util.Properties)8 TestableFlinkKinesisConsumer (org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer)8 Test (org.junit.Test)8 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)8 ArrayList (java.util.ArrayList)7 HashMap (java.util.HashMap)7 CollectingSourceContext (org.apache.flink.streaming.util.CollectingSourceContext)7 Configuration (org.apache.flink.configuration.Configuration)6 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)5 OperatorStateStore (org.apache.flink.api.common.state.OperatorStateStore)5 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)5 StateInitializationContext (org.apache.flink.runtime.state.StateInitializationContext)5 SentinelSequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber)5 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)5 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)5 Map (java.util.Map)4 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)4 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)3 Shard (com.amazonaws.services.kinesis.model.Shard)2