Search in sources :

Example 1 with CollectingSourceContext

Use of org.apache.flink.streaming.util.CollectingSourceContext in the Apache Flink project.

From the class FlinkKinesisConsumerTest, method testPeriodicWatermark.

// Verifies periodic watermark emission for FlinkKinesisConsumer: records are fed
// through two mocked shards (blocking queues) and watermark generation is driven
// by advancing the harness's processing time by autoWatermarkInterval. Per the
// inline calculations below, the expected watermark is
// min(latest timestamp per shard) - maxOutOfOrderness.
@Test
public void testPeriodicWatermark() throws Exception {
    String streamName = "fakeStreamName";
    Time maxOutOfOrderness = Time.milliseconds(5);
    long autoWatermarkInterval = 1_000;
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    // each BlockingQueue simulates one Kinesis shard; the test "sends" records via put()
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {

        @Override
        protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
            KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(streams, sourceContext, sourceContext.getCheckpointLock(), runtimeContext, configProps, deserializationSchema, getShardAssigner(), getPeriodicWatermarkAssigner(), null, new AtomicReference<>(), new ArrayList<>(), subscribedStreamsToLastDiscoveredShardIds, (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap), null) {
            };
            return fetcher;
        }
    };
    // assign each shard by the numeric suffix of its shard id
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // e.g. "shardId-000000000000" -> 0
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
    // capture watermarks into a separate queue (instead of the harness output);
    // markAsTemporarilyIdle is a deliberate no-op here — presumably so the
    // harness output is unaffected by idleness signals (TODO confirm intent)
    @SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {

        @Override
        public void emitWatermark(Watermark mark) {
            watermarks.add(mark);
        }

        @Override
        public void markAsTemporarilyIdle() {
        }
    };
    // run the source function on its own thread, as a real task would
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
        // expected on cancel
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).start();
    shard1.put("1");
    shard1.put("2");
    shard2.put("10");
    int recordCount = 3;
    int watermarkCount = 0;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, first watermark is -3
    // - Shard-1 @2
    // - Shard-2 @10
    // - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    // advance watermark
    shard1.put("10");
    recordCount++;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, second watermark is 5
    // - Shard-1 @10
    // - Shard-2 @10
    // - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    sourceFunc.cancel();
    testHarness.close();
    assertEquals("record count", recordCount, testHarness.getOutput().size());
    assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
    assertEquals("watermark count", watermarkCount, watermarks.size());
}
Also used : HashMap(java.util.HashMap) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) ArrayList(java.util.ArrayList) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 2 with CollectingSourceContext

Use of org.apache.flink.streaming.util.CollectingSourceContext in the Apache Flink project.

From the class FlinkKinesisConsumerTest, method testSourceSynchronization.

// Verifies synchronization of the source against a shared (global) watermark:
// a record whose timestamp is past the global watermark + lookahead is held in
// the record emitter until the global watermark sync advances it. Also verifies
// that an exception thrown from collect() propagates to the source thread.
// NOTE(review): relies on Whitebox reflection into KinesisDataFetcher internals
// ("fetcher", "recordEmitter", "nextWatermark") — brittle against refactoring.
@Test
public void testSourceSynchronization() throws Exception {
    final String streamName = "fakeStreamName";
    final Time maxOutOfOrderness = Time.milliseconds(5);
    final long autoWatermarkInterval = 1_000;
    // sync interval is offset by 1 so watermark emit and global sync can be
    // triggered independently via setProcessingTime below
    final long watermarkSyncInterval = autoWatermarkInterval + 1;
    TestWatermarkTracker.WATERMARK.set(0);
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    final KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new OpenCheckingStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    props.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(watermarkSyncInterval));
    props.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(5));
    // single mocked shard backed by a blocking queue
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Collections.singletonList(shard1));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {

        @Override
        protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceFunction.SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
            KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(streams, sourceContext, sourceContext.getCheckpointLock(), runtimeContext, configProps, deserializationSchema, getShardAssigner(), getPeriodicWatermarkAssigner(), getWatermarkTracker(), new AtomicReference<>(), new ArrayList<>(), subscribedStreamsToLastDiscoveredShardIds, (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap), null) {

                @Override
                protected void emitWatermark() {
                    // hold the checkpoint lock so emission cannot interleave
                    // with (i.e. must complete before or after) the watermark
                    // timer callback being triggered
                    synchronized (sourceContext.getCheckpointLock()) {
                        super.emitWatermark();
                    }
                }
            };
            return fetcher;
        }
    };
    // assign each shard by the numeric suffix of its shard id
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // e.g. "shardId-000000000000" -> 0
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    sourceFunc.setWatermarkTracker(new TestWatermarkTracker());
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    final ConcurrentLinkedQueue<Object> results = testHarness.getOutput();
    // toggled later to force collect() to fail, exercising error propagation
    final AtomicBoolean throwOnCollect = new AtomicBoolean();
    // records AND watermarks both go to `results` here, so ordering between
    // them can be asserted; markAsTemporarilyIdle is a deliberate no-op
    @SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), results) {

        @Override
        public void markAsTemporarilyIdle() {
        }

        @Override
        public void collect(Serializable element) {
            if (throwOnCollect.get()) {
                throw new RuntimeException("expected");
            }
            super.collect(element);
        }

        @Override
        public void emitWatermark(Watermark mark) {
            results.add(mark);
        }
    };
    // capture any exception escaping the source thread for later assertions
    final AtomicReference<Exception> sourceThreadError = new AtomicReference<>();
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
        // expected on cancel
        } catch (Exception e) {
            sourceThreadError.set(e);
        }
    }).start();
    ArrayList<Object> expectedResults = new ArrayList<>();
    final long record1 = 1;
    shard1.put(Long.toString(record1));
    expectedResults.add(Long.toString(record1));
    awaitRecordCount(results, expectedResults.size());
    // at this point we know the fetcher was initialized
    final KinesisDataFetcher fetcher = org.powermock.reflect.Whitebox.getInternalState(sourceFunc, "fetcher");
    // trigger watermark emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    // watermark = record1 - maxOutOfOrderness = 1 - 5 = -4
    expectedResults.add(new Watermark(-4));
    // verify watermark
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // global watermark has not been synced yet
    assertEquals(0, TestWatermarkTracker.WATERMARK.get());
    // trigger sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // record2 = 1 + 3003 + 1 = 3005, far ahead of the global watermark
    final long record2 = record1 + (watermarkSyncInterval * 3) + 1;
    shard1.put(Long.toString(record2));
    // wait for the record to be buffered in the emitter
    final RecordEmitter<?> emitter = org.powermock.reflect.Whitebox.getInternalState(fetcher, "recordEmitter");
    RecordEmitter.RecordQueue emitterQueue = emitter.getQueue(0);
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && emitterQueue.getSize() < 1) {
        Thread.sleep(10);
    }
    assertEquals("first record received", 1, emitterQueue.getSize());
    // Advance the watermark. Since the new record is past global watermark + threshold,
    // it won't be emitted and the watermark does not advance
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // pending watermark = record2 - maxOutOfOrderness = 3005 - 5 = 3000
    assertEquals(3000L, (long) org.powermock.reflect.Whitebox.getInternalState(fetcher, "nextWatermark"));
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // Trigger global watermark sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    // after the sync the held record is released
    expectedResults.add(Long.toString(record2));
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    TestWatermarkTracker.assertGlobalWatermark(3000);
    // Trigger watermark update and emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(3000));
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // verify exception propagation
    Assert.assertNull(sourceThreadError.get());
    throwOnCollect.set(true);
    shard1.put(Long.toString(record2 + 1));
    // poll until the RuntimeException thrown in collect() reaches the source thread
    deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && sourceThreadError.get() == null) {
        Thread.sleep(10);
    }
    Assert.assertNotNull(sourceThreadError.get());
    Assert.assertNotNull("expected", sourceThreadError.get().getMessage());
    sourceFunc.cancel();
    testHarness.close();
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) ArrayList(java.util.ArrayList) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Deadline(org.apache.flink.api.common.time.Deadline) AtomicReference(java.util.concurrent.atomic.AtomicReference) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) RecordEmitter(org.apache.flink.streaming.connectors.kinesis.util.RecordEmitter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Watermark(org.apache.flink.streaming.api.watermark.Watermark) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Properties (java.util.Properties)2 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)2 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)2 SourceFunction (org.apache.flink.streaming.api.functions.source.SourceFunction)2 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)2 Watermark (org.apache.flink.streaming.api.watermark.Watermark)2 Time (org.apache.flink.streaming.api.windowing.time.Time)2 KinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher)2 KinesisDeserializationSchema (org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema)2 KinesisDeserializationSchemaWrapper (org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper)2 TestableFlinkKinesisConsumer (org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer)2 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)2 CollectingSourceContext (org.apache.flink.streaming.util.CollectingSourceContext)2 Test (org.junit.Test)2 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)2 Serializable (java.io.Serializable)1 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)1