Search in sources :

Example 1 with TestSourceContext

use of org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext in project flink by apache.

the class KinesisDataFetcherTest method testPeriodicWatermark.

/**
 * Checks periodic watermark emission and idle marking of the fetcher.
 *
 * <p>The fetcher's notion of time is driven by a mutable test clock, and the source context
 * records every watermark it is handed as well as whether it was marked temporarily idle.
 * The assertions cover three phases: no watermark for a record stamped {@code Long.MIN_VALUE},
 * a watermark equal to the timestamp of the next record, and idle marking only once
 * {@code shardIdleIntervalMillis} has been set on the fetcher.
 */
@Test
public void testPeriodicWatermark() {
    final MutableLong testClock = new MutableLong();
    final MutableBoolean markedIdle = new MutableBoolean();
    final List<Watermark> emittedWatermarks = new ArrayList<>();

    final String streamName = "fakeStream1";
    final Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0));
    final StreamShardHandle shardHandle = new StreamShardHandle(streamName, shard);

    // Source context that captures watermarks and the idle signal for later inspection.
    final TestSourceContext<String> sourceContext = new TestSourceContext<String>() {

        @Override
        public void emitWatermark(Watermark mark) {
            emittedWatermarks.add(mark);
        }

        @Override
        public void markAsTemporarilyIdle() {
            markedIdle.setTrue();
        }
    };

    final HashMap<String, String> lastSeenShardIds = new HashMap<>();

    // Fetcher whose current time is read from the test clock.
    final KinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<String>(
            singletonList(streamName),
            sourceContext,
            new java.util.Properties(),
            new KinesisDeserializationSchemaWrapper<>(new org.apache.flink.streaming.util.serialization.SimpleStringSchema()),
            1,
            1,
            new AtomicReference<>(),
            new LinkedList<>(),
            lastSeenShardIds,
            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(new HashMap<>())) {

        @Override
        protected long getCurrentTimeMillis() {
            return testClock.getValue();
        }
    };
    Whitebox.setInternalState(fetcher, "periodicWatermarkAssigner", watermarkAssigner);

    final SequenceNumber sequenceNumber = new SequenceNumber("fakeSequenceNumber");

    // A shard has to be registered before records can be emitted for it.
    final KinesisStreamShardState shardState = new KinesisStreamShardState(
            KinesisDataFetcher.convertToStreamShardMetadata(shardHandle), shardHandle, sequenceNumber);
    final int shardIndex = fetcher.registerNewSubscribedShardState(shardState);

    // Phase 1: a record stamped Long.MIN_VALUE must not produce a watermark.
    final StreamRecord<String> firstRecord = new StreamRecord<>(String.valueOf(Long.MIN_VALUE), Long.MIN_VALUE);
    fetcher.emitRecordAndUpdateState(firstRecord.getValue(), firstRecord.getTimestamp(), shardIndex, sequenceNumber);
    Assert.assertEquals(firstRecord, sourceContext.getCollectedOutputs().poll());
    fetcher.emitWatermark();
    Assert.assertTrue("potential watermark equals previous watermark", emittedWatermarks.isEmpty());

    // Phase 2: a later timestamp advances the watermark to that timestamp.
    final StreamRecord<String> secondRecord = new StreamRecord<>(String.valueOf(1), 1);
    fetcher.emitRecordAndUpdateState(secondRecord.getValue(), secondRecord.getTimestamp(), shardIndex, sequenceNumber);
    Assert.assertEquals(secondRecord, sourceContext.getCollectedOutputs().poll());
    fetcher.emitWatermark();
    Assert.assertFalse("watermark advanced", emittedWatermarks.isEmpty());
    Assert.assertEquals(new Watermark(secondRecord.getTimestamp()), emittedWatermarks.remove(0));
    Assert.assertFalse("not idle", markedIdle.booleanValue());

    // Phase 3: idle timeout. Kept as a long because it is written into the fetcher reflectively.
    final long idleTimeout = 10;

    // Move the clock past the timeout; the idle interval is not yet set on the fetcher.
    testClock.add(idleTimeout + 1);
    fetcher.emitWatermark();
    Assert.assertFalse("not idle", markedIdle.booleanValue());
    Assert.assertTrue("not idle, no new watermark", emittedWatermarks.isEmpty());

    // Setting the idle interval makes the next emission mark the source idle.
    Whitebox.setInternalState(fetcher, "shardIdleIntervalMillis", idleTimeout);
    fetcher.emitWatermark();
    Assert.assertTrue("idle", markedIdle.booleanValue());
    Assert.assertTrue("idle, no watermark", emittedWatermarks.isEmpty());
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Properties(java.util.Properties) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) MutableBoolean(org.apache.commons.lang3.mutable.MutableBoolean) MutableLong(org.apache.commons.lang3.mutable.MutableLong) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 2 with TestSourceContext

use of org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext in project flink by apache.

the class ShardConsumerTestUtils method assertNumberOfMessagesReceivedFromKinesis.

/**
 * Runs a {@link ShardConsumer} over a single fake shard and asserts on what it produced.
 *
 * @param expectedNumberOfMessages number of outputs the source context must have collected
 * @param recordPublisherFactory factory used to create the record publisher for the shard
 * @param startingSequenceNumber sequence number the shard state is initialised with
 * @param consumerProperties properties passed to the fetcher and used to derive the starting
 *     position
 * @param expectedLastProcessedSequenceNum sequence number the first shard state must hold after
 *     the consumer has run
 * @param metricGroup metric group backing the returned reporter and the record publisher
 * @return the metrics reporter that was handed to the shard consumer
 * @throws InterruptedException declared by the original signature
 */
public static ShardConsumerMetricsReporter assertNumberOfMessagesReceivedFromKinesis(final int expectedNumberOfMessages, final RecordPublisherFactory recordPublisherFactory, final SequenceNumber startingSequenceNumber, final Properties consumerProperties, final SequenceNumber expectedLastProcessedSequenceNum, final AbstractMetricGroup metricGroup) throws InterruptedException {
    final String streamName = "fakeStream";
    final ShardConsumerMetricsReporter metricsReporter = new ShardConsumerMetricsReporter(metricGroup);

    // Seed the shard-state list with the one shard that is going to be consumed.
    final StreamShardHandle shardToConsume = getMockStreamShard(streamName, 0);
    final LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
    shardStates.add(
            new KinesisStreamShardState(
                    KinesisDataFetcher.convertToStreamShardMetadata(shardToConsume),
                    shardToConsume,
                    startingSequenceNumber));

    final TestSourceContext<String> sourceContext = new TestSourceContext<>();
    final KinesisDeserializationSchemaWrapper<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());

    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
            Collections.singletonList(streamName),
            sourceContext,
            consumerProperties,
            deserializationSchema,
            10,
            2,
            new AtomicReference<>(),
            shardStates,
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList(streamName)),
            Mockito.mock(KinesisProxyInterface.class),
            Mockito.mock(KinesisProxyV2Interface.class));

    // Everything the consumer needs is derived from the first shard state in the list.
    final KinesisStreamShardState firstState = shardStates.get(0);
    final StreamShardHandle shardHandle = firstState.getStreamShardHandle();
    final SequenceNumber lastProcessedSequenceNum = firstState.getLastProcessedSequenceNum();
    final StartingPosition startingPosition = AWSUtil.getStartingPosition(lastProcessedSequenceNum, consumerProperties);
    final RecordPublisher recordPublisher =
            recordPublisherFactory.create(startingPosition, fetcher.getConsumerConfiguration(), metricGroup, shardHandle);
    final int shardIndex = fetcher.registerNewSubscribedShardState(firstState);

    // run() is called directly, so the assertions below execute only after it returns.
    final ShardConsumer<String> shardConsumer = new ShardConsumer<>(
            fetcher,
            recordPublisher,
            shardIndex,
            shardHandle,
            lastProcessedSequenceNum,
            metricsReporter,
            deserializationSchema);
    shardConsumer.run();

    assertEquals(expectedNumberOfMessages, sourceContext.getCollectedOutputs().size());
    assertEquals(expectedLastProcessedSequenceNum, shardStates.get(0).getLastProcessedSequenceNum());
    return metricsReporter;
}
Also used : StartingPosition(org.apache.flink.streaming.connectors.kinesis.model.StartingPosition) ShardConsumerMetricsReporter(org.apache.flink.streaming.connectors.kinesis.metrics.ShardConsumerMetricsReporter) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) LinkedList(java.util.LinkedList) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) RecordPublisher(org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisher) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisProxyV2Interface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)

Example 3 with TestSourceContext

use of org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext in project flink by apache.

the class KinesisDataFetcherTest method testCancelDuringDiscovery.

/**
 * Cancels the consumer after the second shard discovery has been triggered and checks that the
 * consumer thread terminates.
 *
 * <p>The discovery interval is set to a very large value so that, at the moment of
 * cancellation, the fetcher is most likely sleeping between discoveries.
 */
@Test
public void testCancelDuringDiscovery() throws Exception {
    final String streamName = "fakeStream";
    final int shardCount = 3;

    final Properties fetcherProperties = TestUtils.getStandardProperties();
    fetcherProperties.setProperty(SHARD_DISCOVERY_INTERVAL_MILLIS, "10000000");

    final LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
    final TestSourceContext<String> sourceContext = new TestSourceContext<>();

    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<String>(
            singletonList(streamName),
            sourceContext,
            fetcherProperties,
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            1,
            0,
            new AtomicReference<>(),
            shardStates,
            new HashMap<>(),
            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(Collections.singletonMap(streamName, shardCount)));

    // The consumer has to set up the fetcher before the fetcher can run, so the consumer is
    // started on its own thread and driven up to the point where the fetcher takes over.
    final DummyFlinkKinesisConsumer<String> consumer =
            new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);
    final CheckedThread runner = new CheckedThread() {

        @Override
        public void go() throws Exception {
            consumer.run(new TestSourceContext<>());
        }
    };
    runner.start();

    // Once the second discovery has been triggered, the fetcher is very likely inside the
    // (10k s) discovery sleep; the extra second raises that likelihood further.
    fetcher.waitUntilDiscovery(2);
    Thread.sleep(1000);

    consumer.cancel();
    runner.sync();
}
Also used : Properties(java.util.Properties) CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Test(org.junit.Test)

Example 4 with TestSourceContext

use of org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext in project flink by apache.

the class KinesisDataFetcherTest method testShardToSubtaskMappingWithCustomHashFunction.

// ----------------------------------------------------------------------
// Tests shard distribution with custom hash function
// ----------------------------------------------------------------------
/**
 * Verifies that a custom {@link KinesisShardAssigner} decides which subtask discovers the
 * shards.
 *
 * <p>For each tested hash value, the assigner maps every shard to that constant. A fetcher is
 * created for every subtask index; only the subtask whose index equals the hash is expected to
 * discover all shards, every other subtask is expected to discover none.
 */
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {
    int totalCountOfSubtasks = 10;
    int shardCount = 3;
    for (int i = 0; i < 2; i++) {
        final int hash = i;
        // Assigner that ignores its inputs and always answers with the current hash value.
        final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
        Map<String, Integer> streamToShardCount = new HashMap<>();
        List<String> fakeStreams = new LinkedList<>();
        fakeStreams.add("fakeStream");
        streamToShardCount.put("fakeStream", shardCount);
        for (int j = 0; j < totalCountOfSubtasks; j++) {
            int subtaskIndex = j;
            // Parameterized with <String> (matching the SimpleStringSchema wrapper) instead of
            // the raw type, so the compiler checks the generic arguments and the result list.
            final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<String>(
                    fakeStreams,
                    new TestSourceContext<>(),
                    new Properties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    totalCountOfSubtasks,
                    subtaskIndex,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams),
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
            // Replace the fetcher's shard assigner with the constant one under test.
            Whitebox.setInternalState(fetcher, "shardAssigner", allShardsSingleSubtaskFn);
            List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
            fetcher.shutdownFetcher();
            String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
            if (j == i) {
                assertEquals(msg, shardCount, shards.size());
            } else {
                assertEquals(msg, 0, shards.size());
            }
        }
    }
}
Also used : KinesisShardAssigner(org.apache.flink.streaming.connectors.kinesis.KinesisShardAssigner) Shard(com.amazonaws.services.kinesis.model.Shard) Arrays(java.util.Arrays) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) Random(java.util.Random) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) Collections.singletonList(java.util.Collections.singletonList) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) MutableLong(org.apache.commons.lang3.mutable.MutableLong) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) KinesisShardIdGenerator(org.apache.flink.streaming.connectors.kinesis.testutils.KinesisShardIdGenerator) Assert.fail(org.junit.Assert.fail) KinesisShardAssigner(org.apache.flink.streaming.connectors.kinesis.KinesisShardAssigner) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Set(java.util.Set) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) MutableBoolean(org.apache.commons.lang3.mutable.MutableBoolean) Mockito.mock(org.mockito.Mockito.mock) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) Whitebox(org.powermock.reflect.Whitebox) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) HashKeyRange(com.amazonaws.services.kinesis.model.HashKeyRange) 
Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) RecordPublisher(org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisher) AtomicReference(java.util.concurrent.atomic.AtomicReference) BoundedOutOfOrdernessTimestampExtractor(org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor) ArrayList(java.util.ArrayList) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer) LinkedList(java.util.LinkedList) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) KinesisProxyV2Interface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) FakeKinesisBehavioursFactory(org.apache.flink.streaming.connectors.kinesis.testutils.FakeKinesisBehavioursFactory) SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AlwaysThrowsDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.testutils.AlwaysThrowsDeserializationSchema) Mockito.when(org.mockito.Mockito.when) TestUtils(org.apache.flink.streaming.connectors.kinesis.testutils.TestUtils) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Assert.assertNull(org.junit.Assert.assertNull) TestableKinesisDataFetcherForShardConsumerException(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcherForShardConsumerException) Assert(org.junit.Assert) Collections(java.util.Collections) 
SHARD_DISCOVERY_INTERVAL_MILLIS(org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) Properties(java.util.Properties) LinkedList(java.util.LinkedList) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Test(org.junit.Test)

Example 5 with TestSourceContext

use of org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext in project flink by apache.

the class KinesisDataFetcherTest method testSkipCorruptedRecord.

/**
 * Checks that emitting a {@code null} record (i.e. a corrupt record) produces no output while
 * the shard's last processed sequence number is still updated.
 *
 * <p>Before the {@code null} case, one regular record is emitted per shard to confirm that
 * normal emission both updates the shard state and reaches the source context.
 */
@Test
public void testSkipCorruptedRecord() throws Exception {
    final String streamName = "fakeStream";
    final int shardCount = 3;
    final LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
    final TestSourceContext<String> sourceContext = new TestSourceContext<>();

    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
            singletonList(streamName),
            sourceContext,
            TestUtils.getStandardProperties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            1,
            0,
            new AtomicReference<>(),
            shardStates,
            new HashMap<>(),
            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(Collections.singletonMap(streamName, shardCount)));

    // The consumer has to set up the fetcher before the fetcher can run, so the consumer is
    // started on its own thread and driven up to the point where the fetcher takes over.
    final DummyFlinkKinesisConsumer<String> consumer =
            new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);
    final CheckedThread runner = new CheckedThread() {

        @Override
        public void go() throws Exception {
            consumer.run(new TestSourceContext<>());
        }
    };
    runner.start();
    fetcher.waitUntilRun();
    consumer.cancel();
    runner.sync();

    assertEquals(shardCount, shardStates.size());

    // Regular records: state is updated and the record shows up in the collected output.
    for (int shardIndex = 0; shardIndex < shardCount; shardIndex++) {
        final String payload = "record-" + shardIndex;
        fetcher.emitRecordAndUpdateState(payload, 10L, shardIndex, new SequenceNumber("seq-num-1"));

        final SequenceNumber expectedSequenceNumber = new SequenceNumber("seq-num-1");
        assertEquals(expectedSequenceNumber, shardStates.get(shardIndex).getLastProcessedSequenceNum());

        final StreamRecord<String> expectedRecord = new StreamRecord<>("record-" + shardIndex, 10L);
        assertEquals(expectedRecord, sourceContext.removeLatestOutput());
    }

    // Corrupt record: a null value must still advance the shard state of shard 1 ...
    fetcher.emitRecordAndUpdateState(null, 10L, 1, new SequenceNumber("seq-num-2"));
    final SequenceNumber expectedAfterCorruptRecord = new SequenceNumber("seq-num-2");
    assertEquals(expectedAfterCorruptRecord, shardStates.get(1).getLastProcessedSequenceNum());

    // ... without anything having been collected.
    assertNull(sourceContext.removeLatestOutput());
}
Also used : CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Test(org.junit.Test)

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)5 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)5 TestSourceContext (org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext)5 TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)5 LinkedList (java.util.LinkedList)4 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)4 Test (org.junit.Test)4 Properties (java.util.Properties)3 CheckedThread (org.apache.flink.core.testutils.CheckedThread)3 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)3 Shard (com.amazonaws.services.kinesis.model.Shard)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 MutableBoolean (org.apache.commons.lang3.mutable.MutableBoolean)2 MutableLong (org.apache.commons.lang3.mutable.MutableLong)2 Watermark (org.apache.flink.streaming.api.watermark.Watermark)2 RecordPublisher (org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisher)2 KinesisProxyInterface (org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface)2 KinesisProxyV2Interface (org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface)2 KinesisDeserializationSchemaWrapper (org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper)2