Search in sources :

Example 11 with StreamShardHandle

Usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.

From the class KinesisDataFetcher, method convertToStreamShardHandle.

/**
 * Converts a {@link StreamShardMetadata} into an equivalent {@link StreamShardHandle}.
 *
 * @param streamShardMetadata the {@link StreamShardMetadata} to be converted
 * @return a {@link StreamShardHandle} wrapping a {@link Shard} rebuilt from the metadata
 */
public static StreamShardHandle convertToStreamShardHandle(StreamShardMetadata streamShardMetadata) {
    // Rebuild the AWS SDK Shard model from the stored metadata fields using the
    // SDK's fluent with-setters (each returns the Shard instance itself).
    Shard shard =
            new Shard()
                    .withShardId(streamShardMetadata.getShardId())
                    .withParentShardId(streamShardMetadata.getParentShardId())
                    .withAdjacentParentShardId(streamShardMetadata.getAdjacentParentShardId())
                    .withHashKeyRange(
                            new HashKeyRange()
                                    .withStartingHashKey(streamShardMetadata.getStartingHashKey())
                                    .withEndingHashKey(streamShardMetadata.getEndingHashKey()))
                    .withSequenceNumberRange(
                            new SequenceNumberRange()
                                    .withStartingSequenceNumber(streamShardMetadata.getStartingSequenceNumber())
                                    .withEndingSequenceNumber(streamShardMetadata.getEndingSequenceNumber()));
    return new StreamShardHandle(streamShardMetadata.getStreamName(), shard);
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Shard(com.amazonaws.services.kinesis.model.Shard) HashKeyRange(com.amazonaws.services.kinesis.model.HashKeyRange)

Example 12 with StreamShardHandle

Usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.

From the class KinesisProxy, method getShardsOfStream.

/**
 * Retrieves every shard of the given stream that is newer than {@code lastSeenShardId}.
 *
 * <p>ListShards returns at most 1000 shard entries per call, so the returned
 * {@code nextToken} is followed until the full shard list has been read.
 *
 * @param streamName the stream whose shards are listed
 * @param lastSeenShardId exclusive lower bound for shard ids, or {@code null} for all shards
 * @return all discovered shards, or an empty list if any ListShards call failed
 * @throws InterruptedException if interrupted while calling ListShards
 */
private List<StreamShardHandle> getShardsOfStream(String streamName, @Nullable String lastSeenShardId) throws InterruptedException {
    final List<StreamShardHandle> shardsOfStream = new ArrayList<>();
    String nextToken = null;
    while (true) {
        ListShardsResult result = listShards(streamName, lastSeenShardId, nextToken);
        if (result == null) {
            // An exception occurred while retrieving shards; never hand back a
            // partial list, so discard anything collected so far.
            shardsOfStream.clear();
            return shardsOfStream;
        }
        for (Shard shard : result.getShards()) {
            shardsOfStream.add(new StreamShardHandle(streamName, shard));
        }
        nextToken = result.getNextToken();
        if (nextToken == null) {
            // No continuation token: the complete shard list has been read.
            return shardsOfStream;
        }
    }
}
Also used : ListShardsResult(com.amazonaws.services.kinesis.model.ListShardsResult) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) ArrayList(java.util.ArrayList) Shard(com.amazonaws.services.kinesis.model.Shard)

Example 13 with StreamShardHandle

Usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.

From the class FlinkKinesisConsumer, method run.

// ------------------------------------------------------------------------
//  Source life cycle
// ------------------------------------------------------------------------
/**
 * Runs the source: creates a {@link KinesisDataFetcher}, seeds it with the initially
 * discovered shards (restoring each shard's sequence number from checkpointed state when
 * available), and then blocks in the fetcher loop until cancel()/close() is called or the
 * fetcher fails.
 *
 * @param sourceContext the context used to emit deserialized records downstream
 * @throws Exception if shard discovery or the fetcher loop fails
 */
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
    // Every subtask runs a fetcher, regardless of whether it initially has shards to
    // subscribe to; fetchers continuously poll for changes in the shard list, so all
    // subtasks can potentially pick up new shards to subscribe to later on.
    KinesisDataFetcher<T> fetcher = createFetcher(streams, sourceContext, getRuntimeContext(), configProps, deserializer);
    // initial shard discovery
    List<StreamShardHandle> allShards = fetcher.discoverNewShardsToSubscribe();
    for (StreamShardHandle shard : allShards) {
        // wrap the shard metadata so equivalent shards compare equal when looked up
        // in the restored-state map
        StreamShardMetadata.EquivalenceWrapper kinesisStreamShard = new StreamShardMetadata.EquivalenceWrapper(KinesisDataFetcher.convertToStreamShardMetadata(shard));
        if (sequenceNumsToRestore != null) {
            if (sequenceNumsToRestore.containsKey(kinesisStreamShard)) {
                // the shard was already seen and is contained in the restored state;
                // resume from the sequence number stored in the state
                fetcher.registerNewSubscribedShardState(new KinesisStreamShardState(kinesisStreamShard.getShardMetadata(), shard, sequenceNumsToRestore.get(kinesisStreamShard)));
                if (LOG.isInfoEnabled()) {
                    LOG.info("Subtask {} is seeding the fetcher with restored shard {}," + " starting state set to the restored sequence number {}", getRuntimeContext().getIndexOfThisSubtask(), shard.toString(), sequenceNumsToRestore.get(kinesisStreamShard));
                }
            } else {
                // the shard wasn't discovered in the previous run, therefore it should
                // be consumed from the beginning
                fetcher.registerNewSubscribedShardState(new KinesisStreamShardState(kinesisStreamShard.getShardMetadata(), shard, SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get()));
                if (LOG.isInfoEnabled()) {
                    LOG.info("Subtask {} is seeding the fetcher with new discovered shard {}," + " starting state set to the SENTINEL_EARLIEST_SEQUENCE_NUM", getRuntimeContext().getIndexOfThisSubtask(), shard.toString());
                }
            }
        } else {
            // no restored state: we're starting fresh; use the configured start
            // position (defaulting when unset) as the initial state for every shard
            SentinelSequenceNumber startingSeqNum = InitialPosition.valueOf(configProps.getProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, ConsumerConfigConstants.DEFAULT_STREAM_INITIAL_POSITION)).toSentinelSequenceNumber();
            fetcher.registerNewSubscribedShardState(new KinesisStreamShardState(kinesisStreamShard.getShardMetadata(), shard, startingSeqNum.get()));
            if (LOG.isInfoEnabled()) {
                LOG.info("Subtask {} will be seeded with initial shard {}, starting state set as sequence number {}", getRuntimeContext().getIndexOfThisSubtask(), shard.toString(), startingSeqNum.get());
            }
        }
    }
    // check that we are still running before starting the fetcher; cancel() may have
    // been called while the shards were being seeded
    if (!running) {
        return;
    }
    // expose the fetcher from this point on, so that state
    // snapshots can be taken from the fetcher's state holders
    this.fetcher = fetcher;
    // start the fetcher loop. The fetcher will stop running only when cancel() or
    // close() is called, or an error is thrown by threads created by the fetcher
    fetcher.runFetcher();
    // check that the fetcher has terminated before fully closing
    fetcher.awaitTermination();
    sourceContext.close();
}
Also used : StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)

Example 14 with StreamShardHandle

Usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.

From the class DynamoDBStreamsDataFetcherTest, method testCreateRecordPublisherRespectsShardIteratorTypeLatest.

@Test
public void testCreateRecordPublisherRespectsShardIteratorTypeLatest() throws Exception {
    // Wire a fetcher against a mocked Kinesis proxy so the shard iterator request
    // made during publisher creation can be verified.
    RuntimeContext context = TestUtils.getMockedRuntimeContext(1, 0);
    KinesisProxyInterface mockedKinesis = mock(KinesisProxyInterface.class);
    DynamoDBStreamsDataFetcher<String> dataFetcher =
            new DynamoDBStreamsDataFetcher<>(
                    singletonList("fakeStream"),
                    new TestSourceContext<>(),
                    context,
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    DEFAULT_SHARD_ASSIGNER,
                    config -> mockedKinesis);
    StreamShardHandle shardHandle = TestUtils.createDummyStreamShardHandle("dummy-stream", "0");
    dataFetcher.createRecordPublisher(
            SENTINEL_LATEST_SEQUENCE_NUM.get(),
            new Properties(),
            createFakeShardConsumerMetricGroup(context.getMetricGroup()),
            shardHandle);
    // Starting from the LATEST sentinel must translate into a LATEST shard iterator request.
    verify(mockedKinesis).getShardIterator(shardHandle, LATEST.toString(), null);
}
Also used : StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Properties(java.util.Properties) Test(org.junit.Test)

Example 15 with StreamShardHandle

Usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.

From the class ShardConsumerTestUtils, method assertNumberOfMessagesReceivedFromKinesis.

/**
 * Runs a {@link ShardConsumer} over a single fake shard and asserts how many messages reach
 * the source context and which sequence number is recorded last.
 *
 * @param expectedNumberOfMessages number of records expected to be collected
 * @param recordPublisherFactory factory producing the record publisher under test
 * @param startingSequenceNumber sequence number the fake shard state starts from
 * @param consumerProperties consumer configuration passed to fetcher and publisher
 * @param expectedLastProcessedSequenceNum sequence number expected after the run
 * @param metricGroup metric group backing the returned reporter
 * @return the metrics reporter populated during the consumer run
 * @throws InterruptedException if the consumer run is interrupted
 */
public static ShardConsumerMetricsReporter assertNumberOfMessagesReceivedFromKinesis(final int expectedNumberOfMessages, final RecordPublisherFactory recordPublisherFactory, final SequenceNumber startingSequenceNumber, final Properties consumerProperties, final SequenceNumber expectedLastProcessedSequenceNum, final AbstractMetricGroup metricGroup) throws InterruptedException {
    ShardConsumerMetricsReporter metricsReporter = new ShardConsumerMetricsReporter(metricGroup);
    // One fake shard, seeded with the requested starting sequence number.
    StreamShardHandle fakeShard = getMockStreamShard("fakeStream", 0);
    LinkedList<KinesisStreamShardState> shardStates = new LinkedList<>();
    shardStates.add(
            new KinesisStreamShardState(
                    KinesisDataFetcher.convertToStreamShardMetadata(fakeShard),
                    fakeShard,
                    startingSequenceNumber));
    TestSourceContext<String> sourceContext = new TestSourceContext<>();
    KinesisDeserializationSchemaWrapper<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    Collections.singletonList("fakeStream"),
                    sourceContext,
                    consumerProperties,
                    deserializationSchema,
                    10,
                    2,
                    new AtomicReference<>(),
                    shardStates,
                    KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
                    Mockito.mock(KinesisProxyInterface.class),
                    Mockito.mock(KinesisProxyV2Interface.class));
    final StreamShardHandle shardHandle = shardStates.get(0).getStreamShardHandle();
    final SequenceNumber lastProcessedSequenceNum = shardStates.get(0).getLastProcessedSequenceNum();
    final StartingPosition startingPosition =
            AWSUtil.getStartingPosition(lastProcessedSequenceNum, consumerProperties);
    final RecordPublisher recordPublisher =
            recordPublisherFactory.create(
                    startingPosition, fetcher.getConsumerConfiguration(), metricGroup, shardHandle);
    int shardIndex = fetcher.registerNewSubscribedShardState(shardStates.get(0));
    // Run the consumer synchronously; it returns once the publisher is drained.
    new ShardConsumer<>(
            fetcher,
            recordPublisher,
            shardIndex,
            shardHandle,
            lastProcessedSequenceNum,
            metricsReporter,
            deserializationSchema)
            .run();
    assertEquals(expectedNumberOfMessages, sourceContext.getCollectedOutputs().size());
    assertEquals(expectedLastProcessedSequenceNum, shardStates.get(0).getLastProcessedSequenceNum());
    return metricsReporter;
}
Also used : StartingPosition(org.apache.flink.streaming.connectors.kinesis.model.StartingPosition) ShardConsumerMetricsReporter(org.apache.flink.streaming.connectors.kinesis.metrics.ShardConsumerMetricsReporter) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) LinkedList(java.util.LinkedList) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) RecordPublisher(org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisher) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisProxyV2Interface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)

Aggregations

StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)30 Shard (com.amazonaws.services.kinesis.model.Shard)22 Test (org.junit.Test)19 ArrayList (java.util.ArrayList)18 HashMap (java.util.HashMap)16 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)16 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)15 SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)14 Properties (java.util.Properties)12 Map (java.util.Map)10 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)10 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)9 SentinelSequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber)9 LinkedList (java.util.LinkedList)8 TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)7 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)6 OperatorStateStore (org.apache.flink.api.common.state.OperatorStateStore)6 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)6 Configuration (org.apache.flink.configuration.Configuration)6 StateInitializationContext (org.apache.flink.runtime.state.StateInitializationContext)6