Search in sources :

Example 21 with StreamShardHandle

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.

the class KinesisDataFetcher method runFetcher.

/**
 * Starts the fetcher. After starting the fetcher, it can only be stopped by calling {@link
 * KinesisDataFetcher#shutdownFetcher()}.
 *
 * <p>The method returns immediately if the fetcher is not running. Otherwise it performs, in
 * order: a check that at least one subscribed stream has a discovered shard, the launch of a
 * shard consumer for every already-registered shard state that has not been read to its end,
 * the optional start of periodic watermark emission (and, if a watermark tracker is set, of
 * watermark synchronization and the record emitter thread), and finally the shard discovery
 * loop, which runs on the calling thread for as long as {@code running} is true.
 *
 * <p>Once the loop exits, the method waits for termination via {@code awaitTermination()} and
 * rethrows the error stored in {@code this.error}, if there is one.
 *
 * @throws RuntimeException if none of the subscribed streams has a discovered shard.
 * @throws Exception the first error or exception thrown by the fetcher or any of the threads
 *     created by the fetcher.
 */
public void runFetcher() throws Exception {
    // check that we are running before proceeding
    if (!running) {
        return;
    }
    // ------------------------------------------------------------------------
    // Procedures before starting the infinite while loop:
    // ------------------------------------------------------------------------
    // 1. check that there is at least one shard in the subscribed streams to consume from
    // (can be done by checking if at least one value in
    // subscribedStreamsToLastDiscoveredShardIds is not null)
    boolean hasShards = false;
    StringBuilder streamsWithNoShardsFound = new StringBuilder();
    for (Map.Entry<String, String> streamToLastDiscoveredShardEntry : subscribedStreamsToLastDiscoveredShardIds.entrySet()) {
        if (streamToLastDiscoveredShardEntry.getValue() != null) {
            hasShards = true;
        } else {
            // NOTE(review): every name is followed by ", ", so the logged list ends with a
            // dangling separator.
            streamsWithNoShardsFound.append(streamToLastDiscoveredShardEntry.getKey()).append(", ");
        }
    }
    // streams without shards are only reported; they do not fail the fetcher on their own
    if (streamsWithNoShardsFound.length() != 0 && LOG.isWarnEnabled()) {
        LOG.warn("Subtask {} has failed to find any shards for the following subscribed streams: {}", indexOfThisConsumerSubtask, streamsWithNoShardsFound.toString());
    }
    // fail only when not a single subscribed stream has a shard
    if (!hasShards) {
        throw new RuntimeException("No shards can be found for all subscribed streams: " + streams);
    }
    // 2. start consuming every shard state that is already present in subscribedShardsState
    // at this point (for example, state added by the consumer using a restored state
    // checkpoint)
    for (int seededStateIndex = 0; seededStateIndex < subscribedShardsState.size(); seededStateIndex++) {
        KinesisStreamShardState seededShardState = subscribedShardsState.get(seededStateIndex);
        // only start a consumer if the seeded shard has not been completely read already,
        // i.e. its last processed sequence number is not the shard-ending sentinel
        if (!seededShardState.getLastProcessedSequenceNum().equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Subtask {} will start consuming seeded shard {} from sequence number {} with ShardConsumer {}", indexOfThisConsumerSubtask, seededShardState.getStreamShardHandle().toString(), seededShardState.getLastProcessedSequenceNum(), seededStateIndex);
            }
            StreamShardHandle streamShardHandle = subscribedShardsState.get(seededStateIndex).getStreamShardHandle();
            // each shard consumer gets its own clone of the deserialization schema, opened
            // with a metric group scoped by subtask id and shard id
            KinesisDeserializationSchema<T> shardDeserializationSchema = getClonedDeserializationSchema();
            shardDeserializationSchema.open(RuntimeContextInitializationContextAdapters.deserializationAdapter(runtimeContext, // ignore the provided metric group
            metricGroup -> consumerMetricGroup.addGroup("subtaskId", String.valueOf(indexOfThisConsumerSubtask)).addGroup("shardId", streamShardHandle.getShard().getShardId()).addGroup("user")));
            shardConsumersExecutor.submit(createShardConsumer(seededStateIndex, streamShardHandle, subscribedShardsState.get(seededStateIndex).getLastProcessedSequenceNum(), registerShardMetricGroup(consumerMetricGroup, subscribedShardsState.get(seededStateIndex)), shardDeserializationSchema));
        }
    }
    // start periodic watermark emitter, if a watermark assigner was configured
    if (periodicWatermarkAssigner != null) {
        long periodicWatermarkIntervalMillis = runtimeContext.getExecutionConfig().getAutoWatermarkInterval();
        // a non-positive auto-watermark interval skips the emitter and the tracker setup
        if (periodicWatermarkIntervalMillis > 0) {
            ProcessingTimeService timerService = ((StreamingRuntimeContext) runtimeContext).getProcessingTimeService();
            LOG.info("Starting periodic watermark emitter with interval {}", periodicWatermarkIntervalMillis);
            new PeriodicWatermarkEmitter(timerService, periodicWatermarkIntervalMillis).start();
            if (watermarkTracker != null) {
                // setup global watermark tracking
                long watermarkSyncMillis = Long.parseLong(getConsumerConfiguration().getProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(ConsumerConfigConstants.DEFAULT_WATERMARK_SYNC_MILLIS)));
                // the tracker's update timeout is set to three sync intervals
                watermarkTracker.setUpdateTimeoutMillis(// synchronization latency
                watermarkSyncMillis * 3);
                watermarkTracker.open(runtimeContext);
                new WatermarkSyncCallback(timerService, watermarkSyncMillis).start();
                // emit records ahead of watermark to offset synchronization latency;
                // the lookahead is never smaller than three sync intervals
                long lookaheadMillis = Long.parseLong(getConsumerConfiguration().getProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(0)));
                recordEmitter.setMaxLookaheadMillis(Math.max(lookaheadMillis, watermarkSyncMillis * 3));
                // record emitter depends on periodic watermark
                // it runs in a separate thread since main thread is used for discovery
                Runnable recordEmitterRunnable = new Runnable() {

                    @Override
                    public void run() {
                        try {
                            recordEmitter.run();
                        } catch (Throwable error) {
                            // report the error that terminated the emitter loop to
                            // source thread
                            stopWithError(error);
                        }
                    }
                };
                // the emitter runs on a named daemon thread
                Thread thread = new Thread(recordEmitterRunnable);
                thread.setName("recordEmitter-" + runtimeContext.getTaskNameWithSubtasks());
                thread.setDaemon(true);
                thread.start();
            }
        }
        // the shard idle interval is only read when a periodic watermark assigner is set
        this.shardIdleIntervalMillis = Long.parseLong(getConsumerConfiguration().getProperty(ConsumerConfigConstants.SHARD_IDLE_INTERVAL_MILLIS, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_IDLE_INTERVAL_MILLIS)));
    }
    // ------------------------------------------------------------------------
    // finally, start the infinite shard discovery and consumer launching loop;
    // we will escape from this loop only when shutdownFetcher() or stopWithError() is called
    // TODO: have this thread emit the records for tracking backpressure
    final long discoveryIntervalMillis = Long.parseLong(configProps.getProperty(ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_DISCOVERY_INTERVAL_MILLIS)));
    // with no active shards at startup, the source context is marked temporarily idle
    if (this.numberOfActiveShards.get() == 0) {
        LOG.info("Subtask {} has no active shards to read on startup; marking the subtask as temporarily idle ...", indexOfThisConsumerSubtask);
        sourceContext.markAsTemporarilyIdle();
    }
    while (running) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Subtask {} is trying to discover new shards that were created due to resharding ...", indexOfThisConsumerSubtask);
        }
        List<StreamShardHandle> newShardsDueToResharding = discoverNewShardsToSubscribe();
        for (StreamShardHandle shard : newShardsDueToResharding) {
            // since there may be delay in discovering a new shard, all new shards due to
            // resharding should be read starting from the earliest record possible
            KinesisStreamShardState newShardState = new KinesisStreamShardState(convertToStreamShardMetadata(shard), shard, SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get());
            // the index returned by the registration is the one handed to the new consumer
            int newStateIndex = registerNewSubscribedShardState(newShardState);
            if (LOG.isInfoEnabled()) {
                LOG.info("Subtask {} has discovered a new shard {} due to resharding, and will start consuming " + "the shard from sequence number {} with ShardConsumer {}", indexOfThisConsumerSubtask, newShardState.getStreamShardHandle().toString(), newShardState.getLastProcessedSequenceNum(), newStateIndex);
            }
            StreamShardHandle streamShardHandle = newShardState.getStreamShardHandle();
            // same per-shard schema clone and metric group set-up as for the seeded shards
            KinesisDeserializationSchema<T> shardDeserializationSchema = getClonedDeserializationSchema();
            shardDeserializationSchema.open(RuntimeContextInitializationContextAdapters.deserializationAdapter(runtimeContext, // ignore the provided metric group
            metricGroup -> consumerMetricGroup.addGroup("subtaskId", String.valueOf(indexOfThisConsumerSubtask)).addGroup("shardId", streamShardHandle.getShard().getShardId()).addGroup("user")));
            shardConsumersExecutor.submit(createShardConsumer(newStateIndex, newShardState.getStreamShardHandle(), newShardState.getLastProcessedSequenceNum(), registerShardMetricGroup(consumerMetricGroup, newShardState), shardDeserializationSchema));
        }
        // we also check if we are running here so that we won't start the discovery wait
        // interval if the running flag was set to false during the middle of the while loop;
        // a discovery interval of 0 skips the wait entirely
        if (running && discoveryIntervalMillis != 0) {
            try {
                // wait on cancelFuture for at most one discovery interval; only the timeout
                // is handled here, any other exception thrown by get() leaves this method
                cancelFuture.get(discoveryIntervalMillis, TimeUnit.MILLISECONDS);
                LOG.debug("Cancelled discovery");
            } catch (TimeoutException iex) {
            // timeout is expected when fetcher is not cancelled
            }
        }
    }
    // make sure all resources have been terminated before leaving
    try {
        awaitTermination();
    } catch (InterruptedException ie) {
        // If there is an original exception, preserve it, since that's more important/useful.
        // NOTE(review): the thread's interrupt flag is not restored here - confirm that the
        // caller does not rely on it.
        this.error.compareAndSet(null, ie);
    }
    // any error thrown in the shard consumer threads will be thrown to the main thread;
    // Exceptions and Errors are rethrown as-is, any other Throwable is wrapped in an Exception
    Throwable throwable = this.error.get();
    if (throwable != null) {
        if (throwable instanceof Exception) {
            throw (Exception) throwable;
        } else if (throwable instanceof Error) {
            throw (Error) throwable;
        } else {
            throw new Exception(throwable);
        }
    }
}
Also used : Shard(com.amazonaws.services.kinesis.model.Shard) TimestampedValue(org.apache.flink.streaming.runtime.operators.windowing.TimestampedValue) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) KinesisConsumerMetricConstants(org.apache.flink.streaming.connectors.kinesis.metrics.KinesisConsumerMetricConstants) ConsumerConfigConstants(org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants) LoggerFactory(org.slf4j.LoggerFactory) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) TimeoutException(java.util.concurrent.TimeoutException) KinesisProxyV2Factory(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Factory) StartingPosition(org.apache.flink.streaming.connectors.kinesis.model.StartingPosition) InstantiationUtil(org.apache.flink.util.InstantiationUtil) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ShardConsumerMetricsReporter(org.apache.flink.streaming.connectors.kinesis.metrics.ShardConsumerMetricsReporter) Map(java.util.Map) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) ThreadFactory(java.util.concurrent.ThreadFactory) KinesisShardAssigner(org.apache.flink.streaming.connectors.kinesis.KinesisShardAssigner) RecordPublisherType(org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.RecordPublisherType) PollingRecordPublisherFactory(org.apache.flink.streaming.connectors.kinesis.internals.publisher.polling.PollingRecordPublisherFactory) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) POLLING(org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.RecordPublisherType.POLLING) Preconditions(org.apache.flink.util.Preconditions) Executors(java.util.concurrent.Executors) 
MetricGroup(org.apache.flink.metrics.MetricGroup) List(java.util.List) RecordPublisherFactory(org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisherFactory) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) RecordEmitter(org.apache.flink.streaming.connectors.kinesis.util.RecordEmitter) HashKeyRange(com.amazonaws.services.kinesis.model.HashKeyRange) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) RuntimeContextInitializationContextAdapters(org.apache.flink.api.common.serialization.RuntimeContextInitializationContextAdapters) RecordPublisher(org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisher) AtomicReference(java.util.concurrent.atomic.AtomicReference) GetShardListResult(org.apache.flink.streaming.connectors.kinesis.proxy.GetShardListResult) ArrayList(java.util.ArrayList) AssignerWithPeriodicWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamConsumerRegistrarUtil(org.apache.flink.streaming.connectors.kinesis.util.StreamConsumerRegistrarUtil) KinesisProxy(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxy) LinkedList(java.util.LinkedList) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) KinesisProxyV2Interface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface) FanOutRecordPublisherFactory(org.apache.flink.streaming.connectors.kinesis.internals.publisher.fanout.FanOutRecordPublisherFactory) 
RECORD_PUBLISHER_TYPE(org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.RECORD_PUBLISHER_TYPE) Logger(org.slf4j.Logger) Properties(java.util.Properties) WatermarkTracker(org.apache.flink.streaming.connectors.kinesis.util.WatermarkTracker) ProcessingTimeService(org.apache.flink.streaming.runtime.tasks.ProcessingTimeService) SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) IOException(java.io.IOException) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) AWSUtil(org.apache.flink.streaming.connectors.kinesis.util.AWSUtil) Internal(org.apache.flink.annotation.Internal) ProcessingTimeCallback(org.apache.flink.api.common.operators.ProcessingTimeService.ProcessingTimeCallback) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) ProcessingTimeService(org.apache.flink.streaming.runtime.tasks.ProcessingTimeService) TimeoutException(java.util.concurrent.TimeoutException) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 22 with StreamShardHandle

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.

the class KinesisDataFetcher method discoverNewShardsToSubscribe.

/**
 * Queries Kinesis for shards that have not been seen yet and returns the ones this consumer
 * subtask should subscribe to.
 *
 * <p>For every stream that reported new shards:
 *
 * <ol>
 *   <li>each new shard is passed to the shard assigner, and is added to the result if {@code
 *       isThisSubtaskShouldSubscribeTo} accepts the assigned value for this subtask;
 *   <li>the last discovered shard of the stream is advanced to the id of the last shard seen
 *       in the result, so that the same shards are not requested again on the next call.
 * </ol>
 *
 * @return the newly discovered shards assigned to this subtask; empty if there are none
 * @throws InterruptedException declared by this method; presumably raised by the shard list
 *     request - verify against the proxy implementation
 */
public List<StreamShardHandle> discoverNewShardsToSubscribe() throws InterruptedException {
    final List<StreamShardHandle> shardsForThisSubtask = new LinkedList<>();
    final GetShardListResult discovered = kinesis.getShardList(subscribedStreamsToLastDiscoveredShardIds);

    // nothing new was retrieved for any stream
    if (!discovered.hasRetrievedShards()) {
        return shardsForThisSubtask;
    }

    for (String streamName : discovered.getStreamsWithRetrievedShards()) {
        for (StreamShardHandle candidate : discovered.getRetrievedShardListOfStream(streamName)) {
            final int assignment = shardAssigner.assign(candidate, totalNumberOfConsumerSubtasks);
            final boolean ownedByThisSubtask =
                    isThisSubtaskShouldSubscribeTo(assignment, totalNumberOfConsumerSubtasks, indexOfThisConsumerSubtask);
            if (ownedByThisSubtask) {
                shardsForThisSubtask.add(candidate);
            }
        }
        // remember how far discovery got for this stream, whether or not any of its shards
        // ended up assigned to this subtask
        final String lastSeenShardId = discovered.getLastSeenShardOfStream(streamName).getShard().getShardId();
        advanceLastDiscoveredShardOfStream(streamName, lastSeenShardId);
    }
    return shardsForThisSubtask;
}
Also used : GetShardListResult(org.apache.flink.streaming.connectors.kinesis.proxy.GetShardListResult) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) LinkedList(java.util.LinkedList)

Example 23 with StreamShardHandle

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.

the class FlinkKinesisConsumerTest method testFindSequenceNumberToRestoreFromIfTheShardHasBeenClosedSinceTheStateWasStored.

/**
 * FLINK-8484: a change in the {@link StreamShardMetadata} other than {@link
 * StreamShardMetadata#getShardId()} or {@link StreamShardMetadata#getStreamName()} must not
 * prevent the shard from being restored.
 *
 * <p>The corner case covered here: the stored shard metadata describes an open shard (no
 * ending sequence number), but by the time the job is restored the shard has been closed
 * (ending sequence number set) due to re-sharding. {@link StreamShardMetadata#equals(Object)}
 * can then no longer be relied on to look up the sequence number in the restored state, so the
 * snapshot's state has to be synchronized with the Kinesis shard before the lookup.
 */
@Test
public void testFindSequenceNumberToRestoreFromIfTheShardHasBeenClosedSinceTheStateWasStored() throws Exception {
    // ----------------------------------------------------------------------
    // restored state the consumer is seeded with
    // ----------------------------------------------------------------------
    HashMap<StreamShardHandle, SequenceNumber> restoredSequenceNumbers = getFakeRestoredStore("all");

    // ----------------------------------------------------------------------
    // operator state store and initialization context handed to initializeState()
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> restoredListState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> restoredEntry : restoredSequenceNumbers.entrySet()) {
        StreamShardMetadata restoredMetadata = KinesisDataFetcher.convertToStreamShardMetadata(restoredEntry.getKey());
        restoredListState.add(Tuple2.of(restoredMetadata, restoredEntry.getValue()));
    }

    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(restoredListState);

    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // fetcher mock that discovers a single, closed shard
    // ----------------------------------------------------------------------
    KinesisDataFetcher mockedFetcher = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>();

    // build the handle from the first entry of the restored state
    final StreamShardHandle openHandle = restoredSequenceNumbers.keySet().iterator().next();
    final StreamShardHandle closedHandle = new StreamShardHandle(openHandle.getStreamName(), openHandle.getShard());

    // an ending sequence number marks the shard as closed
    final SequenceNumberRange closedRange = new SequenceNumberRange();
    closedRange.setEndingSequenceNumber("1293844");
    closedHandle.getShard().setSequenceNumberRange(closedRange);

    discoveredShards.add(closedHandle);
    when(mockedFetcher.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // ----------------------------------------------------------------------
    // run the consumer and check how the fetcher was seeded
    // ----------------------------------------------------------------------
    TestableFlinkKinesisConsumer consumer = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumer.initializeState(restoreContext);
    consumer.open(new Configuration());
    consumer.run(Mockito.mock(SourceFunction.SourceContext.class));

    // the closed shard must be registered with the sequence number found in the restored state
    KinesisStreamShardState expectedState =
            new KinesisStreamShardState(
                    KinesisDataFetcher.convertToStreamShardMetadata(closedHandle),
                    closedHandle,
                    restoredSequenceNumbers.get(closedHandle));
    Mockito.verify(mockedFetcher).registerNewSubscribedShardState(expectedState);
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Map(java.util.Map) HashMap(java.util.HashMap) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 24 with StreamShardHandle

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.

the class FlinkKinesisConsumerTest method testFetcherShouldBeCorrectlySeededIfRestoringFromCheckpoint.

/**
 * Runs the consumer against a restored state and checks that, for every restored shard, the
 * fetcher is asked to register a shard state carrying that shard's restored sequence number.
 */
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededIfRestoringFromCheckpoint() throws Exception {
    // ----------------------------------------------------------------------
    // restored state the consumer is seeded with
    // ----------------------------------------------------------------------
    HashMap<StreamShardHandle, SequenceNumber> restoredSequenceNumbers = getFakeRestoredStore("all");

    // ----------------------------------------------------------------------
    // operator state store and initialization context handed to initializeState()
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> restoredListState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> restoredEntry : restoredSequenceNumbers.entrySet()) {
        StreamShardMetadata restoredMetadata = KinesisDataFetcher.convertToStreamShardMetadata(restoredEntry.getKey());
        restoredListState.add(Tuple2.of(restoredMetadata, restoredEntry.getValue()));
    }

    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(restoredListState);

    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // fetcher mock that discovers exactly the restored shards
    // ----------------------------------------------------------------------
    KinesisDataFetcher mockedFetcher = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>(restoredSequenceNumbers.keySet());
    when(mockedFetcher.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // ----------------------------------------------------------------------
    // run the consumer and check how the fetcher was seeded
    // ----------------------------------------------------------------------
    TestableFlinkKinesisConsumer consumer = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumer.initializeState(restoreContext);
    consumer.open(new Configuration());
    consumer.run(Mockito.mock(SourceFunction.SourceContext.class));

    // one registration is expected per restored shard, with its restored sequence number
    for (Map.Entry<StreamShardHandle, SequenceNumber> expectedEntry : restoredSequenceNumbers.entrySet()) {
        StreamShardHandle expectedHandle = expectedEntry.getKey();
        KinesisStreamShardState expectedState =
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(expectedHandle),
                        expectedHandle,
                        expectedEntry.getValue());
        Mockito.verify(mockedFetcher).registerNewSubscribedShardState(expectedState);
    }
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Map(java.util.Map) HashMap(java.util.HashMap) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 25 with StreamShardHandle

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.

the class KinesisDataFetcherTest method testStreamShardMetadataAndHandleConversion.

/**
 * Builds a {@link StreamShardMetadata} and a {@link StreamShardHandle} from the same field
 * values and checks that {@code KinesisDataFetcher} converts each into an object equal to the
 * other.
 */
@Test
public void testStreamShardMetadataAndHandleConversion() {
    final String stream = "fakeStream1";
    final String shardIdentifier = "shard-000001";
    final String parentIdentifier = "shard-000002";
    final String adjacentParentIdentifier = "shard-000003";
    final String hashKeyStart = "key-000001";
    final String hashKeyEnd = "key-000010";
    final String sequenceStart = "seq-0000021";
    final String sequenceEnd = "seq-00000031";

    // handle form: stream name plus an AWS Shard built from the values above
    HashKeyRange hashKeys = new HashKeyRange().withStartingHashKey(hashKeyStart).withEndingHashKey(hashKeyEnd);
    SequenceNumberRange sequenceNumbers =
            new SequenceNumberRange().withStartingSequenceNumber(sequenceStart).withEndingSequenceNumber(sequenceEnd);
    Shard awsShard =
            new Shard()
                    .withShardId(shardIdentifier)
                    .withParentShardId(parentIdentifier)
                    .withAdjacentParentShardId(adjacentParentIdentifier)
                    .withHashKeyRange(hashKeys)
                    .withSequenceNumberRange(sequenceNumbers);
    StreamShardHandle handle = new StreamShardHandle(stream, awsShard);

    // metadata form: the same values set field by field
    StreamShardMetadata metadata = new StreamShardMetadata();
    metadata.setStreamName(stream);
    metadata.setShardId(shardIdentifier);
    metadata.setParentShardId(parentIdentifier);
    metadata.setAdjacentParentShardId(adjacentParentIdentifier);
    metadata.setStartingHashKey(hashKeyStart);
    metadata.setEndingHashKey(hashKeyEnd);
    metadata.setStartingSequenceNumber(sequenceStart);
    metadata.setEndingSequenceNumber(sequenceEnd);

    // the conversion must hold in both directions
    assertEquals(metadata, KinesisDataFetcher.convertToStreamShardMetadata(handle));
    assertEquals(handle, KinesisDataFetcher.convertToStreamShardHandle(metadata));
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) Shard(com.amazonaws.services.kinesis.model.Shard) HashKeyRange(com.amazonaws.services.kinesis.model.HashKeyRange) Test(org.junit.Test)

Aggregations

StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)30 Shard (com.amazonaws.services.kinesis.model.Shard)22 Test (org.junit.Test)19 ArrayList (java.util.ArrayList)18 HashMap (java.util.HashMap)16 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)16 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)15 SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)14 Properties (java.util.Properties)12 Map (java.util.Map)10 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)10 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)9 SentinelSequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber)9 LinkedList (java.util.LinkedList)8 TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)7 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)6 OperatorStateStore (org.apache.flink.api.common.state.OperatorStateStore)6 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)6 Configuration (org.apache.flink.configuration.Configuration)6 StateInitializationContext (org.apache.flink.runtime.state.StateInitializationContext)6