Use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
The class KinesisDataFetcher, method runFetcher.
/**
* Starts the fetcher. After starting the fetcher, it can only be stopped by calling {@link
* KinesisDataFetcher#shutdownFetcher()}.
*
* @throws Exception the first error or exception thrown by the fetcher or any of the threads
* created by the fetcher.
*/
public void runFetcher() throws Exception {
    // check that we are running before proceeding
    if (!running) {
        return;
    }

    // ------------------------------------------------------------------------
    //  Procedures before starting the infinite while loop:
    // ------------------------------------------------------------------------

    // 1. check that there is at least one shard in the subscribed streams to consume from
    //    (can be done by checking if at least one value in
    //    subscribedStreamsToLastDiscoveredShardIds is not null)
    boolean hasShards = false;
    StringBuilder streamsWithNoShardsFound = new StringBuilder();
    for (Map.Entry<String, String> streamToLastDiscoveredShardEntry :
            subscribedStreamsToLastDiscoveredShardIds.entrySet()) {
        if (streamToLastDiscoveredShardEntry.getValue() != null) {
            hasShards = true;
        } else {
            streamsWithNoShardsFound
                    .append(streamToLastDiscoveredShardEntry.getKey())
                    .append(", ");
        }
    }

    if (streamsWithNoShardsFound.length() != 0 && LOG.isWarnEnabled()) {
        LOG.warn(
                "Subtask {} has failed to find any shards for the following subscribed streams: {}",
                indexOfThisConsumerSubtask,
                streamsWithNoShardsFound.toString());
    }

    if (!hasShards) {
        throw new RuntimeException(
                "No shards can be found for all subscribed streams: " + streams);
    }

    // 2. start consuming any shard state already in subscribedShardsState; this state may
    //    have been seeded during startup, or explicitly added by the consumer using a
    //    restored state checkpoint
    for (int seededStateIndex = 0; seededStateIndex < subscribedShardsState.size(); seededStateIndex++) {
        KinesisStreamShardState seededShardState = subscribedShardsState.get(seededStateIndex);

        // only start a consuming thread if the seeded shard has not been completely read already
        if (!seededShardState.getLastProcessedSequenceNum()
                .equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get())) {
            if (LOG.isInfoEnabled()) {
                LOG.info(
                        "Subtask {} will start consuming seeded shard {} from sequence number {} with ShardConsumer {}",
                        indexOfThisConsumerSubtask,
                        seededShardState.getStreamShardHandle().toString(),
                        seededShardState.getLastProcessedSequenceNum(),
                        seededStateIndex);
            }

            StreamShardHandle streamShardHandle =
                    subscribedShardsState.get(seededStateIndex).getStreamShardHandle();
            KinesisDeserializationSchema<T> shardDeserializationSchema =
                    getClonedDeserializationSchema();
            shardDeserializationSchema.open(
                    RuntimeContextInitializationContextAdapters.deserializationAdapter(
                            runtimeContext,
                            // ignore the provided metric group
                            metricGroup ->
                                    consumerMetricGroup
                                            .addGroup("subtaskId", String.valueOf(indexOfThisConsumerSubtask))
                                            .addGroup("shardId", streamShardHandle.getShard().getShardId())
                                            .addGroup("user")));
            shardConsumersExecutor.submit(
                    createShardConsumer(
                            seededStateIndex,
                            streamShardHandle,
                            subscribedShardsState.get(seededStateIndex).getLastProcessedSequenceNum(),
                            registerShardMetricGroup(consumerMetricGroup, subscribedShardsState.get(seededStateIndex)),
                            shardDeserializationSchema));
        }
    }
    // start periodic watermark emitter, if a watermark assigner was configured
    if (periodicWatermarkAssigner != null) {
        long periodicWatermarkIntervalMillis =
                runtimeContext.getExecutionConfig().getAutoWatermarkInterval();
        if (periodicWatermarkIntervalMillis > 0) {
            ProcessingTimeService timerService =
                    ((StreamingRuntimeContext) runtimeContext).getProcessingTimeService();
            LOG.info(
                    "Starting periodic watermark emitter with interval {}",
                    periodicWatermarkIntervalMillis);
            new PeriodicWatermarkEmitter(timerService, periodicWatermarkIntervalMillis).start();

            if (watermarkTracker != null) {
                // setup global watermark tracking
                long watermarkSyncMillis =
                        Long.parseLong(
                                getConsumerConfiguration()
                                        .getProperty(
                                                ConsumerConfigConstants.WATERMARK_SYNC_MILLIS,
                                                Long.toString(ConsumerConfigConstants.DEFAULT_WATERMARK_SYNC_MILLIS)));
                // synchronization latency
                watermarkTracker.setUpdateTimeoutMillis(watermarkSyncMillis * 3);
                watermarkTracker.open(runtimeContext);
                new WatermarkSyncCallback(timerService, watermarkSyncMillis).start();

                // emit records ahead of watermark to offset synchronization latency
                long lookaheadMillis =
                        Long.parseLong(
                                getConsumerConfiguration()
                                        .getProperty(
                                                ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS,
                                                Long.toString(0)));
                recordEmitter.setMaxLookaheadMillis(
                        Math.max(lookaheadMillis, watermarkSyncMillis * 3));

                // the record emitter depends on the periodic watermark;
                // it runs in a separate thread since the main thread is used for discovery
                Runnable recordEmitterRunnable =
                        new Runnable() {
                            @Override
                            public void run() {
                                try {
                                    recordEmitter.run();
                                } catch (Throwable error) {
                                    // report the error that terminated the emitter loop to
                                    // the source thread
                                    stopWithError(error);
                                }
                            }
                        };

                Thread thread = new Thread(recordEmitterRunnable);
                thread.setName("recordEmitter-" + runtimeContext.getTaskNameWithSubtasks());
                thread.setDaemon(true);
                thread.start();
            }
        }
        this.shardIdleIntervalMillis =
                Long.parseLong(
                        getConsumerConfiguration()
                                .getProperty(
                                        ConsumerConfigConstants.SHARD_IDLE_INTERVAL_MILLIS,
                                        Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_IDLE_INTERVAL_MILLIS)));
    }
    // ------------------------------------------------------------------------

    // finally, start the infinite shard discovery and consumer launching loop;
    // we will escape from this loop only when shutdownFetcher() or stopWithError() is called
    // TODO: have this thread emit the records for tracking backpressure
    final long discoveryIntervalMillis =
            Long.parseLong(
                    configProps.getProperty(
                            ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS,
                            Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_DISCOVERY_INTERVAL_MILLIS)));

    if (this.numberOfActiveShards.get() == 0) {
        LOG.info(
                "Subtask {} has no active shards to read on startup; marking the subtask as temporarily idle ...",
                indexOfThisConsumerSubtask);
        sourceContext.markAsTemporarilyIdle();
    }

    while (running) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(
                    "Subtask {} is trying to discover new shards that were created due to resharding ...",
                    indexOfThisConsumerSubtask);
        }
        List<StreamShardHandle> newShardsDueToResharding = discoverNewShardsToSubscribe();
        for (StreamShardHandle shard : newShardsDueToResharding) {
            // since there may be delay in discovering a new shard, all new shards due to
            // resharding should be read starting from the earliest record possible
            KinesisStreamShardState newShardState =
                    new KinesisStreamShardState(
                            convertToStreamShardMetadata(shard),
                            shard,
                            SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get());
            int newStateIndex = registerNewSubscribedShardState(newShardState);
            if (LOG.isInfoEnabled()) {
                LOG.info(
                        "Subtask {} has discovered a new shard {} due to resharding, and will start consuming "
                                + "the shard from sequence number {} with ShardConsumer {}",
                        indexOfThisConsumerSubtask,
                        newShardState.getStreamShardHandle().toString(),
                        newShardState.getLastProcessedSequenceNum(),
                        newStateIndex);
            }
            StreamShardHandle streamShardHandle = newShardState.getStreamShardHandle();
            KinesisDeserializationSchema<T> shardDeserializationSchema =
                    getClonedDeserializationSchema();
            shardDeserializationSchema.open(
                    RuntimeContextInitializationContextAdapters.deserializationAdapter(
                            runtimeContext,
                            // ignore the provided metric group
                            metricGroup ->
                                    consumerMetricGroup
                                            .addGroup("subtaskId", String.valueOf(indexOfThisConsumerSubtask))
                                            .addGroup("shardId", streamShardHandle.getShard().getShardId())
                                            .addGroup("user")));
            shardConsumersExecutor.submit(
                    createShardConsumer(
                            newStateIndex,
                            newShardState.getStreamShardHandle(),
                            newShardState.getLastProcessedSequenceNum(),
                            registerShardMetricGroup(consumerMetricGroup, newShardState),
                            shardDeserializationSchema));
        }

        // we also check the running flag here so that we won't needlessly sleep for the
        // discovery interval if the flag was set to false in the middle of the while loop
        if (running && discoveryIntervalMillis != 0) {
            try {
                cancelFuture.get(discoveryIntervalMillis, TimeUnit.MILLISECONDS);
                LOG.debug("Cancelled discovery");
            } catch (TimeoutException iex) {
                // timeout is expected when the fetcher is not cancelled
            }
        }
    }
    // make sure all resources have been terminated before leaving
    try {
        awaitTermination();
    } catch (InterruptedException ie) {
        // If there is an original exception, preserve it, since that's more important/useful.
        this.error.compareAndSet(null, ie);
    }

    // any error thrown in the shard consumer threads will be thrown to the main thread
    Throwable throwable = this.error.get();
    if (throwable != null) {
        if (throwable instanceof Exception) {
            throw (Exception) throwable;
        } else if (throwable instanceof Error) {
            throw (Error) throwable;
        } else {
            throw new Exception(throwable);
        }
    }
}
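For orientation, here is a minimal sketch of how a source function typically drives this method. The createFetcher(...) factory and the field names are assumptions for illustration; FlinkKinesisConsumer's actual wiring passes additional arguments such as the config properties and the deserialization schema.

// Sketch (assumed wiring): the source thread blocks inside runFetcher() until
// shutdownFetcher() or stopWithError() is called from another thread.
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
    // createFetcher is a hypothetical factory standing in for the consumer's real one
    this.fetcher = createFetcher(streams, sourceContext, getRuntimeContext());
    fetcher.runFetcher();       // returns only after shutdown or error
    fetcher.awaitTermination(); // surface any late consumer-thread failures
}

@Override
public void cancel() {
    running = false;
    if (fetcher != null) {
        fetcher.shutdownFetcher(); // unblocks runFetcher()'s discovery loop
    }
}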
Use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
The class KinesisDataFetcher, method discoverNewShardsToSubscribe.
/**
 * A utility function that does the following:
 *
 * <p>1. Finds new shards for each stream that we haven't seen before.
 *
 * <p>2. For each new shard, determines whether this consumer subtask should subscribe to it;
 * if yes, the shard is added to the returned list.
 *
 * <p>3. Updates the subscribedStreamsToLastDiscoveredShardIds state so that we won't get
 * shards we have already seen the next time this function is called.
 */
public List<StreamShardHandle> discoverNewShardsToSubscribe() throws InterruptedException {
    List<StreamShardHandle> newShardsToSubscribe = new LinkedList<>();
    GetShardListResult shardListResult =
            kinesis.getShardList(subscribedStreamsToLastDiscoveredShardIds);
    if (shardListResult.hasRetrievedShards()) {
        Set<String> streamsWithNewShards = shardListResult.getStreamsWithRetrievedShards();
        for (String stream : streamsWithNewShards) {
            List<StreamShardHandle> newShardsOfStream =
                    shardListResult.getRetrievedShardListOfStream(stream);
            for (StreamShardHandle newShard : newShardsOfStream) {
                int hashCode = shardAssigner.assign(newShard, totalNumberOfConsumerSubtasks);
                if (isThisSubtaskShouldSubscribeTo(
                        hashCode, totalNumberOfConsumerSubtasks, indexOfThisConsumerSubtask)) {
                    newShardsToSubscribe.add(newShard);
                }
            }
            advanceLastDiscoveredShardOfStream(
                    stream,
                    shardListResult.getLastSeenShardOfStream(stream).getShard().getShardId());
        }
    }
    return newShardsToSubscribe;
}
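The shardAssigner.assign(...) call above is the pluggable point that decides shard-to-subtask distribution; the fetcher then matches the returned value against the subtask index in isThisSubtaskShouldSubscribeTo. A minimal sketch of a custom assigner, assuming the connector's KinesisShardAssigner functional interface:

// A minimal sketch of a custom shard assigner, assuming the
// org.apache.flink.streaming.connectors.kinesis.KinesisShardAssigner interface.
// Returning a stable hash of the shard id spreads shards roughly evenly, since the
// fetcher selects shards whose hash maps to this subtask's index.
KinesisShardAssigner hashByShardId =
        (StreamShardHandle shard, int numParallelSubtasks) ->
                shard.getShard().getShardId().hashCode();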
Use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
The class FlinkKinesisConsumerTest, method testFindSequenceNumberToRestoreFromIfTheShardHasBeenClosedSinceTheStateWasStored.
/**
 * FLINK-8484: ensure that a state change in the StreamShardMetadata other than {@link
 * StreamShardMetadata#getShardId()} or {@link StreamShardMetadata#getStreamName()} does not
 * prevent the shard from being restored. This handles the corner case where the stored shard
 * metadata is open (no ending sequence number), but after the job restore the shard has been
 * closed (ending sequence number set) due to re-sharding, so we can no longer rely on {@link
 * StreamShardMetadata#equals(Object)} to look up the sequence number in the collection of
 * restored shard metadata.
 *
 * <p>Therefore, we will rely on synchronizing the snapshot's state with the Kinesis shard
 * before attempting to look up the sequence number to restore.
 */
@Test
public void testFindSequenceNumberToRestoreFromIfTheShardHasBeenClosedSinceTheStateWasStored()
        throws Exception {
    // ----------------------------------------------------------------------
    // setup initial state
    // ----------------------------------------------------------------------
    HashMap<StreamShardHandle, SequenceNumber> fakeRestoredState = getFakeRestoredStore("all");

    // ----------------------------------------------------------------------
    // mock operator state backend and initial state for initializeState()
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState =
            new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> state : fakeRestoredState.entrySet()) {
        listState.add(
                Tuple2.of(
                        KinesisDataFetcher.convertToStreamShardMetadata(state.getKey()),
                        state.getValue()));
    }

    OperatorStateStore operatorStateStore = mock(OperatorStateStore.class);
    when(operatorStateStore.getUnionListState(Matchers.any(ListStateDescriptor.class)))
            .thenReturn(listState);

    StateInitializationContext initializationContext = mock(StateInitializationContext.class);
    when(initializationContext.getOperatorStateStore()).thenReturn(operatorStateStore);
    when(initializationContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // mock fetcher
    // ----------------------------------------------------------------------
    KinesisDataFetcher mockedFetcher = mockKinesisDataFetcher();
    List<StreamShardHandle> shards = new ArrayList<>();

    // create a fake stream shard handle based on the first entry in the restored state
    final StreamShardHandle originalStreamShardHandle =
            fakeRestoredState.keySet().iterator().next();
    final StreamShardHandle closedStreamShardHandle =
            new StreamShardHandle(
                    originalStreamShardHandle.getStreamName(),
                    originalStreamShardHandle.getShard());
    // close the shard handle by setting an ending sequence number
    final SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
    sequenceNumberRange.setEndingSequenceNumber("1293844");
    closedStreamShardHandle.getShard().setSequenceNumberRange(sequenceNumberRange);

    shards.add(closedStreamShardHandle);
    when(mockedFetcher.discoverNewShardsToSubscribe()).thenReturn(shards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // ----------------------------------------------------------------------
    // start to test fetcher's initial state seeding
    // ----------------------------------------------------------------------
    TestableFlinkKinesisConsumer consumer =
            new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumer.initializeState(initializationContext);
    consumer.open(new Configuration());
    consumer.run(Mockito.mock(SourceFunction.SourceContext.class));

    Mockito.verify(mockedFetcher)
            .registerNewSubscribedShardState(
                    new KinesisStreamShardState(
                            KinesisDataFetcher.convertToStreamShardMetadata(closedStreamShardHandle),
                            closedStreamShardHandle,
                            fakeRestoredState.get(closedStreamShardHandle)));
}
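To make the corner case concrete, here is a short sketch using only the StreamShardMetadata setters seen above (the shard id is a hypothetical value): field-based equality breaks as soon as re-sharding closes the shard.

// Sketch: metadata for the same shard, before and after re-sharding closes it.
StreamShardMetadata storedInSnapshot = new StreamShardMetadata();
storedInSnapshot.setStreamName("fakeStream");
storedInSnapshot.setShardId("shardId-000000000000"); // hypothetical shard id
StreamShardMetadata afterReshard = new StreamShardMetadata();
afterReshard.setStreamName("fakeStream");
afterReshard.setShardId("shardId-000000000000");
afterReshard.setEndingSequenceNumber("1293844"); // the shard is now closed
// equals() compares every field, so the closed shard no longer matches the snapshot
// entry even though stream name and shard id identify the same shard
assertFalse(storedInSnapshot.equals(afterReshard));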
Use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
The class FlinkKinesisConsumerTest, method testFetcherShouldBeCorrectlySeededIfRestoringFromCheckpoint.
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededIfRestoringFromCheckpoint() throws Exception {
    // ----------------------------------------------------------------------
    // setup initial state
    // ----------------------------------------------------------------------
    HashMap<StreamShardHandle, SequenceNumber> fakeRestoredState = getFakeRestoredStore("all");

    // ----------------------------------------------------------------------
    // mock operator state backend and initial state for initializeState()
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState =
            new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> state : fakeRestoredState.entrySet()) {
        listState.add(
                Tuple2.of(
                        KinesisDataFetcher.convertToStreamShardMetadata(state.getKey()),
                        state.getValue()));
    }

    OperatorStateStore operatorStateStore = mock(OperatorStateStore.class);
    when(operatorStateStore.getUnionListState(Matchers.any(ListStateDescriptor.class)))
            .thenReturn(listState);

    StateInitializationContext initializationContext = mock(StateInitializationContext.class);
    when(initializationContext.getOperatorStateStore()).thenReturn(operatorStateStore);
    when(initializationContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // mock fetcher
    // ----------------------------------------------------------------------
    KinesisDataFetcher mockedFetcher = mockKinesisDataFetcher();
    List<StreamShardHandle> shards = new ArrayList<>();
    shards.addAll(fakeRestoredState.keySet());
    when(mockedFetcher.discoverNewShardsToSubscribe()).thenReturn(shards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // ----------------------------------------------------------------------
    // start to test fetcher's initial state seeding
    // ----------------------------------------------------------------------
    TestableFlinkKinesisConsumer consumer =
            new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumer.initializeState(initializationContext);
    consumer.open(new Configuration());
    consumer.run(Mockito.mock(SourceFunction.SourceContext.class));

    for (Map.Entry<StreamShardHandle, SequenceNumber> restoredShard :
            fakeRestoredState.entrySet()) {
        Mockito.verify(mockedFetcher)
                .registerNewSubscribedShardState(
                        new KinesisStreamShardState(
                                KinesisDataFetcher.convertToStreamShardMetadata(restoredShard.getKey()),
                                restoredShard.getKey(),
                                restoredShard.getValue()));
    }
}
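The getUnionListState mock above stands in for the consumer's checkpointed progress, a union list of (shard metadata, sequence number) pairs. A hedged sketch of that state's shape follows; the descriptor name and type construction are assumptions for illustration, not the consumer's actual declaration.

// Sketch (assumed descriptor name): one list entry per subscribed shard.
ListStateDescriptor<Tuple2<StreamShardMetadata, SequenceNumber>> descriptor =
        new ListStateDescriptor<>(
                "Kinesis-Stream-Shard-State", // hypothetical state name
                TypeInformation.of(
                        new TypeHint<Tuple2<StreamShardMetadata, SequenceNumber>>() {}));
ListState<Tuple2<StreamShardMetadata, SequenceNumber>> shardProgress =
        operatorStateStore.getUnionListState(descriptor);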
Use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
The class KinesisDataFetcherTest, method testStreamShardMetadataAndHandleConversion.
@Test
public void testStreamShardMetadataAndHandleConversion() {
    String streamName = "fakeStream1";
    String shardId = "shard-000001";
    String parentShardId = "shard-000002";
    String adjacentParentShardId = "shard-000003";
    String startingHashKey = "key-000001";
    String endingHashKey = "key-000010";
    String startingSequenceNumber = "seq-0000021";
    String endingSequenceNumber = "seq-00000031";

    StreamShardMetadata kinesisStreamShard = new StreamShardMetadata();
    kinesisStreamShard.setStreamName(streamName);
    kinesisStreamShard.setShardId(shardId);
    kinesisStreamShard.setParentShardId(parentShardId);
    kinesisStreamShard.setAdjacentParentShardId(adjacentParentShardId);
    kinesisStreamShard.setStartingHashKey(startingHashKey);
    kinesisStreamShard.setEndingHashKey(endingHashKey);
    kinesisStreamShard.setStartingSequenceNumber(startingSequenceNumber);
    kinesisStreamShard.setEndingSequenceNumber(endingSequenceNumber);

    Shard shard =
            new Shard()
                    .withShardId(shardId)
                    .withParentShardId(parentShardId)
                    .withAdjacentParentShardId(adjacentParentShardId)
                    .withHashKeyRange(
                            new HashKeyRange()
                                    .withStartingHashKey(startingHashKey)
                                    .withEndingHashKey(endingHashKey))
                    .withSequenceNumberRange(
                            new SequenceNumberRange()
                                    .withStartingSequenceNumber(startingSequenceNumber)
                                    .withEndingSequenceNumber(endingSequenceNumber));
    StreamShardHandle streamShardHandle = new StreamShardHandle(streamName, shard);

    assertEquals(kinesisStreamShard, KinesisDataFetcher.convertToStreamShardMetadata(streamShardHandle));
    assertEquals(streamShardHandle, KinesisDataFetcher.convertToStreamShardHandle(kinesisStreamShard));
}
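For reference, here is a hedged sketch of the field mapping the round-trip assertions imply. It is an illustrative reconstruction, not the actual body of KinesisDataFetcher.convertToStreamShardMetadata.

// Illustrative sketch of the handle-to-metadata mapping the assertions above imply.
public static StreamShardMetadata convertToStreamShardMetadataSketch(StreamShardHandle handle) {
    StreamShardMetadata metadata = new StreamShardMetadata();
    metadata.setStreamName(handle.getStreamName());
    metadata.setShardId(handle.getShard().getShardId());
    metadata.setParentShardId(handle.getShard().getParentShardId());
    metadata.setAdjacentParentShardId(handle.getShard().getAdjacentParentShardId());
    if (handle.getShard().getHashKeyRange() != null) {
        metadata.setStartingHashKey(handle.getShard().getHashKeyRange().getStartingHashKey());
        metadata.setEndingHashKey(handle.getShard().getHashKeyRange().getEndingHashKey());
    }
    if (handle.getShard().getSequenceNumberRange() != null) {
        metadata.setStartingSequenceNumber(
                handle.getShard().getSequenceNumberRange().getStartingSequenceNumber());
        metadata.setEndingSequenceNumber(
                handle.getShard().getSequenceNumberRange().getEndingSequenceNumber());
    }
    return metadata;
}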