Use of org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState in the Apache Flink project.
Class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint.
/**
 * Seeds a fetcher with restored state for two streams, runs it briefly against a fake Kinesis
 * that reports the same number of shards as were restored, and checks that the last seen shard
 * id tracked for each subscribed stream is the highest-ordered shard of that stream.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");

    // restored state: shard -> randomly generated sequence number string
    Map<KinesisStreamShard, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard restoredShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new KinesisStreamShard("fakeStream1", restoredShard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard restoredShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new KinesisStreamShard("fakeStream2", restoredShard), UUID.randomUUID().toString());
    }

    // shard counts handed to the fake Kinesis behaviour: unchanged after restore
    Map<String, Integer> streamToShardCount = new HashMap<>();
    streamToShardCount.put("fakeStream1", 3);
    streamToShardCount.put("fakeStream2", 2);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
        KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    final TestableKinesisDataFetcher fetcher = new TestableKinesisDataFetcher(
        fakeStreams,
        new Properties(),
        10,
        2,
        new AtomicReference<Throwable>(),
        new LinkedList<KinesisStreamShardState>(),
        subscribedStreamsToLastSeenShardIdsUnderTest,
        FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // seed the fetcher with every restored shard, the same way a restored consumer does
    for (Map.Entry<KinesisStreamShard, String> restoredEntry : restoredStateUnderTest.entrySet()) {
        KinesisStreamShard restoredShard = restoredEntry.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
            new KinesisStreamShardState(restoredShard, new SequenceNumber(restoredEntry.getValue())));
    }
    fetcher.setIsRestoringFromFailure(true);

    // every ShardConsumer the fetcher tries to create is replaced by a mock
    PowerMockito.whenNew(ShardConsumer.class).withAnyArguments().thenReturn(Mockito.mock(ShardConsumer.class));

    Runnable fetcherLoop = new Runnable() {
        @Override
        public void run() {
            try {
                fetcher.runFetcher();
            } catch (Exception ignored) {
                // failures of the fetcher loop are deliberately not propagated to the test thread
            }
        }
    };
    Thread runFetcherThread = new Thread(fetcherLoop);
    runFetcherThread.start();

    // give the fetcher some time to run before shutting it down
    Thread.sleep(1000);
    fetcher.shutdownFetcher();

    // the streams tracked in the state must be exactly the subscribed streams
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertTrue(streamsInState.size() == fakeStreams.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // the last seen shard of every stream must be the one with the highest shard order
    for (Map.Entry<String, String> tracked : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
        int lastShardOrder = streamToShardCount.get(tracked.getKey()) - 1;
        String expectedLastSeenShardId = KinesisShardIdGenerator.generateFromShardOrder(lastShardOrder);
        assertTrue(tracked.getValue().equals(expectedLastSeenShardId));
    }
}
Use of org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState in the Apache Flink project.
Class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist.
/**
 * Seeds a fetcher with restored state for two of four subscribed streams, runs it briefly
 * against a fake Kinesis that reports unchanged shard counts (and zero shards for the other
 * two streams), and checks the last seen shard id tracked per stream: the highest-ordered
 * shard for the streams with shards, {@code null} for the streams without any.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");
    // fakeStream3 and fakeStream4 will not have any shards
    fakeStreams.add("fakeStream3");
    fakeStreams.add("fakeStream4");

    // restored state: shard -> randomly generated sequence number string
    Map<KinesisStreamShard, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard restoredShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new KinesisStreamShard("fakeStream1", restoredShard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard restoredShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new KinesisStreamShard("fakeStream2", restoredShard), UUID.randomUUID().toString());
    }

    // shard counts handed to the fake Kinesis behaviour
    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 has fixed 3 shards, fakeStream2 has fixed 2 shards
    streamToShardCount.put("fakeStream1", 3);
    streamToShardCount.put("fakeStream2", 2);
    // no shards can be found for fakeStream3 and fakeStream4
    streamToShardCount.put("fakeStream3", 0);
    streamToShardCount.put("fakeStream4", 0);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
        KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // a non-resharded streams behaviour represents that Kinesis is not resharded AFTER the restore
    final TestableKinesisDataFetcher fetcher = new TestableKinesisDataFetcher(
        fakeStreams,
        new Properties(),
        10,
        2,
        new AtomicReference<Throwable>(),
        new LinkedList<KinesisStreamShardState>(),
        subscribedStreamsToLastSeenShardIdsUnderTest,
        FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // seed the fetcher with every restored shard, the same way a restored consumer does
    for (Map.Entry<KinesisStreamShard, String> restoredEntry : restoredStateUnderTest.entrySet()) {
        KinesisStreamShard restoredShard = restoredEntry.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
            new KinesisStreamShardState(restoredShard, new SequenceNumber(restoredEntry.getValue())));
    }
    fetcher.setIsRestoringFromFailure(true);

    // every ShardConsumer the fetcher tries to create is replaced by a mock
    PowerMockito.whenNew(ShardConsumer.class).withAnyArguments().thenReturn(Mockito.mock(ShardConsumer.class));

    Runnable fetcherLoop = new Runnable() {
        @Override
        public void run() {
            try {
                fetcher.runFetcher();
            } catch (Exception ignored) {
                // failures of the fetcher loop are deliberately not propagated to the test thread
            }
        }
    };
    Thread runFetcherThread = new Thread(fetcherLoop);
    runFetcherThread.start();

    // give the fetcher some time to run before shutting it down
    Thread.sleep(1000);
    fetcher.shutdownFetcher();

    // the streams tracked in the state must be exactly the subscribed streams
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertTrue(streamsInState.size() == fakeStreams.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // streams with shards track their highest-ordered shard; streams without shards track nothing
    String lastSeenOfStream1 = subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream1");
    assertTrue(lastSeenOfStream1.equals(KinesisShardIdGenerator.generateFromShardOrder(2)));
    String lastSeenOfStream2 = subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream2");
    assertTrue(lastSeenOfStream2.equals(KinesisShardIdGenerator.generateFromShardOrder(1)));
    assertTrue(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream3") == null);
    assertTrue(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream4") == null);
}
Use of org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState in the Apache Flink project.
Class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist.
/**
 * Seeds a fetcher with restored state for two of four subscribed streams, runs it briefly
 * against a fake Kinesis that reports more shards than were restored (and zero shards for the
 * other two streams), and checks the last seen shard id tracked per stream: the highest-ordered
 * shard now reported for the streams with shards, {@code null} for the streams without any.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");
    // fakeStream3 and fakeStream4 will not have any shards
    fakeStreams.add("fakeStream3");
    fakeStreams.add("fakeStream4");

    // restored state: shard -> randomly generated sequence number string
    Map<KinesisStreamShard, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard restoredShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new KinesisStreamShard("fakeStream1", restoredShard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard restoredShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new KinesisStreamShard("fakeStream2", restoredShard), UUID.randomUUID().toString());
    }

    // shard counts handed to the fake Kinesis behaviour
    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 had 3 shards before & 1 new shard after restore
    streamToShardCount.put("fakeStream1", 3 + 1);
    // fakeStream2 had 2 shards before & 3 new shards after restore
    streamToShardCount.put("fakeStream2", 2 + 3);
    // no shards can be found for fakeStream3 and fakeStream4
    streamToShardCount.put("fakeStream3", 0);
    streamToShardCount.put("fakeStream4", 0);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
        KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // a non-resharded streams behaviour represents that Kinesis is not resharded AFTER the restore
    final TestableKinesisDataFetcher fetcher = new TestableKinesisDataFetcher(
        fakeStreams,
        new Properties(),
        10,
        2,
        new AtomicReference<Throwable>(),
        new LinkedList<KinesisStreamShardState>(),
        subscribedStreamsToLastSeenShardIdsUnderTest,
        FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // seed the fetcher with every restored shard, the same way a restored consumer does
    for (Map.Entry<KinesisStreamShard, String> restoredEntry : restoredStateUnderTest.entrySet()) {
        KinesisStreamShard restoredShard = restoredEntry.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
            new KinesisStreamShardState(restoredShard, new SequenceNumber(restoredEntry.getValue())));
    }
    fetcher.setIsRestoringFromFailure(true);

    // every ShardConsumer the fetcher tries to create is replaced by a mock
    PowerMockito.whenNew(ShardConsumer.class).withAnyArguments().thenReturn(Mockito.mock(ShardConsumer.class));

    Runnable fetcherLoop = new Runnable() {
        @Override
        public void run() {
            try {
                fetcher.runFetcher();
            } catch (Exception ignored) {
                // failures of the fetcher loop are deliberately not propagated to the test thread
            }
        }
    };
    Thread runFetcherThread = new Thread(fetcherLoop);
    runFetcherThread.start();

    // give the fetcher some time to run before shutting it down
    Thread.sleep(1000);
    fetcher.shutdownFetcher();

    // the streams tracked in the state must be exactly the subscribed streams
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertTrue(streamsInState.size() == fakeStreams.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // fakeStream1 now has 4 shards (last order 3), fakeStream2 now has 5 shards (last order 4);
    // streams without shards track nothing
    String lastSeenOfStream1 = subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream1");
    assertTrue(lastSeenOfStream1.equals(KinesisShardIdGenerator.generateFromShardOrder(3)));
    String lastSeenOfStream2 = subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream2");
    assertTrue(lastSeenOfStream2.equals(KinesisShardIdGenerator.generateFromShardOrder(4)));
    assertTrue(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream3") == null);
    assertTrue(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream4") == null);
}
Use of org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState in the Apache Flink project.
Class ShardConsumerTest, method testCorrectNumOfCollectedRecordsAndUpdatedStateWithUnexpectedExpiredIterator.
/**
 * Runs a ShardConsumer for a single fake shard against a fake Kinesis behaviour that hits an
 * unexpected expired shard iterator, and checks that 1000 elements were collected and that the
 * shard's last processed sequence number ends up as the shard-ending sentinel.
 */
@Test
public void testCorrectNumOfCollectedRecordsAndUpdatedStateWithUnexpectedExpiredIterator() {
    // the fake shard's hash key range runs from "0" to the decimal form of 16 bytes of 0xFF
    String maxHashKey = new BigInteger(StringUtils.repeat("FF", 16), 16).toString();
    HashKeyRange fullHashKeyRange = new HashKeyRange().withStartingHashKey("0").withEndingHashKey(maxHashKey);
    Shard shardZero = new Shard()
        .withShardId(KinesisShardIdGenerator.generateFromShardOrder(0))
        .withHashKeyRange(fullHashKeyRange);
    KinesisStreamShard fakeToBeConsumedShard = new KinesisStreamShard("fakeStream", shardZero);

    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(
        new KinesisStreamShardState(fakeToBeConsumedShard, new SequenceNumber("fakeStartingState")));

    TestableKinesisDataFetcher fetcher = new TestableKinesisDataFetcher(
        Collections.singletonList("fakeStream"),
        new Properties(),
        10,
        2,
        new AtomicReference<Throwable>(),
        subscribedShardsStateUnderTest,
        KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(Collections.singletonList("fakeStream")),
        Mockito.mock(KinesisProxyInterface.class));

    KinesisStreamShardState startingState = subscribedShardsStateUnderTest.get(0);

    // the 7th getRecords() call will encounter an unexpected expired shard iterator
    // (presumably 1000 records in total over 9 getRecords() calls - confirm against the factory)
    new ShardConsumer<>(
        fetcher,
        0,
        startingState.getKinesisStreamShard(),
        startingState.getLastProcessedSequenceNum(),
        FakeKinesisBehavioursFactory.totalNumOfRecordsAfterNumOfGetRecordsCallsWithUnexpectedExpiredIterator(1000, 9, 7)).run();

    assertTrue(fetcher.getNumOfElementsCollected() == 1000);
    // re-read the state from the list after the consumer has run
    assertTrue(subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum()
        .equals(SentinelSequenceNumber.SENTINEL_SHARD_ENDING_SEQUENCE_NUM.get()));
}
Use of org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState in the Apache Flink project.
Class FlinkKinesisConsumer, method run.
/**
 * Creates the Kinesis data fetcher for this subtask, seeds it with any restored shard state,
 * and then runs the fetcher loop until it stops.
 *
 * @param sourceContext the context handed to the fetcher and closed once the fetcher has terminated
 * @throws Exception if creating, seeding, running or awaiting the fetcher fails
 */
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
    // all subtasks will run a fetcher, regardless of whether or not the subtask will initially have
    // shards to subscribe to; fetchers will continuously poll for changes in the shard list, so all subtasks
    // can potentially have new shards to subscribe to later on
    fetcher = new KinesisDataFetcher<>(streams, sourceContext, getRuntimeContext(), configProps, deserializer);

    boolean isRestoringFromFailure = (sequenceNumsToRestore != null);
    fetcher.setIsRestoringFromFailure(isRestoringFromFailure);

    // if we are restoring from a checkpoint, we iterate over the restored
    // state and accordingly seed the fetcher with subscribed shards states
    if (isRestoringFromFailure) {
        // Iterate the map that the null check above actually guards. The previous code iterated
        // lastStateSnapshot, which is not checked here and would throw a NullPointerException
        // if it has not been populated by the time run() is called.
        // NOTE(review): assumes sequenceNumsToRestore maps KinesisStreamShard to SequenceNumber,
        // like lastStateSnapshot - confirm against the field declaration.
        for (Map.Entry<KinesisStreamShard, SequenceNumber> restored : sequenceNumsToRestore.entrySet()) {
            fetcher.advanceLastDiscoveredShardOfStream(restored.getKey().getStreamName(), restored.getKey().getShard().getShardId());
            if (LOG.isInfoEnabled()) {
                LOG.info("Subtask {} is seeding the fetcher with restored shard {}," + " starting state set to the restored sequence number {}", getRuntimeContext().getIndexOfThisSubtask(), restored.getKey().toString(), restored.getValue());
            }
            fetcher.registerNewSubscribedShardState(new KinesisStreamShardState(restored.getKey(), restored.getValue()));
        }
    }

    // check that we are running before starting the fetcher
    if (!running) {
        return;
    }

    // start the fetcher loop. The fetcher will stop running only when cancel() or
    // close() is called, or an error is thrown by threads created by the fetcher
    fetcher.runFetcher();

    // check that the fetcher has terminated before fully closing
    fetcher.awaitTermination();
    sourceContext.close();
}
Aggregations