use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
the class DynamoDBStreamsProxy method getShardsOfStream.
/**
 * Collects a {@link StreamShardHandle} for every shard returned by repeated
 * {@code describeStream} calls on the given stream.
 *
 * <p>Each call is issued with the id of the last shard seen so far (initially
 * {@code lastSeenShardId}); paging stops as soon as a stream description reports
 * that it has no more shards.
 *
 * @param streamName name of the stream to describe
 * @param lastSeenShardId shard id passed to the first {@code describeStream} call, may be null
 * @return handles for all shards returned across the describe calls, in the order received
 * @throws InterruptedException declared for the {@code describeStream} call
 */
private List<StreamShardHandle> getShardsOfStream(String streamName, @Nullable String lastSeenShardId) throws InterruptedException {
    final List<StreamShardHandle> handles = new ArrayList<>();
    // cursor handed to the next describeStream call; only advances when a page is non-empty
    String resumeAfterShardId = lastSeenShardId;

    while (true) {
        final DescribeStreamResult page = describeStream(streamName, resumeAfterShardId);
        final List<Shard> pageShards = page.getStreamDescription().getShards();

        for (Shard pageShard : pageShards) {
            handles.add(new StreamShardHandle(streamName, pageShard));
        }

        if (!pageShards.isEmpty()) {
            final Shard lastOfPage = pageShards.get(pageShards.size() - 1);
            resumeAfterShardId = lastOfPage.getShardId();
        }

        if (!page.getStreamDescription().isHasMoreShards()) {
            return handles;
        }
    }
}
use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
the class FlinkKinesisConsumerTest method testFetcherShouldBeCorrectlySeededOnlyItsOwnStates.
/**
 * Restores a consumer from a union list state that contains entries built from two fake
 * restored stores, while the mocked fetcher only reports the shards of the first store as
 * discovered. Verifies that exactly those shards are registered with the fetcher and that
 * none of the entries from the second store are.
 */
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededOnlyItsOwnStates() throws Exception {
    // ----------------------------------------------------------------------
    // restored state: entries this consumer should pick up, and entries it should not
    // ----------------------------------------------------------------------
    HashMap<StreamShardHandle, SequenceNumber> ownRestoredState = getFakeRestoredStore("fakeStream1");
    HashMap<StreamShardHandle, SequenceNumber> foreignRestoredState = getFakeRestoredStore("fakeStream2");

    // ----------------------------------------------------------------------
    // union list state served by the mocked operator state store
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> unionState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : ownRestoredState.entrySet()) {
        StreamShardMetadata metadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        unionState.add(Tuple2.of(metadata, entry.getValue()));
    }
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : foreignRestoredState.entrySet()) {
        StreamShardMetadata metadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        unionState.add(Tuple2.of(metadata, entry.getValue()));
    }

    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(unionState);

    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // mocked fetcher that discovers only the shards of the consumer's own restored state
    // ----------------------------------------------------------------------
    KinesisDataFetcher mockedFetcher = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>(ownRestoredState.keySet());
    when(mockedFetcher.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // ----------------------------------------------------------------------
    // run the consumer through initializeState / open / run
    // ----------------------------------------------------------------------
    TestableFlinkKinesisConsumer consumer = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumer.initializeState(restoreContext);
    consumer.open(new Configuration());
    consumer.run(Mockito.mock(SourceFunction.SourceContext.class));

    // state that does not belong to this consumer must never reach the fetcher
    for (Map.Entry<StreamShardHandle, SequenceNumber> foreign : foreignRestoredState.entrySet()) {
        StreamShardHandle handle = foreign.getKey();
        KinesisStreamShardState unexpected = new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(handle), handle, foreign.getValue());
        Mockito.verify(mockedFetcher, never()).registerNewSubscribedShardState(unexpected);
    }

    // state that belongs to this consumer must have been registered
    for (Map.Entry<StreamShardHandle, SequenceNumber> own : ownRestoredState.entrySet()) {
        StreamShardHandle handle = own.getKey();
        KinesisStreamShardState expected = new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(handle), handle, own.getValue());
        Mockito.verify(mockedFetcher).registerNewSubscribedShardState(expected);
    }
}
use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
the class FlinkKinesisConsumerTest method testFetcherShouldBeCorrectlySeededWithNewDiscoveredKinesisStreamShard.
/*
* This tests that the consumer correctly picks up shards that were not discovered on the previous run.
*
* Case under test:
*
* If the original parallelism is 2 and states are:
* Consumer subtask 1:
* stream1, shard1, SequentialNumber(xxx)
* Consumer subtask 2:
* stream1, shard2, SequentialNumber(yyy)
*
* After discoverNewShardsToSubscribe() if there were two shards (shard3, shard4) created:
* Consumer subtask 1 (late for discoverNewShardsToSubscribe()):
* stream1, shard1, SequentialNumber(xxx)
* Consumer subtask 2:
* stream1, shard2, SequentialNumber(yyy)
* stream1, shard4, SequentialNumber(zzz)
*
* If snapshotState() occurs and parallelism is changed to 1:
* Union state will be:
* stream1, shard1, SequentialNumber(xxx)
* stream1, shard2, SequentialNumber(yyy)
* stream1, shard4, SequentialNumber(zzz)
* Fetcher should be seeded with:
* stream1, shard1, SequentialNumber(xxx)
* stream1, shard2, SequentialNumber(yyy)
* stream1, shard3, SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM
* stream1, shard4, SequentialNumber(zzz)
*/
/**
 * Restores a consumer from union state and lets the mocked fetcher discover one shard that is
 * not part of that state. Verifies that every restored shard is registered with its restored
 * sequence number and that the additional shard is registered with
 * {@code SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM}.
 */
@Test
@SuppressWarnings("unchecked")
public void testFetcherShouldBeCorrectlySeededWithNewDiscoveredKinesisStreamShard() throws Exception {
    // ----------------------------------------------------------------------
    // restored state
    // ----------------------------------------------------------------------
    HashMap<StreamShardHandle, SequenceNumber> expectedState = getFakeRestoredStore("all");

    // ----------------------------------------------------------------------
    // union list state served by the mocked operator state store
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> unionState = new TestingListState<>();
    for (Map.Entry<StreamShardHandle, SequenceNumber> entry : expectedState.entrySet()) {
        StreamShardMetadata metadata = KinesisDataFetcher.convertToStreamShardMetadata(entry.getKey());
        unionState.add(Tuple2.of(metadata, entry.getValue()));
    }

    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(unionState);

    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // mocked fetcher: discovers all restored shards plus one shard absent from the state
    // ----------------------------------------------------------------------
    KinesisDataFetcher mockedFetcher = mockKinesisDataFetcher();
    List<StreamShardHandle> discoveredShards = new ArrayList<>(expectedState.keySet());
    Shard extraShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(2));
    discoveredShards.add(new StreamShardHandle("fakeStream2", extraShard));
    when(mockedFetcher.discoverNewShardsToSubscribe()).thenReturn(discoveredShards);

    // assume the given config is correct
    PowerMockito.mockStatic(KinesisConfigUtil.class);
    PowerMockito.doNothing().when(KinesisConfigUtil.class);

    // ----------------------------------------------------------------------
    // run the consumer through initializeState / open / run
    // ----------------------------------------------------------------------
    TestableFlinkKinesisConsumer consumer = new TestableFlinkKinesisConsumer("fakeStream", new Properties(), 10, 2);
    consumer.initializeState(restoreContext);
    consumer.open(new Configuration());
    consumer.run(Mockito.mock(SourceFunction.SourceContext.class));

    // the shard missing from the restored state is expected with the earliest sentinel
    Shard expectedExtraShard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(2));
    expectedState.put(new StreamShardHandle("fakeStream2", expectedExtraShard), SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get());

    for (Map.Entry<StreamShardHandle, SequenceNumber> expectedEntry : expectedState.entrySet()) {
        StreamShardHandle handle = expectedEntry.getKey();
        KinesisStreamShardState expected = new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(handle), handle, expectedEntry.getValue());
        Mockito.verify(mockedFetcher).registerNewSubscribedShardState(expected);
    }
}
use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
the class KinesisDataFetcherTest method testShardToSubtaskMappingWithCustomHashFunction.
// ----------------------------------------------------------------------
// Tests shard distribution with custom hash function
// ----------------------------------------------------------------------
/**
 * Installs a shard assigner that returns the same value for every shard and checks, for each
 * of the 10 subtask indices, how many shards the fetcher discovers: the subtask whose index
 * equals the assigner's return value must discover all shards of the stream, every other
 * subtask must discover none. The check is repeated for assigner values 0 and 1.
 */
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {
    int totalCountOfSubtasks = 10;
    int shardCount = 3;
    for (int i = 0; i < 2; i++) {
        final int hash = i;
        // assigner that ignores its inputs and always returns the current hash
        final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
        Map<String, Integer> streamToShardCount = new HashMap<>();
        List<String> fakeStreams = new LinkedList<>();
        fakeStreams.add("fakeStream");
        streamToShardCount.put("fakeStream", shardCount);
        for (int j = 0; j < totalCountOfSubtasks; j++) {
            int subtaskIndex = j;
            // build the fetcher as usual; its shard assigner is replaced right below.
            // Parameterized (not raw) so that discoverNewShardsToSubscribe() is type-checked.
            final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(fakeStreams, new TestSourceContext<>(), new Properties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), totalCountOfSubtasks, subtaskIndex, new AtomicReference<>(), new LinkedList<>(), KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams), FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
            Whitebox.setInternalState(fetcher, "shardAssigner", // override hashing
            allShardsSingleSubtaskFn);
            List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
            fetcher.shutdownFetcher();
            String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
            if (j == i) {
                // only the subtask matching the assigner's value owns the shards
                assertEquals(msg, shardCount, shards.size());
            } else {
                assertEquals(msg, 0, shards.size());
            }
        }
    }
}
use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in project flink by apache.
the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint.
/**
 * Seeds a fetcher with restored shard state for two streams, then runs it against a fake
 * Kinesis behaviour that exposes more shards per stream than were restored. After the initial
 * discovery the last-seen-shard map must track exactly the subscribed streams, and each
 * stream's entry must be the id of the highest-ordered shard the fake behaviour exposes.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");

    Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();

    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        restoredStateUnderTest.put(new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))), UUID.randomUUID().toString());
    }

    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        restoredStateUnderTest.put(new StreamShardHandle("fakeStream2", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))), UUID.randomUUID().toString());
    }

    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 had 3 shards before & 1 new shard after restore
    streamToShardCount.put("fakeStream1", 3 + 1);
    // fakeStream2 had 2 shards before & 3 new shards after restore
    streamToShardCount.put("fakeStream2", 2 + 3);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest = KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // using a non-resharded streams kinesis behaviour to represent that Kinesis is not
    // resharded AFTER the restore
    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(fakeStreams, new TestSourceContext<>(), TestUtils.getStandardProperties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), 10, 2, new AtomicReference<>(), new LinkedList<>(), subscribedStreamsToLastSeenShardIdsUnderTest, FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // replay the restored state into the fetcher
    for (Map.Entry<StreamShardHandle, String> restored : restoredStateUnderTest.entrySet()) {
        StreamShardHandle handle = restored.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(handle.getStreamName(), handle.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(handle), handle, new SequenceNumber(restored.getValue())));
    }

    // run the fetcher on its own thread until the initial discovery has happened
    CheckedThread fetcherRunner = new CheckedThread() {
        @Override
        public void go() throws Exception {
            fetcher.runFetcher();
        }
    };
    fetcherRunner.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    fetcherRunner.sync();

    // assert that the streams tracked in the state are identical to the subscribed streams
    Set<String> trackedStreams = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertEquals(fakeStreams.size(), trackedStreams.size());
    assertTrue(trackedStreams.containsAll(fakeStreams));

    // assert that the last seen shards in state is correctly set
    for (Map.Entry<String, String> tracked : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
        int highestShardOrder = streamToShardCount.get(tracked.getKey()) - 1;
        String expectedLastSeenShardId = KinesisShardIdGenerator.generateFromShardOrder(highestShardOrder);
        assertEquals(expectedLastSeenShardId, tracked.getValue());
    }
}
Aggregations