Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint.
/**
 * Restores shard state for two streams, runs the fetcher through its initial shard discovery
 * against a fake Kinesis that reports the same shard counts as the restored state, and checks
 * that the last seen shard id tracked for each stream is the id of its highest-order shard.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint() throws Exception {
    List<String> subscribedStreams = new LinkedList<>();
    subscribedStreams.add("fakeStream1");
    subscribedStreams.add("fakeStream2");

    Map<StreamShardHandle, String> restoredShardState = new HashMap<>();
    // fakeStream1 was checkpointed with 3 shards
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredShardState.put(new StreamShardHandle("fakeStream1", shard), UUID.randomUUID().toString());
    }
    // fakeStream2 was checkpointed with 2 shards
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredShardState.put(new StreamShardHandle("fakeStream2", shard), UUID.randomUUID().toString());
    }

    // shard counts reported by the fake Kinesis: identical to what was restored
    Map<String, Integer> shardCountPerStream = new HashMap<>();
    shardCountPerStream.put("fakeStream1", 3);
    shardCountPerStream.put("fakeStream2", 2);

    HashMap<String, String> lastSeenShardIds =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(subscribedStreams);

    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    subscribedStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    lastSeenShardIds,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountPerStream));

    // seed the fetcher with every restored shard before it starts running
    for (Map.Entry<StreamShardHandle, String> restored : restoredShardState.entrySet()) {
        StreamShardHandle handle = restored.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(handle.getStreamName(), handle.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(handle),
                        handle,
                        new SequenceNumber(restored.getValue())));
    }

    CheckedThread fetcherRunner =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    fetcher.runFetcher();
                }
            };
    fetcherRunner.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    fetcherRunner.sync();

    // the tracked streams must be exactly the subscribed streams
    Set<String> trackedStreams = lastSeenShardIds.keySet();
    assertEquals(subscribedStreams.size(), trackedStreams.size());
    assertTrue(trackedStreams.containsAll(subscribedStreams));

    // each stream's last seen shard must be its highest-order shard
    for (String stream : trackedStreams) {
        String expectedLastShardId =
                KinesisShardIdGenerator.generateFromShardOrder(shardCountPerStream.get(stream) - 1);
        assertEquals(expectedLastShardId, lastSeenShardIds.get(stream));
    }
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist.
/**
 * Same scenario as the "no new shards" restore test, but two additional subscribed streams
 * (fakeStream3, fakeStream4) report zero shards. Checks that those streams stay tracked with a
 * {@code null} last seen shard id while the two restored streams keep their highest-order shard.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> subscribedStreams = new LinkedList<>();
    subscribedStreams.add("fakeStream1");
    subscribedStreams.add("fakeStream2");
    // the next two streams are subscribed to, but no shards will be found for them
    subscribedStreams.add("fakeStream3");
    subscribedStreams.add("fakeStream4");

    Map<StreamShardHandle, String> restoredShardState = new HashMap<>();
    // fakeStream1 was checkpointed with 3 shards
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredShardState.put(new StreamShardHandle("fakeStream1", shard), UUID.randomUUID().toString());
    }
    // fakeStream2 was checkpointed with 2 shards
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredShardState.put(new StreamShardHandle("fakeStream2", shard), UUID.randomUUID().toString());
    }

    // shard counts reported by the fake Kinesis
    Map<String, Integer> shardCountPerStream = new HashMap<>();
    shardCountPerStream.put("fakeStream1", 3); // fixed at 3 shards
    shardCountPerStream.put("fakeStream2", 2); // fixed at 2 shards
    shardCountPerStream.put("fakeStream3", 0); // nothing to discover
    shardCountPerStream.put("fakeStream4", 0); // nothing to discover

    HashMap<String, String> lastSeenShardIds =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(subscribedStreams);

    // the non-resharded behaviour keeps the shard counts above constant while the fetcher runs,
    // i.e. Kinesis is not resharded AFTER the restore
    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    subscribedStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    lastSeenShardIds,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountPerStream));

    // seed the fetcher with every restored shard before it starts running
    for (Map.Entry<StreamShardHandle, String> restored : restoredShardState.entrySet()) {
        StreamShardHandle handle = restored.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(handle.getStreamName(), handle.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(handle),
                        handle,
                        new SequenceNumber(restored.getValue())));
    }

    CheckedThread fetcherRunner =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    fetcher.runFetcher();
                }
            };
    fetcherRunner.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    fetcherRunner.sync();

    // the tracked streams must be exactly the subscribed streams
    Set<String> trackedStreams = lastSeenShardIds.keySet();
    assertEquals(subscribedStreams.size(), trackedStreams.size());
    assertTrue(trackedStreams.containsAll(subscribedStreams));

    // restored streams end on their highest-order shard; shard-less streams have no entry value
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(2), lastSeenShardIds.get("fakeStream1"));
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(1), lastSeenShardIds.get("fakeStream2"));
    assertNull(lastSeenShardIds.get("fakeStream3"));
    assertNull(lastSeenShardIds.get("fakeStream4"));
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class KinesisDataFetcherTest, method testRecordPublisherFactoryIsTornDownWhenDeregisterStreamConsumerThrowsException.
/**
 * Builds a fetcher whose {@code deregisterStreamConsumer()} always throws, shuts it down, and
 * verifies that {@code close()} is still invoked on the mocked V2 Kinesis proxy.
 */
@Test(timeout = 10000)
public void testRecordPublisherFactoryIsTornDownWhenDeregisterStreamConsumerThrowsException() throws InterruptedException {
    KinesisProxyV2Interface v2Proxy = mock(KinesisProxyV2Interface.class);
    KinesisProxyInterface v1Proxy = mock(KinesisProxyInterface.class);

    // explicit <String> is required: the anonymous subclass overrides deregistration to fail
    TestableKinesisDataFetcher<String> failingFetcher =
            new TestableKinesisDataFetcher<String>(
                    singletonList("fakeStream1"),
                    new TestSourceContext<>(),
                    TestUtils.efoProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    new HashMap<>(),
                    v1Proxy,
                    v2Proxy) {
                @Override
                protected void deregisterStreamConsumer() {
                    throw new RuntimeException();
                }
            };

    failingFetcher.shutdownFetcher();

    // the proxy must have been closed despite the exception thrown during deregistration
    verify(v2Proxy).close();
    failingFetcher.awaitTermination();
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist.
/**
 * Restores shard state for fakeStream1 (3 shards) and fakeStream2 (2 shards), then runs the
 * fetcher's initial discovery against a fake Kinesis that reports more shards than were
 * restored (4 and 5 respectively) and zero shards for fakeStream3 / fakeStream4.
 *
 * <p>Checks that the last seen shard id of each restored stream advances to the newly
 * discovered highest-order shard, and that the shard-less streams remain tracked with a
 * {@code null} last seen shard id.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");
    // fakeStream3 will not have any shards
    fakeStreams.add("fakeStream3");
    // fakeStream4 will not have any shards
    fakeStreams.add("fakeStream4");

    Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new StreamShardHandle("fakeStream1", shard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredStateUnderTest.put(new StreamShardHandle("fakeStream2", shard), UUID.randomUUID().toString());
    }

    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 had 3 shards before & 1 new shard after restore
    streamToShardCount.put("fakeStream1", 3 + 1);
    // fakeStream2 had 2 shards before & 3 new shards after restore
    // (the previous comment said "2 new shard", contradicting both this value and the
    // shard-order-4 assertion at the end of the test)
    streamToShardCount.put("fakeStream2", 2 + 3);
    // no shards can be found for fakeStream3
    streamToShardCount.put("fakeStream3", 0);
    // no shards can be found for fakeStream4
    streamToShardCount.put("fakeStream4", 0);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // using a non-resharded streams kinesis behaviour to represent that Kinesis is not
    // resharded AFTER the restore; the extra shards above already exist when the fetcher starts
    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    fakeStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    subscribedStreamsToLastSeenShardIdsUnderTest,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // seed the fetcher with every restored shard before it starts running
    for (Map.Entry<StreamShardHandle, String> restoredState : restoredStateUnderTest.entrySet()) {
        StreamShardHandle restoredShard = restoredState.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(
                restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
                        restoredShard,
                        new SequenceNumber(restoredState.getValue())));
    }

    CheckedThread runFetcherThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    fetcher.runFetcher();
                }
            };
    runFetcherThread.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    runFetcherThread.sync();

    // assert that the streams tracked in the state are identical to the subscribed streams
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertEquals(fakeStreams.size(), streamsInState.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // assert that the last seen shards in state is correctly set:
    // 4 shards in fakeStream1 -> highest order 3; 5 shards in fakeStream2 -> highest order 4
    assertEquals(
            KinesisShardIdGenerator.generateFromShardOrder(3),
            subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream1"));
    assertEquals(
            KinesisShardIdGenerator.generateFromShardOrder(4),
            subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream2"));
    assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream3"));
    assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream4"));
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class FlinkKinesisConsumerTest, method testUseRestoredStateForSnapshotIfFetcherNotInitialized.
// ----------------------------------------------------------------------
// Tests related to state initialization
// ----------------------------------------------------------------------
/**
 * Initializes a consumer (subtask 0 of 2) from a restored union list state holding four shards
 * of "fakeStream", opens it without running it, takes a snapshot, and checks that the list
 * state was cleared and now holds half of the restored entries, including shards 0 and 2.
 */
@Test
public void testUseRestoredStateForSnapshotIfFetcherNotInitialized() throws Exception {
    Properties consumerProperties = TestUtils.getStandardProperties();

    // four restored shards of the same stream, all at sequence number "1"; each entry is
    // recorded both in the reference list and in the list state handed to the consumer
    List<Tuple2<StreamShardMetadata, SequenceNumber>> globalUnionState = new ArrayList<>(4);
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState = new TestingListState<>();
    for (int shardOrder = 0; shardOrder < 4; shardOrder++) {
        StreamShardHandle handle =
                new StreamShardHandle(
                        "fakeStream",
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder)));
        globalUnionState.add(
                Tuple2.of(KinesisDataFetcher.convertToStreamShardMetadata(handle), new SequenceNumber("1")));
    }
    for (Tuple2<StreamShardMetadata, SequenceNumber> restoredEntry : globalUnionState) {
        listState.add(restoredEntry);
    }

    FlinkKinesisConsumer<String> consumer =
            new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), consumerProperties);

    // this consumer instance is subtask 0 out of 2 parallel subtasks
    RuntimeContext runtimeContext = mock(RuntimeContext.class);
    when(runtimeContext.getIndexOfThisSubtask()).thenReturn(0);
    when(runtimeContext.getNumberOfParallelSubtasks()).thenReturn(2);
    consumer.setRuntimeContext(runtimeContext);

    // state store that hands back the prepared list state and reports a restore
    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(listState);
    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    consumer.initializeState(restoreContext);
    // only opened, not run
    consumer.open(new Configuration());
    // arbitrary checkpoint id and timestamp
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(123, 123));

    assertTrue(listState.isClearCalled());
    // the checkpointed list state should contain only the shards that it should subscribe to
    assertEquals(globalUnionState.size() / 2, listState.getList().size());
    assertTrue(listState.getList().contains(globalUnionState.get(0)));
    assertTrue(listState.getList().contains(globalUnionState.get(2)));
}
Aggregations