use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class KinesisDataFetcherTest method testShardToSubtaskMappingWithCustomHashFunction.
// ----------------------------------------------------------------------
// Tests shard distribution with custom hash function
// ----------------------------------------------------------------------
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {
int totalCountOfSubtasks = 10;
int shardCount = 3;
for (int i = 0; i < 2; i++) {
final int hash = i;
final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
Map<String, Integer> streamToShardCount = new HashMap<>();
List<String> fakeStreams = new LinkedList<>();
fakeStreams.add("fakeStream");
streamToShardCount.put("fakeStream", shardCount);
for (int j = 0; j < totalCountOfSubtasks; j++) {
int subtaskIndex = j;
// subscribe with default hashing
final TestableKinesisDataFetcher fetcher = new TestableKinesisDataFetcher(fakeStreams, new TestSourceContext<>(), new Properties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), totalCountOfSubtasks, subtaskIndex, new AtomicReference<>(), new LinkedList<>(), KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams), FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
Whitebox.setInternalState(fetcher, "shardAssigner", // override hashing
allShardsSingleSubtaskFn);
List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
fetcher.shutdownFetcher();
String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
if (j == i) {
assertEquals(msg, shardCount, shards.size());
} else {
assertEquals(msg, 0, shards.size());
}
}
}
}
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint.
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint() throws Exception {
List<String> fakeStreams = new LinkedList<>();
fakeStreams.add("fakeStream1");
fakeStreams.add("fakeStream2");
Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
// fakeStream1 has 3 shards before restore
restoredStateUnderTest.put(new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0))), UUID.randomUUID().toString());
restoredStateUnderTest.put(new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(1))), UUID.randomUUID().toString());
restoredStateUnderTest.put(new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(2))), UUID.randomUUID().toString());
// fakeStream2 has 2 shards before restore
restoredStateUnderTest.put(new StreamShardHandle("fakeStream2", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0))), UUID.randomUUID().toString());
restoredStateUnderTest.put(new StreamShardHandle("fakeStream2", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(1))), UUID.randomUUID().toString());
Map<String, Integer> streamToShardCount = new HashMap<>();
streamToShardCount.put("fakeStream1", // fakeStream1 had 3 shards before & 1 new shard after restore
3 + 1);
streamToShardCount.put("fakeStream2", // fakeStream2 had 2 shards before & 3 new shard after restore
2 + 3);
HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest = KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);
// using a non-resharded streams kinesis behaviour to represent that Kinesis is not
// resharded AFTER the restore
final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(fakeStreams, new TestSourceContext<>(), TestUtils.getStandardProperties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), 10, 2, new AtomicReference<>(), new LinkedList<>(), subscribedStreamsToLastSeenShardIdsUnderTest, FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
for (Map.Entry<StreamShardHandle, String> restoredState : restoredStateUnderTest.entrySet()) {
fetcher.advanceLastDiscoveredShardOfStream(restoredState.getKey().getStreamName(), restoredState.getKey().getShard().getShardId());
fetcher.registerNewSubscribedShardState(new KinesisStreamShardState(KinesisDataFetcher.convertToStreamShardMetadata(restoredState.getKey()), restoredState.getKey(), new SequenceNumber(restoredState.getValue())));
}
CheckedThread runFetcherThread = new CheckedThread() {
@Override
public void go() throws Exception {
fetcher.runFetcher();
}
};
runFetcherThread.start();
fetcher.waitUntilInitialDiscovery();
fetcher.shutdownFetcher();
runFetcherThread.sync();
// assert that the streams tracked in the state are identical to the subscribed streams
Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
assertEquals(fakeStreams.size(), streamsInState.size());
assertTrue(streamsInState.containsAll(fakeStreams));
// assert that the last seen shards in state is correctly set
for (Map.Entry<String, String> streamToLastSeenShard : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
assertEquals(KinesisShardIdGenerator.generateFromShardOrder(streamToShardCount.get(streamToLastSeenShard.getKey()) - 1), streamToLastSeenShard.getValue());
}
}
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNotRestoringFromFailure.
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNotRestoringFromFailure() throws Exception {
List<String> fakeStreams = new LinkedList<>();
fakeStreams.add("fakeStream1");
fakeStreams.add("fakeStream2");
fakeStreams.add("fakeStream3");
fakeStreams.add("fakeStream4");
HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest = KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);
Map<String, Integer> streamToShardCount = new HashMap<>();
Random rand = new Random();
for (String fakeStream : fakeStreams) {
streamToShardCount.put(fakeStream, rand.nextInt(5) + 1);
}
final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(fakeStreams, new TestSourceContext<>(), TestUtils.getStandardProperties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), 10, 2, new AtomicReference<>(), new LinkedList<>(), subscribedStreamsToLastSeenShardIdsUnderTest, FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
final DummyFlinkKinesisConsumer<String> consumer = new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);
CheckedThread consumerThread = new CheckedThread() {
@Override
public void go() throws Exception {
consumer.run(new TestSourceContext<>());
}
};
consumerThread.start();
fetcher.waitUntilRun();
consumer.cancel();
consumerThread.sync();
// assert that the streams tracked in the state are identical to the subscribed streams
Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
assertEquals(fakeStreams.size(), streamsInState.size());
assertTrue(streamsInState.containsAll(fakeStreams));
// assert that the last seen shards in state is correctly set
for (Map.Entry<String, String> streamToLastSeenShard : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
assertEquals(KinesisShardIdGenerator.generateFromShardOrder(streamToShardCount.get(streamToLastSeenShard.getKey()) - 1), streamToLastSeenShard.getValue());
}
}
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class KinesisDataFetcherTest method testSkipCorruptedRecord.
@Test
public void testSkipCorruptedRecord() throws Exception {
final String stream = "fakeStream";
final int numShards = 3;
final LinkedList<KinesisStreamShardState> testShardStates = new LinkedList<>();
final TestSourceContext<String> sourceContext = new TestSourceContext<>();
final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(singletonList(stream), sourceContext, TestUtils.getStandardProperties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), 1, 0, new AtomicReference<>(), testShardStates, new HashMap<>(), FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(Collections.singletonMap(stream, numShards)));
// FlinkKinesisConsumer is responsible for setting up the fetcher before it can be run;
// run the consumer until it reaches the point where the fetcher starts to run
final DummyFlinkKinesisConsumer<String> consumer = new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);
CheckedThread consumerThread = new CheckedThread() {
@Override
public void go() throws Exception {
consumer.run(new TestSourceContext<>());
}
};
consumerThread.start();
fetcher.waitUntilRun();
consumer.cancel();
consumerThread.sync();
assertEquals(numShards, testShardStates.size());
for (int i = 0; i < numShards; i++) {
fetcher.emitRecordAndUpdateState("record-" + i, 10L, i, new SequenceNumber("seq-num-1"));
assertEquals(new SequenceNumber("seq-num-1"), testShardStates.get(i).getLastProcessedSequenceNum());
assertEquals(new StreamRecord<>("record-" + i, 10L), sourceContext.removeLatestOutput());
}
// emitting a null (i.e., a corrupt record) should not produce any output, but still have
// the shard state updated
fetcher.emitRecordAndUpdateState(null, 10L, 1, new SequenceNumber("seq-num-2"));
assertEquals(new SequenceNumber("seq-num-2"), testShardStates.get(1).getLastProcessedSequenceNum());
// no output should have been collected
assertNull(sourceContext.removeLatestOutput());
}
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class KafkaSinkExternalContext method createSink.
@Override
public Sink<String> createSink(TestingSinkSettings sinkSettings) {
if (!topicExists(topicName)) {
createTopic(topicName, 4, (short) 1);
}
KafkaSinkBuilder<String> builder = KafkaSink.builder();
final Properties properties = new Properties();
properties.put(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, DEFAULT_TRANSACTION_TIMEOUT_IN_MS);
builder.setBootstrapServers(bootstrapServers).setDeliverGuarantee(toDeliveryGuarantee(sinkSettings.getCheckpointingMode())).setTransactionalIdPrefix("testingFramework").setKafkaProducerConfig(properties).setRecordSerializer(KafkaRecordSerializationSchema.builder().setTopic(topicName).setValueSerializationSchema(new SimpleStringSchema()).build());
return builder.build();
}
Aggregations