Example usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.
Class KinesisPubsubClient, method readAllMessages.
/**
 * Reads one batch of records from every shard of the given stream and deserialises each
 * record payload.
 *
 * <p>For each shard a {@code TRIM_HORIZON} shard iterator is requested and a single
 * {@code getRecords} call is issued for at most 10 records. Shards holding more records than
 * that are therefore only partially read, despite the method name.
 *
 * @param streamName name of the Kinesis stream to read from
 * @param deserialiser converts the payload bytes of one record into a message
 * @param <T> type of the deserialised messages
 * @return the deserialised messages, shard by shard, in the order the proxy returned them
 * @throws Exception if listing shards, obtaining an iterator or fetching records fails
 */
public <T> List<T> readAllMessages(String streamName, Function<byte[], T> deserialiser) throws Exception {
    KinesisProxyInterface kinesisProxy = KinesisProxy.create(properties);

    // NOTE(review): a null last-seen shard id presumably asks for the full shard list — confirm
    Map<String, String> streamNamesWithLastSeenShardIds = new HashMap<>();
    streamNamesWithLastSeenShardIds.put(streamName, null);
    GetShardListResult shardListResult = kinesisProxy.getShardList(streamNamesWithLastSeenShardIds);

    int maxRecordsToFetch = 10;
    List<T> messages = new ArrayList<>();

    // retrieve records from all shards
    for (StreamShardHandle ssh : shardListResult.getRetrievedShardListOfStream(streamName)) {
        String shardIterator = kinesisProxy.getShardIterator(ssh, "TRIM_HORIZON", null);
        GetRecordsResult getRecordsResult = kinesisProxy.getRecords(shardIterator, maxRecordsToFetch);
        for (Record record : getRecordsResult.getRecords()) {
            // ByteBuffer.array() hands out the whole backing array (ignoring position, limit
            // and array offset) and throws for read-only or direct buffers, so copy exactly
            // the readable bytes instead. duplicate() leaves the record's own buffer
            // position untouched.
            java.nio.ByteBuffer payload = record.getData().duplicate();
            byte[] payloadBytes = new byte[payload.remaining()];
            payload.get(payloadBytes);
            messages.add(deserialiser.apply(payloadBytes));
        }
    }
    return messages;
}
Example usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.
Class UniformShardAssignerTest, method testAssignment.
/**
 * Verifies that a shard covering the hash key range {@code [rangeStart, rangeEnd]} is mapped
 * to {@code expectedSubtask} when the assigner result is reduced modulo {@code nSubtasks}.
 */
@ParameterizedTest
@MethodSource("testCaseProvider")
public void testAssignment(BigInteger rangeStart, BigInteger rangeEnd, int nSubtasks, int expectedSubtask) {
    HashKeyRange hashKeyRange = new HashKeyRange()
            .withStartingHashKey(rangeStart.toString())
            .withEndingHashKey(rangeEnd.toString());
    Shard shard = new Shard()
            .withShardId("shardId-000000003378")
            .withHashKeyRange(hashKeyRange);

    // streamName = "" hashes to zero
    StreamShardHandle handle = new StreamShardHandle("", shard);

    UniformShardAssigner assigner = new UniformShardAssigner();
    Assertions.assertEquals(
            expectedSubtask,
            Math.abs(assigner.assign(handle, nSubtasks)) % nSubtasks);
}
Example usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.
Class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint.
/**
 * Registers restored shard state for two streams, runs the fetcher through its initial
 * discovery against a non-resharded Kinesis behaviour, and checks that the last seen shard
 * id tracked per stream is the id of that stream's highest-order shard.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");

    Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        restoredStateUnderTest.put(
                new StreamShardHandle(
                        "fakeStream1",
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
                UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        restoredStateUnderTest.put(
                new StreamShardHandle(
                        "fakeStream2",
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
                UUID.randomUUID().toString());
    }

    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 will still have 3 shards after restore
    streamToShardCount.put("fakeStream1", 3);
    // fakeStream2 will still have 2 shards after restore
    streamToShardCount.put("fakeStream2", 2);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // the fake Kinesis behaviour is built from the unchanged per-stream shard counts
    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    fakeStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    subscribedStreamsToLastSeenShardIdsUnderTest,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // feed every restored shard into the fetcher before it starts running
    for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
        StreamShardHandle restoredShard = restoredEntry.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(
                restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
                        restoredShard,
                        new SequenceNumber(restoredEntry.getValue())));
    }

    CheckedThread runFetcherThread = new CheckedThread() {
        @Override
        public void go() throws Exception {
            fetcher.runFetcher();
        }
    };
    runFetcherThread.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    runFetcherThread.sync();

    // assert that the streams tracked in the state are identical to the subscribed streams
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertEquals(fakeStreams.size(), streamsInState.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // assert that the last seen shards in state is correctly set
    for (Map.Entry<String, String> lastSeenEntry : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
        String expectedLastShardId =
                KinesisShardIdGenerator.generateFromShardOrder(
                        streamToShardCount.get(lastSeenEntry.getKey()) - 1);
        assertEquals(expectedLastShardId, lastSeenEntry.getValue());
    }
}
Example usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.
Class KinesisDataFetcherTest, method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist.
/**
 * Same scenario as the restore test without resharding, but two of the four subscribed
 * streams report zero shards. Checks that streams with shards track their highest-order
 * shard id and that the shard-less streams keep a {@code null} last seen shard id.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add("fakeStream1");
    fakeStreams.add("fakeStream2");
    // fakeStream3 will not have any shards
    fakeStreams.add("fakeStream3");
    // fakeStream4 will not have any shards
    fakeStreams.add("fakeStream4");

    Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        restoredStateUnderTest.put(
                new StreamShardHandle(
                        "fakeStream1",
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
                UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        restoredStateUnderTest.put(
                new StreamShardHandle(
                        "fakeStream2",
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder))),
                UUID.randomUUID().toString());
    }

    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 has fixed 3 shards
    streamToShardCount.put("fakeStream1", 3);
    // fakeStream2 has fixed 2 shards
    streamToShardCount.put("fakeStream2", 2);
    // no shards can be found for fakeStream3
    streamToShardCount.put("fakeStream3", 0);
    // no shards can be found for fakeStream4
    streamToShardCount.put("fakeStream4", 0);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // using a non-resharded streams kinesis behaviour to represent that Kinesis is not
    // resharded AFTER the restore
    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    fakeStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    subscribedStreamsToLastSeenShardIdsUnderTest,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // feed every restored shard into the fetcher before it starts running
    for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
        StreamShardHandle restoredShard = restoredEntry.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(
                restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
                        restoredShard,
                        new SequenceNumber(restoredEntry.getValue())));
    }

    CheckedThread runFetcherThread = new CheckedThread() {
        @Override
        public void go() throws Exception {
            fetcher.runFetcher();
        }
    };
    runFetcherThread.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    runFetcherThread.sync();

    // assert that the streams tracked in the state are identical to the subscribed streams
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertEquals(fakeStreams.size(), streamsInState.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // assert that the last seen shards in state is correctly set
    String lastSeenOfStream1 = subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream1");
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(2), lastSeenOfStream1);
    String lastSeenOfStream2 = subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream2");
    assertEquals(KinesisShardIdGenerator.generateFromShardOrder(1), lastSeenOfStream2);
    assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream3"));
    assertNull(subscribedStreamsToLastSeenShardIdsUnderTest.get("fakeStream4"));
}
Example usage of org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle in the Apache Flink project.
Class KinesisDataFetcherTest, method testPeriodicWatermark.
/**
 * Drives periodic watermark emission of a fetcher whose clock is controlled by the test.
 *
 * <p>Checks, in order: no watermark after a record stamped {@code Long.MIN_VALUE}; a
 * watermark equal to the record timestamp after a record stamped {@code 1}; no idle marker
 * and no watermark after the clock advances while no idle interval is configured; and the
 * source being marked temporarily idle, without a watermark, once the idle interval is set.
 */
@Test
public void testPeriodicWatermark() {
    final MutableLong fakeClock = new MutableLong();
    final MutableBoolean markedIdle = new MutableBoolean();
    final List<Watermark> emittedWatermarks = new ArrayList<>();

    String streamName = "fakeStream1";
    StreamShardHandle shardHandle =
            new StreamShardHandle(
                    streamName,
                    new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0)));

    // source context that records emitted watermarks and the idle marker
    TestSourceContext<String> sourceContext = new TestSourceContext<String>() {
        @Override
        public void emitWatermark(Watermark mark) {
            emittedWatermarks.add(mark);
        }

        @Override
        public void markAsTemporarilyIdle() {
            markedIdle.setTrue();
        }
    };

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest = new HashMap<>();

    // fetcher whose notion of "now" is the test-controlled clock
    final KinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<String>(
                    singletonList(streamName),
                    sourceContext,
                    new java.util.Properties(),
                    new KinesisDeserializationSchemaWrapper<>(
                            new org.apache.flink.streaming.util.serialization.SimpleStringSchema()),
                    1,
                    1,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    subscribedStreamsToLastSeenShardIdsUnderTest,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(new HashMap<>())) {
                @Override
                protected long getCurrentTimeMillis() {
                    return fakeClock.getValue();
                }
            };
    Whitebox.setInternalState(fetcher, "periodicWatermarkAssigner", watermarkAssigner);

    SequenceNumber sequenceNumber = new SequenceNumber("fakeSequenceNumber");

    // register shards to subsequently emit records
    KinesisStreamShardState shardState =
            new KinesisStreamShardState(
                    KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                    shardHandle,
                    sequenceNumber);
    int shardIndex = fetcher.registerNewSubscribedShardState(shardState);

    // first record carries the minimum timestamp: the record is collected, no watermark yet
    StreamRecord<String> firstRecord = new StreamRecord<>(String.valueOf(Long.MIN_VALUE), Long.MIN_VALUE);
    fetcher.emitRecordAndUpdateState(firstRecord.getValue(), firstRecord.getTimestamp(), shardIndex, sequenceNumber);
    Assert.assertEquals(firstRecord, sourceContext.getCollectedOutputs().poll());
    fetcher.emitWatermark();
    Assert.assertTrue("potential watermark equals previous watermark", emittedWatermarks.isEmpty());

    // second record advances the timestamp: a watermark at that timestamp is emitted
    StreamRecord<String> secondRecord = new StreamRecord<>(String.valueOf(1), 1);
    fetcher.emitRecordAndUpdateState(secondRecord.getValue(), secondRecord.getTimestamp(), shardIndex, sequenceNumber);
    Assert.assertEquals(secondRecord, sourceContext.getCollectedOutputs().poll());
    fetcher.emitWatermark();
    Assert.assertFalse("watermark advanced", emittedWatermarks.isEmpty());
    Assert.assertEquals(new Watermark(secondRecord.getTimestamp()), emittedWatermarks.remove(0));
    Assert.assertFalse("not idle", markedIdle.booleanValue());

    // test idle timeout
    long idleTimeout = 10;
    // advance clock idleTimeout
    fakeClock.add(idleTimeout + 1);
    fetcher.emitWatermark();
    Assert.assertFalse("not idle", markedIdle.booleanValue());
    Assert.assertTrue("not idle, no new watermark", emittedWatermarks.isEmpty());

    // activate idle timeout
    Whitebox.setInternalState(fetcher, "shardIdleIntervalMillis", idleTimeout);
    fetcher.emitWatermark();
    Assert.assertTrue("idle", markedIdle.booleanValue());
    Assert.assertTrue("idle, no watermark", emittedWatermarks.isEmpty());
}
Aggregations