Use of org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper in project flink by apache.
From the class ShardConsumerTestUtils, method assertNumberOfMessagesReceivedFromKinesis:
public static ShardConsumerMetricsReporter assertNumberOfMessagesReceivedFromKinesis(
        final int expectedNumberOfMessages,
        final RecordPublisherFactory recordPublisherFactory,
        final SequenceNumber startingSequenceNumber,
        final Properties consumerProperties,
        final SequenceNumber expectedLastProcessedSequenceNum,
        final AbstractMetricGroup metricGroup) throws InterruptedException {
    ShardConsumerMetricsReporter shardMetricsReporter = new ShardConsumerMetricsReporter(metricGroup);
    StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);
    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(new KinesisStreamShardState(
            KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
            fakeToBeConsumedShard, startingSequenceNumber));
    TestSourceContext<String> sourceContext = new TestSourceContext<>();
    KinesisDeserializationSchemaWrapper<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
            Collections.singletonList("fakeStream"), sourceContext, consumerProperties,
            deserializationSchema, 10, 2, new AtomicReference<>(), subscribedShardsStateUnderTest,
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(
                    Collections.singletonList("fakeStream")),
            Mockito.mock(KinesisProxyInterface.class), Mockito.mock(KinesisProxyV2Interface.class));
    final StreamShardHandle shardHandle = subscribedShardsStateUnderTest.get(0).getStreamShardHandle();
    final SequenceNumber lastProcessedSequenceNum =
            subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum();
    final StartingPosition startingPosition =
            AWSUtil.getStartingPosition(lastProcessedSequenceNum, consumerProperties);
    final RecordPublisher recordPublisher = recordPublisherFactory.create(
            startingPosition, fetcher.getConsumerConfiguration(), metricGroup, shardHandle);
    int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
    new ShardConsumer<>(fetcher, recordPublisher, shardIndex, shardHandle,
            lastProcessedSequenceNum, shardMetricsReporter, deserializationSchema).run();
    assertEquals(expectedNumberOfMessages, sourceContext.getCollectedOutputs().size());
    assertEquals(expectedLastProcessedSequenceNum,
            subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
    return shardMetricsReporter;
}
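For context, a minimal sketch of how a test might invoke this helper. The record publisher factory, metric group, record count, and expected sequence number below are illustrative assumptions, not taken from the snippet above; only the sentinel starting position and config keys are part of the connector's public API.

Properties config = new Properties();
config.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
// start reading from the oldest available record (sentinel resolved by the connector)
SequenceNumber start = SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get();
ShardConsumerMetricsReporter metrics =
        ShardConsumerTestUtils.assertNumberOfMessagesReceivedFromKinesis(
                1000,                      // expected record count (hypothetical)
                recordPublisherFactory,    // a RecordPublisherFactory under test (assumed in scope)
                start,
                config,
                new SequenceNumber("999"), // expected last processed sequence number (hypothetical)
                metricGroup);              // an AbstractMetricGroup (assumed in scope)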
Use of org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper in project flink by apache.
From the class FlinkKinesisConsumerTest, method testPeriodicWatermark:
@Test
public void testPeriodicWatermark() throws Exception {
    String streamName = "fakeStreamName";
    Time maxOutOfOrderness = Time.milliseconds(5);
    long autoWatermarkInterval = 1_000;
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    KinesisDeserializationSchema<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
            new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
                @Override
                protected KinesisDataFetcher<String> createFetcher(
                        List<String> streams,
                        SourceContext<String> sourceContext,
                        RuntimeContext runtimeContext,
                        Properties configProps,
                        KinesisDeserializationSchema<String> deserializationSchema) {
                    KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(
                            streams, sourceContext, sourceContext.getCheckpointLock(),
                            runtimeContext, configProps, deserializationSchema,
                            getShardAssigner(), getPeriodicWatermarkAssigner(), null,
                            new AtomicReference<>(), new ArrayList<>(),
                            subscribedStreamsToLastDiscoveredShardIds,
                            (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                            null) {
                    };
                    return fetcher;
                }
            };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shardId-000000000000
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    // there is currently no test harness specifically for sources,
    // so we drive the source thread manually here
    AbstractStreamOperatorTestHarness<Object> testHarness =
            new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
    @SuppressWarnings("unchecked")
    SourceFunction.SourceContext<String> sourceContext =
            new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {
                @Override
                public void emitWatermark(Watermark mark) {
                    watermarks.add(mark);
                }

                @Override
                public void markAsTemporarilyIdle() {
                }
            };
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).start();
    shard1.put("1");
    shard1.put("2");
    shard2.put("10");
    int recordCount = 3;
    int watermarkCount = 0;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit; the first watermark is -3:
    // - Shard-1 @2
    // - Shard-2 @10
    // - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    // advance watermark
    shard1.put("10");
    recordCount++;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit; the second watermark is 5:
    // - Shard-1 @10
    // - Shard-2 @10
    // - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    sourceFunc.cancel();
    testHarness.close();
    assertEquals("record count", recordCount, testHarness.getOutput().size());
    assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
    assertEquals("watermark count", watermarkCount, watermarks.size());
}
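Outside the test harness, the wrapper exercised above is the standard way to adapt a plain DeserializationSchema for the Kinesis consumer. A minimal sketch, assuming a stream named "myStream":

Properties config = new Properties();
config.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
// wrap a vanilla Flink DeserializationSchema so it can see Kinesis record metadata
KinesisDeserializationSchema<String> schema =
        new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
FlinkKinesisConsumer<String> consumer =
        new FlinkKinesisConsumer<>("myStream", schema, config);
// the consumer can then be registered as a source, e.g. env.addSource(consumer)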
Use of org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper in project flink by apache.
From the class FlinkKinesisConsumerTest, method testSourceSynchronization:
@Test
public void testSourceSynchronization() throws Exception {
    final String streamName = "fakeStreamName";
    final Time maxOutOfOrderness = Time.milliseconds(5);
    final long autoWatermarkInterval = 1_000;
    final long watermarkSyncInterval = autoWatermarkInterval + 1;
    TestWatermarkTracker.WATERMARK.set(0);
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    final KinesisDeserializationSchema<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new OpenCheckingStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    props.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(watermarkSyncInterval));
    props.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(5));
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Collections.singletonList(shard1));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
            new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
                @Override
                protected KinesisDataFetcher<String> createFetcher(
                        List<String> streams,
                        SourceFunction.SourceContext<String> sourceContext,
                        RuntimeContext runtimeContext,
                        Properties configProps,
                        KinesisDeserializationSchema<String> deserializationSchema) {
                    KinesisDataFetcher<String> fetcher = new KinesisDataFetcher<String>(
                            streams, sourceContext, sourceContext.getCheckpointLock(),
                            runtimeContext, configProps, deserializationSchema,
                            getShardAssigner(), getPeriodicWatermarkAssigner(), getWatermarkTracker(),
                            new AtomicReference<>(), new ArrayList<>(),
                            subscribedStreamsToLastDiscoveredShardIds,
                            (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                            null) {
                        @Override
                        protected void emitWatermark() {
                            // hold the checkpoint lock so the watermark state is updated
                            // before the watermark timer callback is triggered
                            synchronized (sourceContext.getCheckpointLock()) {
                                super.emitWatermark();
                            }
                        }
                    };
                    return fetcher;
                }
            };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shardId-000000000000
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    sourceFunc.setWatermarkTracker(new TestWatermarkTracker());
    // there is currently no test harness specifically for sources,
    // so we drive the source thread manually here
    AbstractStreamOperatorTestHarness<Object> testHarness =
            new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    final ConcurrentLinkedQueue<Object> results = testHarness.getOutput();
    final AtomicBoolean throwOnCollect = new AtomicBoolean();
    @SuppressWarnings("unchecked")
    SourceFunction.SourceContext<String> sourceContext =
            new CollectingSourceContext(testHarness.getCheckpointLock(), results) {
                @Override
                public void markAsTemporarilyIdle() {
                }

                @Override
                public void collect(Serializable element) {
                    if (throwOnCollect.get()) {
                        throw new RuntimeException("expected");
                    }
                    super.collect(element);
                }

                @Override
                public void emitWatermark(Watermark mark) {
                    results.add(mark);
                }
            };
    final AtomicReference<Exception> sourceThreadError = new AtomicReference<>();
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            sourceThreadError.set(e);
        }
    }).start();
    ArrayList<Object> expectedResults = new ArrayList<>();
    final long record1 = 1;
    shard1.put(Long.toString(record1));
    expectedResults.add(Long.toString(record1));
    awaitRecordCount(results, expectedResults.size());
    // at this point we know the fetcher was initialized
    final KinesisDataFetcher fetcher =
            org.powermock.reflect.Whitebox.getInternalState(sourceFunc, "fetcher");
    // trigger watermark emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(-4));
    // verify watermark
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(0, TestWatermarkTracker.WATERMARK.get());
    // trigger sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    TestWatermarkTracker.assertGlobalWatermark(-4);
    final long record2 = record1 + (watermarkSyncInterval * 3) + 1;
    shard1.put(Long.toString(record2));
    // wait for the record to be buffered in the emitter
    final RecordEmitter<?> emitter =
            org.powermock.reflect.Whitebox.getInternalState(fetcher, "recordEmitter");
    RecordEmitter.RecordQueue emitterQueue = emitter.getQueue(0);
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && emitterQueue.getSize() < 1) {
        Thread.sleep(10);
    }
    assertEquals("first record received", 1, emitterQueue.getSize());
    // Advance the watermark. Since the new record is past the global watermark plus the
    // lookahead threshold, it won't be emitted and the watermark does not advance.
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(3000L, (long) org.powermock.reflect.Whitebox.getInternalState(fetcher, "nextWatermark"));
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // Trigger global watermark sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    expectedResults.add(Long.toString(record2));
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    TestWatermarkTracker.assertGlobalWatermark(3000);
    // Trigger watermark update and emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(3000));
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // verify exception propagation
    Assert.assertNull(sourceThreadError.get());
    throwOnCollect.set(true);
    shard1.put(Long.toString(record2 + 1));
    deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && sourceThreadError.get() == null) {
        Thread.sleep(10);
    }
    Assert.assertNotNull(sourceThreadError.get());
    Assert.assertEquals("expected", sourceThreadError.get().getMessage());
    sourceFunc.cancel();
    testHarness.close();
}
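The test above drives watermark synchronization with a TestWatermarkTracker; in a real job, the connector's JobManagerWatermarkTracker plays that role. A hedged sketch of the production wiring, with illustrative interval values and assuming record payloads that carry epoch timestamps:

Properties config = new Properties();
config.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
config.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, "30000");      // illustrative
config.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, "60000"); // illustrative
FlinkKinesisConsumer<String> consumer = new FlinkKinesisConsumer<>(
        "myStream", new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), config);
consumer.setPeriodicWatermarkAssigner(
        new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(5)) {
            @Override
            public long extractTimestamp(String element) {
                return Long.parseLong(element); // assumes the payload is an epoch timestamp
            }
        });
// share per-subtask watermarks through the JobManager so slow shards hold back fast ones
consumer.setWatermarkTracker(new JobManagerWatermarkTracker("myKinesisSource"));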
Use of org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper in project flink by apache.
From the class KinesisDataFetcherTest, method testShardToSubtaskMappingWithCustomHashFunction:
// ----------------------------------------------------------------------
// Tests shard distribution with custom hash function
// ----------------------------------------------------------------------
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {
    int totalCountOfSubtasks = 10;
    int shardCount = 3;
    for (int i = 0; i < 2; i++) {
        final int hash = i;
        final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
        Map<String, Integer> streamToShardCount = new HashMap<>();
        List<String> fakeStreams = new LinkedList<>();
        fakeStreams.add("fakeStream");
        streamToShardCount.put("fakeStream", shardCount);
        for (int j = 0; j < totalCountOfSubtasks; j++) {
            int subtaskIndex = j;
            // subscribe with default hashing
            final TestableKinesisDataFetcher fetcher = new TestableKinesisDataFetcher(
                    fakeStreams, new TestSourceContext<>(), new Properties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    totalCountOfSubtasks, subtaskIndex, new AtomicReference<>(), new LinkedList<>(),
                    KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams),
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
            // override hashing
            Whitebox.setInternalState(fetcher, "shardAssigner", allShardsSingleSubtaskFn);
            List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
            fetcher.shutdownFetcher();
            String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
            if (j == i) {
                assertEquals(msg, shardCount, shards.size());
            } else {
                assertEquals(msg, 0, shards.size());
            }
        }
    }
}
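For comparison with the injected allShardsSingleSubtaskFn above, a sketch of a custom KinesisShardAssigner that spreads shards round-robin over subtasks; the modulo logic is an illustration, not the connector's default assigner:

// KinesisShardAssigner is a functional interface: (StreamShardHandle, numSubtasks) -> subtask index
KinesisShardAssigner roundRobinAssigner = (shard, subtasks) -> {
    // parse the numeric suffix of ids like "shardId-000000000012"
    int shardNum = Integer.parseInt(
            shard.getShard().getShardId().substring("shardId-".length()));
    return shardNum % subtasks;
};
consumer.setShardAssigner(roundRobinAssigner); // consumer: a FlinkKinesisConsumer (assumed in scope)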