Use of org.apache.flink.streaming.util.CollectingSourceContext in project flink by apache.
The class FlinkKinesisConsumerTest, method testPeriodicWatermark:
@Test
public void testPeriodicWatermark() throws Exception {
    String streamName = "fakeStreamName";
    Time maxOutOfOrderness = Time.milliseconds(5);
    long autoWatermarkInterval = 1_000;
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    KinesisDeserializationSchema<String> deserializationSchema =
        new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
        new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
            @Override
            protected KinesisDataFetcher<String> createFetcher(
                    List<String> streams,
                    SourceContext<String> sourceContext,
                    RuntimeContext runtimeContext,
                    Properties configProps,
                    KinesisDeserializationSchema<String> deserializationSchema) {
                KinesisDataFetcher<String> fetcher =
                    new KinesisDataFetcher<String>(
                        streams,
                        sourceContext,
                        sourceContext.getCheckpointLock(),
                        runtimeContext,
                        configProps,
                        deserializationSchema,
                        getShardAssigner(),
                        getPeriodicWatermarkAssigner(),
                        null, // no watermark tracker in this test
                        new AtomicReference<>(),
                        new ArrayList<>(),
                        subscribedStreamsToLastDiscoveredShardIds,
                        (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                        null) {};
                return fetcher;
            }
        };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shard ids look like "shardId-000000000000"; assign by the numeric suffix
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness =
        new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
    @SuppressWarnings("unchecked")
    SourceFunction.SourceContext<String> sourceContext =
        new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {
            @Override
            public void emitWatermark(Watermark mark) {
                watermarks.add(mark);
            }

            @Override
            public void markAsTemporarilyIdle() {
            }
        };
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).start();
    shard1.put("1");
    shard1.put("2");
    shard2.put("10");
    int recordCount = 3;
    int watermarkCount = 0;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit; the first watermark is -3:
    // - Shard-1 @2
    // - Shard-2 @10
    // - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    // advance the watermark
    shard1.put("10");
    recordCount++;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit; the second watermark is 5:
    // - Shard-1 @10
    // - Shard-2 @10
    // - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    sourceFunc.cancel();
    testHarness.close();
    assertEquals("record count", recordCount, testHarness.getOutput().size());
    assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
    assertEquals("watermark count", watermarkCount, watermarks.size());
}
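The method calls an awaitRecordCount helper and installs a TestTimestampExtractor, neither of which is shown on this page. The sketch below is a hypothetical reconstruction inferred from how they are used above, not a copy of the definitions in FlinkKinesisConsumerTest: the await helper polls the output queue under a deadline, and the extractor treats each numeric record string as its own event timestamp, so per shard the watermark is the largest timestamp seen minus maxOutOfOrderness (the fetcher then takes the minimum across shards, matching the arithmetic in the comments).

// Hypothetical reconstruction, inferred from usage above; assumes
// org.apache.flink.api.common.time.Deadline, java.time.Duration, and
// org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor.
private void awaitRecordCount(ConcurrentLinkedQueue<? extends Object> queue, int count) throws Exception {
    // Poll until the queue holds the expected number of elements or the deadline expires.
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && queue.size() < count) {
        Thread.sleep(10);
    }
}

// Records are numeric strings; the parsed value doubles as the event timestamp.
private static class TestTimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<String> {
    private static final long serialVersionUID = 1L;

    TestTimestampExtractor(Time maxOutOfOrderness) {
        super(maxOutOfOrderness);
    }

    @Override
    public long extractTimestamp(String element) {
        return Long.parseLong(element);
    }
}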
Use of org.apache.flink.streaming.util.CollectingSourceContext in project flink by apache.
The class FlinkKinesisConsumerTest, method testSourceSynchronization:
@Test
public void testSourceSynchronization() throws Exception {
    final String streamName = "fakeStreamName";
    final Time maxOutOfOrderness = Time.milliseconds(5);
    final long autoWatermarkInterval = 1_000;
    final long watermarkSyncInterval = autoWatermarkInterval + 1;
    TestWatermarkTracker.WATERMARK.set(0);
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    final KinesisDeserializationSchema<String> deserializationSchema =
        new KinesisDeserializationSchemaWrapper<>(new OpenCheckingStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    props.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(watermarkSyncInterval));
    props.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(5));
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Collections.singletonList(shard1));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
        new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
            @Override
            protected KinesisDataFetcher<String> createFetcher(
                    List<String> streams,
                    SourceFunction.SourceContext<String> sourceContext,
                    RuntimeContext runtimeContext,
                    Properties configProps,
                    KinesisDeserializationSchema<String> deserializationSchema) {
                KinesisDataFetcher<String> fetcher =
                    new KinesisDataFetcher<String>(
                        streams,
                        sourceContext,
                        sourceContext.getCheckpointLock(),
                        runtimeContext,
                        configProps,
                        deserializationSchema,
                        getShardAssigner(),
                        getPeriodicWatermarkAssigner(),
                        getWatermarkTracker(),
                        new AtomicReference<>(),
                        new ArrayList<>(),
                        subscribedStreamsToLastDiscoveredShardIds,
                        (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                        null) {
                        @Override
                        protected void emitWatermark() {
                            // hold the checkpoint lock while emitting, as the runtime does
                            // before the watermark timer callback is triggered
                            synchronized (sourceContext.getCheckpointLock()) {
                                super.emitWatermark();
                            }
                        }
                    };
                return fetcher;
            }
        };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // shard ids look like "shardId-000000000000"; assign by the numeric suffix
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    sourceFunc.setWatermarkTracker(new TestWatermarkTracker());
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness =
        new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    final ConcurrentLinkedQueue<Object> results = testHarness.getOutput();
    final AtomicBoolean throwOnCollect = new AtomicBoolean();
    @SuppressWarnings("unchecked")
    SourceFunction.SourceContext<String> sourceContext =
        new CollectingSourceContext(testHarness.getCheckpointLock(), results) {
            @Override
            public void markAsTemporarilyIdle() {
            }

            @Override
            public void collect(Serializable element) {
                if (throwOnCollect.get()) {
                    throw new RuntimeException("expected");
                }
                super.collect(element);
            }

            @Override
            public void emitWatermark(Watermark mark) {
                results.add(mark);
            }
        };
    final AtomicReference<Exception> sourceThreadError = new AtomicReference<>();
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            sourceThreadError.set(e);
        }
    }).start();
    ArrayList<Object> expectedResults = new ArrayList<>();
    final long record1 = 1;
    shard1.put(Long.toString(record1));
    expectedResults.add(Long.toString(record1));
    awaitRecordCount(results, expectedResults.size());
    // at this point we know the fetcher was initialized
    final KinesisDataFetcher fetcher =
        org.powermock.reflect.Whitebox.getInternalState(sourceFunc, "fetcher");
    // trigger watermark emit (record1 = 1, so the watermark is 1 - 5 = -4)
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(-4));
    // verify watermark
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(0, TestWatermarkTracker.WATERMARK.get());
    // trigger global watermark sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    TestWatermarkTracker.assertGlobalWatermark(-4);
    final long record2 = record1 + (watermarkSyncInterval * 3) + 1;
    shard1.put(Long.toString(record2));
    // wait for the record to be buffered in the emitter
    final RecordEmitter<?> emitter =
        org.powermock.reflect.Whitebox.getInternalState(fetcher, "recordEmitter");
    RecordEmitter.RecordQueue emitterQueue = emitter.getQueue(0);
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && emitterQueue.getSize() < 1) {
        Thread.sleep(10);
    }
    assertEquals("first record received", 1, emitterQueue.getSize());
    // Advance the watermark. Since the new record is past global watermark + lookahead threshold,
    // it won't be emitted and the watermark does not advance
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(3000L, (long) org.powermock.reflect.Whitebox.getInternalState(fetcher, "nextWatermark"));
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // Trigger global watermark sync; the buffered record can now be emitted
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    expectedResults.add(Long.toString(record2));
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    TestWatermarkTracker.assertGlobalWatermark(3000);
    // Trigger watermark update and emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(3000));
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // verify exception propagation
    Assert.assertNull(sourceThreadError.get());
    throwOnCollect.set(true);
    shard1.put(Long.toString(record2 + 1));
    deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && sourceThreadError.get() == null) {
        Thread.sleep(10);
    }
    Assert.assertNotNull(sourceThreadError.get());
Assert.assertNotNull("expected", sourceThreadError.get().getMessage());
    sourceFunc.cancel();
    testHarness.close();
}
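This method additionally relies on TestWatermarkTracker (its WATERMARK field and assertGlobalWatermark method are used above) and on OpenCheckingStringSchema, presumably a SimpleStringSchema variant that verifies open() was called before the first record is deserialized; neither definition appears on this page. Below is a plausible sketch of the tracker, inferred from the calls above rather than copied from the test class: it extends the Kinesis connector's WatermarkTracker and stores the last locally reported watermark in a static AtomicLong that stands in for the global aggregate.

// Hypothetical reconstruction, inferred from WATERMARK.set(0) and assertGlobalWatermark(...)
// above; the real class may differ. Assumes
// org.apache.flink.streaming.connectors.kinesis.util.WatermarkTracker.
private static class TestWatermarkTracker extends WatermarkTracker {
    static final AtomicLong WATERMARK = new AtomicLong();

    @Override
    public long getUpdateTimeoutCount() {
        return 0;
    }

    @Override
    public long updateWatermark(long localWatermark) {
        // Single-subtask "global" aggregate: just record the local watermark.
        WATERMARK.set(localWatermark);
        return localWatermark;
    }

    static void assertGlobalWatermark(long expected) {
        Assert.assertEquals(expected, WATERMARK.get());
    }
}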