Use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.
From the class FlinkKinesisProducerTest, method testOpen — verifies that opening the sink operator also opens the user-provided serialization schema:
@Test
public void testOpen() throws Exception {
    MockSerializationSchema<Object> serializationSchema = new MockSerializationSchema<>();
    Properties config = TestUtils.getStandardProperties();
    FlinkKinesisProducer<Object> producer =
            new FlinkKinesisProducer<>(serializationSchema, config);
    AbstractStreamOperatorTestHarness<Object> testHarness =
            new AbstractStreamOperatorTestHarness<>(new StreamSink<>(producer), 1, 1, 0);
    testHarness.open();
    assertThat("Open method was not called", serializationSchema.isOpenCalled(), is(true));
}
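The MockSerializationSchema used above is not shown in this excerpt. A minimal sketch of such a helper, assuming all the test needs is a flag recording whether open() was invoked (the class body and its isOpenCalled() accessor are illustrative reconstructions, not Flink's actual test utility):

import org.apache.flink.api.common.serialization.SerializationSchema;

// Hypothetical stand-in: records whether open() was called so the test can assert on it.
public class MockSerializationSchema<T> implements SerializationSchema<T> {
    private volatile boolean openCalled;

    @Override
    public void open(InitializationContext context) {
        this.openCalled = true; // the only behavior testOpen() observes
    }

    @Override
    public byte[] serialize(T element) {
        return new byte[0]; // the payload is irrelevant to the open() test
    }

    public boolean isOpenCalled() {
        return openCalled;
    }
}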
Use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.
From the class FlinkKinesisConsumerTest, method testSourceSynchronization — drives a mocked Kinesis fetcher through watermark emission, global watermark synchronization, and error propagation:
@Test
public void testSourceSynchronization() throws Exception {
    final String streamName = "fakeStreamName";
    final Time maxOutOfOrderness = Time.milliseconds(5);
    final long autoWatermarkInterval = 1_000;
    final long watermarkSyncInterval = autoWatermarkInterval + 1;
    TestWatermarkTracker.WATERMARK.set(0);
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    final KinesisDeserializationSchema<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new OpenCheckingStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    props.setProperty(ConsumerConfigConstants.WATERMARK_SYNC_MILLIS, Long.toString(watermarkSyncInterval));
    props.setProperty(ConsumerConfigConstants.WATERMARK_LOOKAHEAD_MILLIS, Long.toString(5));
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Collections.singletonList(shard1));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
            new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
                @Override
                protected KinesisDataFetcher<String> createFetcher(
                        List<String> streams,
                        SourceFunction.SourceContext<String> sourceContext,
                        RuntimeContext runtimeContext,
                        Properties configProps,
                        KinesisDeserializationSchema<String> deserializationSchema) {
                    KinesisDataFetcher<String> fetcher =
                            new KinesisDataFetcher<String>(
                                    streams,
                                    sourceContext,
                                    sourceContext.getCheckpointLock(),
                                    runtimeContext,
                                    configProps,
                                    deserializationSchema,
                                    getShardAssigner(),
                                    getPeriodicWatermarkAssigner(),
                                    getWatermarkTracker(),
                                    new AtomicReference<>(),
                                    new ArrayList<>(),
                                    subscribedStreamsToLastDiscoveredShardIds,
                                    (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                                    null) {
                                @Override
                                protected void emitWatermark() {
                                    // hold the checkpoint lock so the emit cannot race with
                                    // the watermark timer callback
                                    synchronized (sourceContext.getCheckpointLock()) {
                                        super.emitWatermark();
                                    }
                                }
                            };
                    return fetcher;
                }
            };
    sourceFunc.setShardAssigner((streamShardHandle, i) -> {
        // parse the shard index from an id of the form "shardId-000000000000"
        return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
    });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    sourceFunc.setWatermarkTracker(new TestWatermarkTracker());
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness =
            new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    final ConcurrentLinkedQueue<Object> results = testHarness.getOutput();
    final AtomicBoolean throwOnCollect = new AtomicBoolean();
    @SuppressWarnings("unchecked")
    SourceFunction.SourceContext<String> sourceContext =
            new CollectingSourceContext(testHarness.getCheckpointLock(), results) {
                @Override
                public void markAsTemporarilyIdle() {}

                @Override
                public void collect(Serializable element) {
                    if (throwOnCollect.get()) {
                        throw new RuntimeException("expected");
                    }
                    super.collect(element);
                }

                @Override
                public void emitWatermark(Watermark mark) {
                    results.add(mark);
                }
            };
    final AtomicReference<Exception> sourceThreadError = new AtomicReference<>();
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            sourceThreadError.set(e);
        }
    }).start();
    ArrayList<Object> expectedResults = new ArrayList<>();
    final long record1 = 1;
    shard1.put(Long.toString(record1));
    expectedResults.add(Long.toString(record1));
    awaitRecordCount(results, expectedResults.size());
    // at this point we know the fetcher was initialized
    final KinesisDataFetcher fetcher =
            org.powermock.reflect.Whitebox.getInternalState(sourceFunc, "fetcher");
    // trigger watermark emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(-4));
    // verify watermark
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(0, TestWatermarkTracker.WATERMARK.get());
    // trigger sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    TestWatermarkTracker.assertGlobalWatermark(-4);
    final long record2 = record1 + (watermarkSyncInterval * 3) + 1;
    shard1.put(Long.toString(record2));
    // wait for the record to be buffered in the emitter
    final RecordEmitter<?> emitter =
            org.powermock.reflect.Whitebox.getInternalState(fetcher, "recordEmitter");
    RecordEmitter.RecordQueue emitterQueue = emitter.getQueue(0);
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && emitterQueue.getSize() < 1) {
        Thread.sleep(10);
    }
    assertEquals("first record received", 1, emitterQueue.getSize());
    // Advance the watermark. Since the new record is past global watermark + threshold,
    // it won't be emitted and the watermark does not advance
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    assertEquals(3000L, (long) org.powermock.reflect.Whitebox.getInternalState(fetcher, "nextWatermark"));
    TestWatermarkTracker.assertGlobalWatermark(-4);
    // Trigger global watermark sync
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    expectedResults.add(Long.toString(record2));
    awaitRecordCount(results, expectedResults.size());
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    TestWatermarkTracker.assertGlobalWatermark(3000);
    // Trigger watermark update and emit
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    expectedResults.add(new Watermark(3000));
    assertThat(results, org.hamcrest.Matchers.contains(expectedResults.toArray()));
    // verify exception propagation
    Assert.assertNull(sourceThreadError.get());
    throwOnCollect.set(true);
    shard1.put(Long.toString(record2 + 1));
    deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && sourceThreadError.get() == null) {
        Thread.sleep(10);
    }
    Assert.assertNotNull(sourceThreadError.get());
    Assert.assertEquals("expected", sourceThreadError.get().getMessage());
    sourceFunc.cancel();
    testHarness.close();
}
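Two helpers referenced above, awaitRecordCount and TestWatermarkTracker, are not shown in this excerpt. A minimal sketch, assuming the former polls the harness output under a deadline and the latter mirrors the last aggregated watermark into a static field for assertions (both are illustrative reconstructions; depending on the connector version, WatermarkTracker may declare further abstract methods needing trivial overrides):

import java.time.Duration;
import java.util.Queue;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.flink.api.common.time.Deadline;
import org.apache.flink.streaming.connectors.kinesis.util.WatermarkTracker;
import org.junit.Assert;

// Polls until the output queue holds the expected number of elements or 10s elapse.
private void awaitRecordCount(Queue<Object> queue, int count) throws Exception {
    Deadline deadline = Deadline.fromNow(Duration.ofSeconds(10));
    while (deadline.hasTimeLeft() && queue.size() < count) {
        Thread.sleep(10);
    }
    Assert.assertEquals(count, queue.size());
}

// Records the most recent global watermark in a static field so the test can assert on it.
private static class TestWatermarkTracker extends WatermarkTracker {
    static final AtomicLong WATERMARK = new AtomicLong();

    @Override
    public long updateWatermark(long localWatermark) {
        WATERMARK.set(localWatermark);
        return localWatermark;
    }

    static void assertGlobalWatermark(long expected) {
        Assert.assertEquals(expected, WATERMARK.get());
    }
}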
Use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.
From the class ArrowSourceFunctionTestBase, method testParallelProcessing — runs two source subtasks concurrently and checks that together they emit every element:
@Test
public void testParallelProcessing() throws Exception {
    Tuple2<List<RowData>, Integer> testData = getTestData();
    final ArrowSourceFunction arrowSourceFunction =
            createTestArrowSourceFunction(testData.f0, testData.f1);
    final AbstractStreamOperatorTestHarness<RowData> testHarness =
            new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction), 2, 2, 0);
    testHarness.open();
    final Throwable[] error = new Throwable[2];
    final OneShotLatch latch = new OneShotLatch();
    final AtomicInteger numOfEmittedElements = new AtomicInteger(0);
    final List<RowData> results = Collections.synchronizedList(new ArrayList<>());
    // run the first source subtask asynchronously
    Thread runner = new Thread(() -> {
        try {
            arrowSourceFunction.run(new DummySourceContext<RowData>() {
                @Override
                public void collect(RowData element) {
                    results.add(typeSerializer.copy(element));
                    if (numOfEmittedElements.incrementAndGet() == testData.f0.size()) {
                        latch.trigger();
                    }
                }
            });
        } catch (Throwable t) {
            error[0] = t;
        }
    });
    runner.start();
    final ArrowSourceFunction arrowSourceFunction2 =
            createTestArrowSourceFunction(testData.f0, testData.f1);
    final AbstractStreamOperatorTestHarness<RowData> testHarness2 =
            new AbstractStreamOperatorTestHarness<>(new StreamSource<>(arrowSourceFunction2), 2, 2, 1);
    testHarness2.open();
    // run the second source subtask asynchronously
    Thread runner2 = new Thread(() -> {
        try {
            arrowSourceFunction2.run(new DummySourceContext<RowData>() {
                @Override
                public void collect(RowData element) {
                    results.add(typeSerializer.copy(element));
                    if (numOfEmittedElements.incrementAndGet() == testData.f0.size()) {
                        latch.trigger();
                    }
                }
            });
        } catch (Throwable t) {
            error[1] = t;
        }
    });
    runner2.start();
    if (!latch.isTriggered()) {
        latch.await();
    }
    runner.join();
    runner2.join();
    testHarness.close();
    testHarness2.close();
    Assert.assertNull(error[0]);
    Assert.assertNull(error[1]);
    Assert.assertEquals(testData.f0.size(), numOfEmittedElements.get());
    checkElementsEquals(results, testData.f0);
}
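DummySourceContext is referenced above but not defined in this excerpt. A plausible minimal sketch, assuming it is an abstract no-op SourceContext whose collect() each test overrides (the class is an illustrative reconstruction, not the actual Flink test class):

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;

// Hypothetical no-op SourceContext base: tests override collect() and ignore the rest.
private abstract static class DummySourceContext<T> implements SourceFunction.SourceContext<T> {
    private final Object lock = new Object();

    @Override
    public void collectWithTimestamp(T element, long timestamp) {
        collect(element); // timestamps are irrelevant to these tests
    }

    @Override
    public void emitWatermark(Watermark mark) {}

    @Override
    public void markAsTemporarilyIdle() {}

    @Override
    public Object getCheckpointLock() {
        return lock;
    }

    @Override
    public void close() {}
}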
Use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project beam by apache.
From the class BoundedSourceRestoreTest, method testRestore — reads part of a bounded source, snapshots the wrapper, and verifies that the remainder is read after restoring a fresh operator from the snapshot:
@Test
public void testRestore() throws Exception {
    final int numElements = 102;
    final int firstBatchSize = 23;
    final int secondBatchSize = numElements - firstBatchSize;
    final Set<Long> emittedElements = new HashSet<>();
    final Object checkpointLock = new Object();
    PipelineOptions options = PipelineOptionsFactory.create();
    // bounded source wrapped as unbounded source
    BoundedSource<Long> source = CountingSource.upTo(numElements);
    BoundedToUnboundedSourceAdapter<Long> unboundedSource = new BoundedToUnboundedSourceAdapter<>(source);
    UnboundedSourceWrapper<Long, Checkpoint<Long>> flinkWrapper =
            new UnboundedSourceWrapper<>("stepName", options, unboundedSource, numSplits);
    StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>> sourceOperator =
            new StreamSource<>(flinkWrapper);
    AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> testHarness =
            new AbstractStreamOperatorTestHarness<>(
                    sourceOperator,
                    numTasks, /* max parallelism */
                    numTasks, /* parallelism */
                    0 /* subtask index */);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    // the first batch of elements is read
    boolean readFirstBatchOfElements = false;
    try {
        testHarness.open();
        StreamSources.run(sourceOperator, checkpointLock,
                new PartialCollector<>(emittedElements, firstBatchSize));
    } catch (SuccessException e) {
        // success
        readFirstBatchOfElements = true;
    }
    assertTrue("Did not successfully read first batch of elements.", readFirstBatchOfElements);
    // draw a snapshot
    OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    // finalize checkpoint
    final ArrayList<Integer> finalizeList = new ArrayList<>();
    TestCountingSource.setFinalizeTracker(finalizeList);
    testHarness.notifyOfCompletedCheckpoint(0);
    // create a completely new source but restore from the snapshot
    BoundedSource<Long> restoredSource = CountingSource.upTo(numElements);
    BoundedToUnboundedSourceAdapter<Long> restoredUnboundedSource =
            new BoundedToUnboundedSourceAdapter<>(restoredSource);
    UnboundedSourceWrapper<Long, Checkpoint<Long>> restoredFlinkWrapper =
            new UnboundedSourceWrapper<>("stepName", options, restoredUnboundedSource, numSplits);
    StreamSource<WindowedValue<ValueWithRecordId<Long>>, UnboundedSourceWrapper<Long, Checkpoint<Long>>> restoredSourceOperator =
            new StreamSource<>(restoredFlinkWrapper);
    // set parallelism to 1 to ensure that our testing operator gets all checkpointed state
    AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<Long>>> restoredTestHarness =
            new AbstractStreamOperatorTestHarness<>(
                    restoredSourceOperator,
                    numTasks, /* max parallelism */
                    1, /* parallelism */
                    0 /* subtask index */);
    restoredTestHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    // restore snapshot
    restoredTestHarness.initializeState(snapshot);
    // run again and verify that we see the other elements
    boolean readSecondBatchOfElements = false;
    try {
        restoredTestHarness.open();
        StreamSources.run(restoredSourceOperator, checkpointLock,
                new PartialCollector<>(emittedElements, secondBatchSize));
    } catch (SuccessException e) {
        // success
        readSecondBatchOfElements = true;
    }
    assertTrue("Did not successfully read second batch of elements.", readSecondBatchOfElements);
    // verify that we saw all numElements elements
    assertTrue(emittedElements.size() == numElements);
}
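PartialCollector and SuccessException are likewise not shown. A sketch under the assumption that the collector records element ids and aborts the source with SuccessException once the batch limit is reached; the exact method set of Flink's Output interface varies across versions, so additional no-op overrides may be required (names and structure are illustrative reconstructions of Beam's private test classes):

import java.util.Set;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.ValueWithRecordId;
import org.apache.flink.streaming.api.operators.Output;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.util.OutputTag;

// Signals that the intended number of elements was read; caught by the test body.
private static class SuccessException extends RuntimeException {}

// Consumes records until the limit is hit, then aborts the source via SuccessException.
private static class PartialCollector<T> implements Output<StreamRecord<WindowedValue<ValueWithRecordId<T>>>> {
    private final Set<T> emittedElements;
    private final int limit;
    private int consumed;

    PartialCollector(Set<T> emittedElements, int limit) {
        this.emittedElements = emittedElements;
        this.limit = limit;
    }

    @Override
    public void collect(StreamRecord<WindowedValue<ValueWithRecordId<T>>> record) {
        // unwrap StreamRecord -> WindowedValue -> ValueWithRecordId -> T
        emittedElements.add(record.getValue().getValue().getValue());
        if (++consumed >= limit) {
            throw new SuccessException();
        }
    }

    @Override
    public <X> void collect(OutputTag<X> tag, StreamRecord<X> record) {}

    @Override
    public void emitWatermark(Watermark watermark) {}

    @Override
    public void emitLatencyMarker(LatencyMarker latencyMarker) {}

    @Override
    public void close() {}
}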
Use of org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness in project flink by apache.
From the class StatefulSequenceSourceTest, method testCheckpointRestore — snapshots two running source subtasks, repartitions their state into a single subtask, and checks exactly-once emission across the restore:
@Test
public void testCheckpointRestore() throws Exception {
    final int initElement = 0;
    final int maxElement = 100;
    final int maxParallelism = 2;
    final Set<Long> expectedOutput = new HashSet<>();
    for (long i = initElement; i <= maxElement; i++) {
        expectedOutput.add(i);
    }
    final ConcurrentHashMap<String, List<Long>> outputCollector = new ConcurrentHashMap<>();
    final OneShotLatch latchToTrigger1 = new OneShotLatch();
    final OneShotLatch latchToWait1 = new OneShotLatch();
    final OneShotLatch latchToTrigger2 = new OneShotLatch();
    final OneShotLatch latchToWait2 = new OneShotLatch();
    final StatefulSequenceSource source1 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src1 = new StreamSource<>(source1);
    final AbstractStreamOperatorTestHarness<Long> testHarness1 =
            new AbstractStreamOperatorTestHarness<>(src1, maxParallelism, 2, 0);
    testHarness1.open();
    final StatefulSequenceSource source2 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src2 = new StreamSource<>(source2);
    final AbstractStreamOperatorTestHarness<Long> testHarness2 =
            new AbstractStreamOperatorTestHarness<>(src2, maxParallelism, 2, 1);
    testHarness2.open();
    final Throwable[] error = new Throwable[3];
    // run the source asynchronously
    Thread runner1 = new Thread() {
        @Override
        public void run() {
            try {
                source1.run(new BlockingSourceContext<>("1", latchToTrigger1, latchToWait1, outputCollector, 21));
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    // run the source asynchronously
    Thread runner2 = new Thread() {
        @Override
        public void run() {
            try {
                source2.run(new BlockingSourceContext<>("2", latchToTrigger2, latchToWait2, outputCollector, 32));
            } catch (Throwable t) {
                t.printStackTrace();
                error[1] = t;
            }
        }
    };
    runner1.start();
    runner2.start();
    if (!latchToTrigger1.isTriggered()) {
        latchToTrigger1.await();
    }
    if (!latchToTrigger2.isTriggered()) {
        latchToTrigger2.await();
    }
    OperatorSubtaskState snapshot = AbstractStreamOperatorTestHarness.repackageState(
            testHarness1.snapshot(0L, 0L), testHarness2.snapshot(0L, 0L));
    final StatefulSequenceSource source3 = new StatefulSequenceSource(initElement, maxElement);
    StreamSource<Long, StatefulSequenceSource> src3 = new StreamSource<>(source3);
    final OperatorSubtaskState initState =
            AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 2, 1, 0);
    final AbstractStreamOperatorTestHarness<Long> testHarness3 =
            new AbstractStreamOperatorTestHarness<>(src3, maxParallelism, 1, 0);
    testHarness3.setup();
    testHarness3.initializeState(initState);
    testHarness3.open();
    final OneShotLatch latchToTrigger3 = new OneShotLatch();
    final OneShotLatch latchToWait3 = new OneShotLatch();
    latchToWait3.trigger();
    // run the restored source asynchronously
    Thread runner3 = new Thread() {
        @Override
        public void run() {
            try {
                source3.run(new BlockingSourceContext<>("3", latchToTrigger3, latchToWait3, outputCollector, 3));
            } catch (Throwable t) {
                t.printStackTrace();
                error[2] = t;
            }
        }
    };
    runner3.start();
    runner3.join();
    // we have 3 tasks.
    Assert.assertEquals(3, outputCollector.size());
    // test for at-most-once
    Set<Long> dedupRes = new HashSet<>(Math.abs(maxElement - initElement) + 1);
    for (Map.Entry<String, List<Long>> elementsPerTask : outputCollector.entrySet()) {
        String key = elementsPerTask.getKey();
        List<Long> elements = outputCollector.get(key);
        // this tests the correctness of the latches in the test
        Assert.assertTrue(elements.size() > 0);
        for (Long elem : elements) {
            if (!dedupRes.add(elem)) {
                Assert.fail("Duplicate entry: " + elem);
            }
            if (!expectedOutput.contains(elem)) {
                Assert.fail("Unexpected element: " + elem);
            }
        }
    }
    // test for exactly-once
    Assert.assertEquals(Math.abs(initElement - maxElement) + 1, dedupRes.size());
    latchToWait1.trigger();
    latchToWait2.trigger();
    // wait for everybody to finish.
    runner1.join();
    runner2.join();
}
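BlockingSourceContext drives the checkpoint choreography above but is not shown. A sketch assuming it collects into the shared map under a per-task key, triggers its latch after a threshold of elements so the test can snapshot, and then blocks until released (an illustrative reconstruction of the private class in Flink's StatefulSequenceSourceTest):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.flink.core.testutils.OneShotLatch;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;

// Collects elements under a per-task key; after `threshold` elements it triggers
// `latchToTrigger` (so the test can snapshot) and blocks on `latchToWait`.
private static class BlockingSourceContext<T> implements SourceFunction.SourceContext<T> {
    private final OneShotLatch latchToTrigger;
    private final OneShotLatch latchToWait;
    private final int threshold;
    private final Object lock = new Object();
    private final List<T> localOutput = new ArrayList<>();
    private int count;

    BlockingSourceContext(String name, OneShotLatch latchToTrigger, OneShotLatch latchToWait,
            ConcurrentHashMap<String, List<T>> output, int threshold) {
        this.latchToTrigger = latchToTrigger;
        this.latchToWait = latchToWait;
        this.threshold = threshold;
        List<T> previous = output.put(name, localOutput);
        if (previous != null) {
            throw new IllegalStateException("Task " + name + " registered twice.");
        }
    }

    @Override
    public void collect(T element) {
        localOutput.add(element);
        if (++count == threshold) {
            latchToTrigger.trigger(); // let the test take a snapshot
            try {
                if (!latchToWait.isTriggered()) {
                    latchToWait.await(); // block until the test releases this subtask
                }
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    }

    @Override
    public void collectWithTimestamp(T element, long timestamp) {
        collect(element);
    }

    @Override
    public void emitWatermark(Watermark mark) {}

    @Override
    public void markAsTemporarilyIdle() {}

    @Override
    public Object getCheckpointLock() {
        return lock;
    }

    @Override
    public void close() {}
}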