use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class BucketingSinkTest method testScalingUp.
/**
 * Snapshots two sink instances, merges their state, restores the merged state into
 * three new sink instances and verifies the output directory with {@code checkFs}
 * after every step.
 */
@Test
public void testScalingUp() throws Exception {
    final File outDir = tempFolder.newFolder();

    // NOTE(review): both pre-rescaling sinks are created with identical arguments
    // (2, 0, 100); if the third argument is the subtask index, confirm that using 0
    // twice is intended.
    OneInputStreamOperatorTestHarness<String, Object> oldSinkA = createRescalingTestSink(outDir, 2, 0, 100);
    oldSinkA.setup();
    oldSinkA.open();

    OneInputStreamOperatorTestHarness<String, Object> oldSinkB = createRescalingTestSink(outDir, 2, 0, 100);
    oldSinkB.setup();
    oldSinkB.open();

    // Two records through the first sink ...
    oldSinkA.processElement(new StreamRecord<>("test1", 1L));
    oldSinkA.processElement(new StreamRecord<>("test2", 1L));
    checkFs(outDir, 2, 0, 0, 0);

    // ... and three more through the second one.
    oldSinkB.processElement(new StreamRecord<>("test3", 1L));
    oldSinkB.processElement(new StreamRecord<>("test4", 1L));
    oldSinkB.processElement(new StreamRecord<>("test5", 1L));
    checkFs(outDir, 5, 0, 0, 0);

    // The second sink is deliberately snapshotted before the first one so that the
    // merged state arrives in shuffled order.
    OperatorStateHandles stateOfB = oldSinkB.snapshot(0, 0);
    OperatorStateHandles stateOfA = oldSinkA.snapshot(0, 0);
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(stateOfB, stateOfA);

    // Restore the merged state into three sinks, one after the other, checking the
    // directory contents after each restore.
    OneInputStreamOperatorTestHarness<String, Object> newSink0 = createRescalingTestSink(outDir, 3, 0, 100);
    newSink0.setup();
    newSink0.initializeState(mergedSnapshot);
    newSink0.open();
    checkFs(outDir, 2, 0, 3, 3);

    OneInputStreamOperatorTestHarness<String, Object> newSink1 = createRescalingTestSink(outDir, 3, 1, 100);
    newSink1.setup();
    newSink1.initializeState(mergedSnapshot);
    newSink1.open();
    checkFs(outDir, 0, 0, 5, 5);

    OneInputStreamOperatorTestHarness<String, Object> newSink2 = createRescalingTestSink(outDir, 3, 2, 100);
    newSink2.setup();
    newSink2.initializeState(mergedSnapshot);
    newSink2.open();
    checkFs(outDir, 0, 0, 5, 5);

    // Every restored sink receives one further record.
    newSink0.processElement(new StreamRecord<>("test6", 0));
    newSink1.processElement(new StreamRecord<>("test6", 0));
    newSink2.processElement(new StreamRecord<>("test6", 0));
    checkFs(outDir, 3, 0, 5, 5);

    newSink0.snapshot(1, 0);
    newSink1.snapshot(1, 0);
    newSink2.snapshot(1, 0);

    newSink0.close();
    newSink1.close();
    newSink2.close();
    checkFs(outDir, 0, 3, 5, 5);
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class RMQSourceTest method testCheckpointing.
/**
 * Takes 50 snapshots of the running source. Each snapshot is restored into a fresh
 * {@code RMQTestSource} to check how many message ids it carries and which id is the
 * newest one; afterwards the checkpoint is completed on the original source. At the
 * end the mocked channel must have seen one ack per snapshotted message and one
 * transaction commit per snapshot.
 */
@Test
public void testCheckpointing() throws Exception {
    source.autoAck = false;

    StreamSource<String, RMQSource<String>> sourceOperator = new StreamSource<>(source);
    AbstractStreamOperatorTestHarness<String> harness = new AbstractStreamOperatorTestHarness<>(sourceOperator, 1, 1, 0);
    harness.open();

    sourceThread.start();
    Thread.sleep(5);

    final Random checkpointIds = new Random(System.currentTimeMillis());
    final int snapshotCount = 50;

    long newestMessageId = 0;
    long expectedAcks = 0;

    for (int round = 0; round < snapshotCount; round++) {
        final long checkpointId = checkpointIds.nextLong();
        // message id recorded at the previous snapshot (0 before the first one)
        final long olderMessageId = newestMessageId;

        final OperatorStateHandles snapshot;
        synchronized (DummySourceContext.lock) {
            snapshot = harness.snapshot(checkpointId, System.currentTimeMillis());
            newestMessageId = messageId;
        }

        // let some time pass
        Thread.sleep(5);

        // number of messages that must be contained in this snapshot
        final long newMessages = newestMessageId - olderMessageId;

        // restore the snapshot into an independent source and inspect what it received
        RMQTestSource restoredSource = new RMQTestSource();
        StreamSource<String, RMQTestSource> restoredOperator = new StreamSource<>(restoredSource);
        AbstractStreamOperatorTestHarness<String> restoredHarness = new AbstractStreamOperatorTestHarness<>(restoredOperator, 1, 1, 0);
        restoredHarness.setup();
        restoredHarness.initializeState(snapshot);
        restoredHarness.open();

        List<String> restoredIds = restoredSource.getRestoredState().getLast().f1;
        assertEquals(newMessages, restoredIds.size());
        if (!restoredIds.isEmpty()) {
            String newestRestoredId = restoredIds.get(restoredIds.size() - 1);
            assertEquals(newestMessageId, Long.parseLong(newestRestoredId));
        }

        // completing the checkpoint should acknowledge the messages and commit the transaction
        synchronized (DummySourceContext.lock) {
            source.notifyCheckpointComplete(checkpointId);
        }

        expectedAcks += newMessages;
    }

    Mockito.verify(source.channel, Mockito.times((int) expectedAcks)).basicAck(Mockito.anyLong(), Mockito.eq(false));
    Mockito.verify(source.channel, Mockito.times(snapshotCount)).txCommit();
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class ContinuousFileProcessingTest method testFunctionRestore.
/**
 * Runs a {@code ContinuousFileMonitoringFunction} over a directory holding a single
 * file, snapshots it once at least one split has been emitted, restores the snapshot
 * into a second function instance and asserts that the restored instance reports the
 * file's modification time as its global modification time.
 */
@Test
public void testFunctionRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    // a single input file is all this test needs
    Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", 0, "This is test line.");
    final org.apache.hadoop.fs.Path path = file.f0;
    final long fileModTime = hdfs.getFileStatus(path).getModificationTime();

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));

    final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction);

    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();

    // slot for a failure raised on the runner thread; read only after runner.join()
    final Throwable[] error = new Throwable[1];

    final OneShotLatch latch = new OneShotLatch();

    final DummySourceContext sourceContext = new DummySourceContext() {
        @Override
        public void collect(TimestampedFileInputSplit element) {
            latch.trigger();
        }
    };

    // run the source asynchronously
    Thread runner = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.run(sourceContext);
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
                // Release the waiting test thread: without this, a failure that happens
                // before the first split is collected would leave the test blocked on
                // the latch forever instead of failing on the assertion below.
                latch.trigger();
            }
        }
    };
    runner.start();

    // first condition for the source to have updated its state: emit at least one element
    if (!latch.isTriggered()) {
        latch.await();
    }

    // second condition: wait until the checkpoint lock can be acquired (and release it
    // right away); this means it has processed all the splits and updated its state.
    synchronized (sourceContext.getCheckpointLock()) {
    }

    OperatorStateHandles snapshot = testHarness.snapshot(0, 0);

    monitoringFunction.cancel();
    runner.join();

    testHarness.close();

    // restore the snapshot into a fresh function instance
    final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy = new StreamSource<>(monitoringFunctionCopy);

    AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();

    // the runner thread must not have failed ...
    Assert.assertNull(error[0]);
    // ... and the restored function must carry over the modification time of the file
    Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());

    hdfs.delete(path, false);
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class RollingSinkITCase method testScalingDown.
/**
 * Snapshots three sink instances, merges their state in shuffled order, restores the
 * merged state into two new sink instances and verifies the output directory with
 * {@code checkFs} after every step.
 */
@Test
public void testScalingDown() throws Exception {
    final File outDir = tempFolder.newFolder();

    OneInputStreamOperatorTestHarness<String, Object> oldSink0 = createRescalingTestSink(outDir, 3, 0);
    oldSink0.setup();
    oldSink0.open();

    OneInputStreamOperatorTestHarness<String, Object> oldSink1 = createRescalingTestSink(outDir, 3, 1);
    oldSink1.setup();
    oldSink1.open();

    OneInputStreamOperatorTestHarness<String, Object> oldSink2 = createRescalingTestSink(outDir, 3, 2);
    oldSink2.setup();
    oldSink2.open();

    // one record for the first sink
    oldSink0.processElement(new StreamRecord<>("test1", 0L));
    checkFs(outDir, 1, 0, 0, 0);

    // five records for the second sink
    oldSink1.processElement(new StreamRecord<>("test2", 0L));
    oldSink1.processElement(new StreamRecord<>("test3", 0L));
    oldSink1.processElement(new StreamRecord<>("test4", 0L));
    oldSink1.processElement(new StreamRecord<>("test5", 0L));
    oldSink1.processElement(new StreamRecord<>("test6", 0L));
    checkFs(outDir, 2, 4, 0, 0);

    // two records for the third sink
    oldSink2.processElement(new StreamRecord<>("test7", 0L));
    oldSink2.processElement(new StreamRecord<>("test8", 0L));
    checkFs(outDir, 3, 5, 0, 0);

    // The snapshots are deliberately taken in a non-ascending order (third, first,
    // second sink) so that the merged state is shuffled.
    OperatorStateHandles stateOfSink2 = oldSink2.snapshot(0, 0);
    OperatorStateHandles stateOfSink0 = oldSink0.snapshot(0, 0);
    OperatorStateHandles stateOfSink1 = oldSink1.snapshot(0, 0);
    OperatorStateHandles mergedSnapshot = AbstractStreamOperatorTestHarness.repackageState(stateOfSink2, stateOfSink0, stateOfSink1);

    // With this shuffling the first restored sink is expected to take over the state
    // of the former third and first sinks, and the second restored sink that of the
    // former second sink (the one that wrote five records), which is consistent with
    // the file counts asserted below.
    OneInputStreamOperatorTestHarness<String, Object> newSink0 = createRescalingTestSink(outDir, 2, 0);
    newSink0.setup();
    newSink0.initializeState(mergedSnapshot);
    newSink0.open();

    // There is no length file for part-2-0: that bucket was not "in-progress" but
    // "pending", i.e. its full content is valid.
    checkFs(outDir, 1, 4, 3, 2);

    OneInputStreamOperatorTestHarness<String, Object> newSink1 = createRescalingTestSink(outDir, 2, 1);
    newSink1.setup();
    newSink1.initializeState(mergedSnapshot);
    newSink1.open();
    checkFs(outDir, 0, 0, 8, 3);
}
use of org.apache.flink.streaming.runtime.tasks.OperatorStateHandles in project flink by apache.
the class RollingSinkITCase method testBucketStateTransitions.
/**
 * Drives a single sink through processing, snapshotting, checkpoint completion,
 * closing and two restores, verifying the output directory with {@code checkFs}
 * after every step.
 */
@Test
public void testBucketStateTransitions() throws Exception {
    final File outDir = tempFolder.newFolder();

    OneInputStreamOperatorTestHarness<String, Object> firstSink = createRescalingTestSink(outDir, 1, 0);
    firstSink.setup();
    firstSink.open();
    firstSink.setProcessingTime(0L);

    // With a bucket size of 5 bytes every record ends up in a bucket of its own,
    // i.e. the bucket rolls after each record.
    firstSink.processElement(new StreamRecord<>("test1", 1L));
    firstSink.processElement(new StreamRecord<>("test2", 1L));
    checkFs(outDir, 1, 1, 0, 0);

    firstSink.processElement(new StreamRecord<>("test3", 1L));
    checkFs(outDir, 1, 2, 0, 0);

    // taking a snapshot alone leaves the directory untouched ...
    firstSink.snapshot(0, 0);
    checkFs(outDir, 1, 2, 0, 0);

    // ... the expected counts only change once the checkpoint is reported complete
    firstSink.notifyOfCompletedCheckpoint(0);
    checkFs(outDir, 1, 0, 2, 0);

    OperatorStateHandles stateBeforeClose = firstSink.snapshot(1, 0);

    firstSink.close();
    checkFs(outDir, 0, 1, 2, 0);

    // first restore, from the snapshot taken right before the close
    OneInputStreamOperatorTestHarness<String, Object> secondSink = createRescalingTestSink(outDir, 1, 0);
    secondSink.setup();
    secondSink.initializeState(stateBeforeClose);
    secondSink.open();
    checkFs(outDir, 0, 0, 3, 1);

    // this snapshot is taken BEFORE "test4" is written
    OperatorStateHandles stateBeforeLastRecord = secondSink.snapshot(2, 0);

    secondSink.processElement(new StreamRecord<>("test4", 10));
    checkFs(outDir, 1, 0, 3, 1);

    // second restore; the previous sink is intentionally left unclosed
    OneInputStreamOperatorTestHarness<String, Object> thirdSink = createRescalingTestSink(outDir, 1, 0);
    thirdSink.setup();
    thirdSink.initializeState(stateBeforeLastRecord);
    thirdSink.open();

    // the in-progress file remains as we do not clean up now
    checkFs(outDir, 1, 0, 3, 1);

    thirdSink.close();

    // at close it is not moved to final because it is not part of the current
    // task's state; it is just a leftover that was not cleaned up
    checkFs(outDir, 1, 0, 3, 1);
}
Aggregations