use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.
the class ContinuousFileProcessingRescalingTest method testReaderScalingDown.
@Test
public void testReaderScalingDown() throws Exception {
// simulates the scenario of scaling down from 2 to 1 instances
final OneShotLatch waitingLatch = new OneShotLatch();
// create the first instance and let it process the first split till element 5
final OneShotLatch triggerLatch1 = new OneShotLatch();
BlockingFileInputFormat format1 = new BlockingFileInputFormat(triggerLatch1, waitingLatch, new Path("test"), 20, 5);
FileInputSplit[] splits = format1.createInputSplits(2);
OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness1 = getTestHarness(format1, 2, 0);
testHarness1.open();
testHarness1.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[0])));
// wait until it arrives at element 5
if (!triggerLatch1.isTriggered()) {
triggerLatch1.await();
}
// create the second instance and let it process the second split till element 15
final OneShotLatch triggerLatch2 = new OneShotLatch();
BlockingFileInputFormat format2 = new BlockingFileInputFormat(triggerLatch2, waitingLatch, new Path("test"), 20, 15);
OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness2 = getTestHarness(format2, 2, 1);
testHarness2.open();
testHarness2.processElement(new StreamRecord<>(getTimestampedSplit(0, splits[1])));
// wait until it arrives at element 15
if (!triggerLatch2.isTriggered()) {
triggerLatch2.await();
}
// 1) clear the outputs of the two previous instances so that
// we can compare their newly produced outputs with the merged one
testHarness1.getOutput().clear();
testHarness2.getOutput().clear();
// 2) and take the snapshots from the previous instances and merge them
// into a new one, which will then be used to initialize a third instance

OperatorStateHandles mergedState = AbstractStreamOperatorTestHarness.repackageState(testHarness2.snapshot(0, 0), testHarness1.snapshot(0, 0));
// create the third instance
final OneShotLatch wLatch = new OneShotLatch();
final OneShotLatch tLatch = new OneShotLatch();
BlockingFileInputFormat format = new BlockingFileInputFormat(wLatch, tLatch, new Path("test"), 20, 5);
OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness = getTestHarness(format, 1, 0);
// initialize the state of the new operator with the state constructed by
// combining the partial states of the instances above.
testHarness.initializeState(mergedState);
testHarness.open();
// now restart the waiting operators
wLatch.trigger();
tLatch.trigger();
waitingLatch.trigger();
// and wait for the processing to finish
synchronized (testHarness1.getCheckpointLock()) {
testHarness1.close();
}
synchronized (testHarness2.getCheckpointLock()) {
testHarness2.close();
}
synchronized (testHarness.getCheckpointLock()) {
testHarness.close();
}
Queue<Object> expectedResult = new ArrayDeque<>();
putElementsInQ(expectedResult, testHarness1.getOutput());
putElementsInQ(expectedResult, testHarness2.getOutput());
Queue<Object> actualResult = new ArrayDeque<>();
putElementsInQ(actualResult, testHarness.getOutput());
Assert.assertEquals(20, actualResult.size());
Assert.assertArrayEquals(expectedResult.toArray(), actualResult.toArray());
}
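The helpers getTestHarness, getTimestampedSplit and putElementsInQ belong to the test class and are not part of this listing. A minimal sketch of putElementsInQ, assuming it only copies the emitted records while skipping watermarks (org.apache.flink.streaming.api.watermark.Watermark):
// Assumed helper: copy output elements, ignoring watermarks, which are
// irrelevant to the record-level comparison performed above.
private void putElementsInQ(Queue<Object> result, Queue<Object> partialOutput) {
    for (Object o : partialOutput) {
        if (o instanceof Watermark) {
            continue;
        }
        result.add(o);
    }
}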
use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.
the class TimestampedFileInputSplitTest method testPriorityQ.
@Test
public void testPriorityQ() {
TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null);
TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
TimestampedFileInputSplit richForthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);
Queue<TimestampedFileInputSplit> pendingSplits = new PriorityQueue<>();
pendingSplits.add(richSecondSplit);
pendingSplits.add(richForthSplit);
pendingSplits.add(richFirstSplit);
pendingSplits.add(richFifthSplit);
pendingSplits.add(richFifthSplit);
pendingSplits.add(richThirdSplit);
List<TimestampedFileInputSplit> actualSortedSplits = new ArrayList<>();
while (true) {
actualSortedSplits.add(pendingSplits.poll());
if (pendingSplits.isEmpty()) {
break;
}
}
List<TimestampedFileInputSplit> expectedSortedSplits = new ArrayList<>();
expectedSortedSplits.add(richFirstSplit);
expectedSortedSplits.add(richThirdSplit);
expectedSortedSplits.add(richSecondSplit);
expectedSortedSplits.add(richForthSplit);
expectedSortedSplits.add(richFifthSplit);
expectedSortedSplits.add(richFifthSplit);
Assert.assertArrayEquals(expectedSortedSplits.toArray(), actualSortedSplits.toArray());
}
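The expected order above implies the Comparable contract of TimestampedFileInputSplit: splits are ordered by modification time first, with the split number as tie-breaker. A minimal illustration inferred from the test (not the actual Flink implementation, which may also consult the path):
// Ordering implied by the expected list: earlier modification time first,
// lower split number breaking ties.
static int compareSplits(TimestampedFileInputSplit a, TimestampedFileInputSplit b) {
    int byModTime = Long.compare(a.getModificationTime(), b.getModificationTime());
    return byModTime != 0 ? byModTime : Integer.compare(a.getSplitNumber(), b.getSplitNumber());
}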
use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.
the class ContinuousFileProcessingITCase method testProgram.
// END OF PREPARATIONS
@Override
protected void testProgram() throws Exception {
/*
* This test checks the interplay between the monitor and the reader
* and also the failExternally() functionality. To test the latter we
* set the parallelism to 1 so that we have the chaining between the sink,
* which throws the SuccessException to signal the end of the test, and the
* reader.
*/
TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
format.setFilePath(hdfsURI);
format.setFilesFilter(FilePathFilter.createDefaultFilter());
// create the stream execution environment with a parallelism > 1 so that the readers run in parallel
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(PARALLELISM);
ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, env.getParallelism(), INTERVAL);
// the monitor always has DOP 1
DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
Assert.assertEquals(1, splits.getParallelism());
ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);
// there can be multiple readers
DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
Assert.assertEquals(PARALLELISM, content.getParallelism());
// finally for the sink we set the parallelism to 1 so that we can verify the output
TestingSinkFunction sink = new TestingSinkFunction();
content.addSink(sink).setParallelism(1);
Thread job = new Thread() {
@Override
public void run() {
try {
env.execute("ContinuousFileProcessingITCase Job.");
} catch (Exception e) {
Throwable th = e;
for (int depth = 0; depth < 20; depth++) {
if (th instanceof SuccessException) {
try {
postSubmit();
} catch (Exception e1) {
e1.printStackTrace();
}
return;
} else if (th.getCause() != null) {
th = th.getCause();
} else {
break;
}
}
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
};
job.start();
// The modification time of the last created file.
long lastCreatedModTime = Long.MIN_VALUE;
// create the files to be read
for (int i = 0; i < NO_OF_FILES; i++) {
Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
long modTime;
do {
// give it some time so that the files have
// different modification timestamps.
Thread.sleep(50);
tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");
modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
if (modTime <= lastCreatedModTime) {
// delete the last created file to recreate it with a different timestamp
hdfs.delete(tmpFile.f0, false);
}
} while (modTime <= lastCreatedModTime);
lastCreatedModTime = modTime;
// put the contents in the expected results before the reader picks them up;
// this guarantees they are registered before the reader finishes (avoids race conditions)
expectedContents.put(i, tmpFile.f1);
org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
hdfs.rename(tmpFile.f0, file);
Assert.assertTrue(hdfs.exists(file));
}
// wait for the job to finish.
job.join();
}
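TestingSinkFunction and fillWithData are defined elsewhere in the test class. The pattern described in the comment at the top of the method works as follows: the sink throws SuccessException once it has seen all expected content, env.execute() fails with that exception wrapped somewhere in its cause chain, and the unwrapping loop in the job thread detects it and calls postSubmit(). A hypothetical, simplified version of such a sink (the name and the completion check are assumptions, not the actual TestingSinkFunction):
// Hypothetical success-signaling sink: throws SuccessException to bring the
// (otherwise never-ending) PROCESS_CONTINUOUSLY job down once all lines were seen.
private static class SignalingSink extends RichSinkFunction<String> {
    private final Set<String> seen = new HashSet<>();
    @Override
    public void invoke(String value) throws Exception {
        seen.add(value);
        if (seen.size() == NO_OF_FILES * LINES_PER_FILE) { // assumed test constants
            throw new SuccessException();
        }
    }
}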
use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.
the class ContinuousFileProcessingMigrationTest method testReaderSnapshotRestore.
// END OF PREPARATIONS
// TESTS
@Test
public void testReaderSnapshotRestore() throws Exception {
/*
FileInputSplit split1 =
new FileInputSplit(3, new Path("test/test1"), 0, 100, null);
FileInputSplit split2 =
new FileInputSplit(2, new Path("test/test2"), 101, 200, null);
FileInputSplit split3 =
new FileInputSplit(1, new Path("test/test2"), 0, 100, null);
FileInputSplit split4 =
new FileInputSplit(0, new Path("test/test3"), 0, 100, null);
final OneShotLatch latch = new OneShotLatch();
BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
ContinuousFileReaderOperator<FileInputSplit, ?> initReader = new ContinuousFileReaderOperator<>(format);
initReader.setOutputType(typeInfo, new ExecutionConfig());
OneInputStreamOperatorTestHarness<FileInputSplit, FileInputSplit> initTestInstance =
new OneInputStreamOperatorTestHarness<>(initReader);
initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
initTestInstance.open();
// create some state in the reader
initTestInstance.processElement(new StreamRecord<>(split1));
initTestInstance.processElement(new StreamRecord<>(split2));
initTestInstance.processElement(new StreamRecord<>(split3));
initTestInstance.processElement(new StreamRecord<>(split4));
// take a snapshot of the operator's state. This will be used
// to initialize another reader and compare the results of the
// two operators.
final StreamTaskState snapshot;
synchronized (initTestInstance.getCheckpointLock()) {
snapshot = initTestInstance.snapshot(0L, 0L);
}
initTestInstance.snapshotToFile(snapshot, "src/test/resources/reader-migration-test-flink1.1-snapshot");
*/
TimestampedFileInputSplit split1 = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
TimestampedFileInputSplit split2 = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
TimestampedFileInputSplit split3 = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
TimestampedFileInputSplit split4 = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);
final OneShotLatch latch = new OneShotLatch();
BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(hdfsURI));
TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
initReader.setOutputType(typeInfo, new ExecutionConfig());
OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance = new OneInputStreamOperatorTestHarness<>(initReader);
initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
initTestInstance.setup();
initTestInstance.initializeStateFromLegacyCheckpoint(getResourceFilename("reader-migration-test-flink1.1-snapshot"));
initTestInstance.open();
latch.trigger();
synchronized (initTestInstance.getCheckpointLock()) {
initTestInstance.close();
}
FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);
// verify that the results contain what they should contain,
// i.e. that the restored reader emitted the expected splits.
Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));
}
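createSplitFromTimestampedSplit is a test helper not shown in this listing; presumably it rebuilds a plain FileInputSplit so the restored output can be compared without the timestamp metadata. A sketch under that assumption:
// Assumed helper: strip the modification time, keeping only the
// FileInputSplit fields that the reader actually emits downstream.
private static FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    return new FileInputSplit(split.getSplitNumber(), split.getPath(),
        split.getStart(), split.getLength(), split.getHostnames());
}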
use of org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit in project flink by apache.
the class ContinuousFileProcessingMigrationTest method testFunctionRestore.
//// Monitoring Function Tests //////
@Test
public void testFunctionRestore() throws Exception {
/*
org.apache.hadoop.fs.Path path = null;
long fileModTime = Long.MIN_VALUE;
for (int i = 0; i < 1; i++) {
Tuple2<org.apache.hadoop.fs.Path, String> file = fillWithData(hdfsURI, "file", i, "This is test line.");
path = file.f0;
fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
}
TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
final ContinuousFileMonitoringFunction<String> monitoringFunction =
new ContinuousFileMonitoringFunction<>(format, format.getFilePath().toString(), new PathFilter(), FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
StreamSource<FileInputSplit, ContinuousFileMonitoringFunction<String>> src =
new StreamSource<>(monitoringFunction);
final OneInputStreamOperatorTestHarness<Void, FileInputSplit> testHarness =
new OneInputStreamOperatorTestHarness<>(src);
testHarness.open();
final Throwable[] error = new Throwable[1];
final OneShotLatch latch = new OneShotLatch();
// run the source asynchronously
Thread runner = new Thread() {
@Override
public void run() {
try {
monitoringFunction.run(new DummySourceContext() {
@Override
public void collect(FileInputSplit element) {
latch.trigger();
}
});
}
catch (Throwable t) {
t.printStackTrace();
error[0] = t;
}
}
};
runner.start();
if (!latch.isTriggered()) {
latch.await();
}
StreamTaskState snapshot = testHarness.snapshot(0, 0);
testHarness.snapshotToFile(snapshot, "src/test/resources/monitoring-function-migration-test-" + fileModTime +"-flink1.1-snapshot");
monitoringFunction.cancel();
runner.join();
testHarness.close();
*/
long expectedModTime = 1482144479339L;
TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction);
final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
testHarness.setup();
testHarness.initializeStateFromLegacyCheckpoint(getResourceFilename("monitoring-function-migration-test-1482144479339-flink1.1-snapshot"));
testHarness.open();
Assert.assertEquals(expectedModTime, monitoringFunction.getGlobalModificationTime());
}
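getResourceFilename, used by both migration tests, resolves the stored snapshot from the test resources. A plausible sketch via the class loader:
// Assumed helper: locate a test resource (the legacy snapshot file) on the classpath.
private static String getResourceFilename(String filename) {
    java.net.URL resource = ContinuousFileProcessingMigrationTest.class.getClassLoader().getResource(filename);
    if (resource == null) {
        throw new IllegalStateException("Could not find test resource: " + filename);
    }
    return resource.getFile();
}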