Use of org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction in project flink by apache.
From the class ContinuousFileProcessingTest, method testFilePathFiltering.
//// Monitoring Function Tests //////

@Test
public void testFilePathFiltering() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    Set<String> filesKept = new TreeSet<>();

    // create the files to be discarded
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "**file", i, "This is test line.");
        filesCreated.add(file.f0);
    }

    // create the files to be kept
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated.add(file.f0);
        filesKept.add(file.f0.getName());
    }

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(new FilePathFilter() {

        private static final long serialVersionUID = 2611449927338589804L;

        @Override
        public boolean filterPath(Path filePath) {
            return filePath.getName().startsWith("**");
        }
    });

    ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

    final FileVerifyingSourceContext context =
        new FileVerifyingSourceContext(new OneShotLatch(), monitoringFunction);

    monitoringFunction.open(new Configuration());
    monitoringFunction.run(context);

    Assert.assertArrayEquals(filesKept.toArray(), context.getSeenFiles().toArray());

    // finally delete the files created for the test.
    for (org.apache.hadoop.fs.Path file : filesCreated) {
        hdfs.delete(file, false);
    }
}
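The helper createFileAndFillWithData(...) that these tests call is not part of this excerpt. Below is a minimal sketch of how such a helper could look, kept self-contained by passing the FileSystem in as a parameter instead of using the test's shared hdfs field; the real helper in ContinuousFileProcessingTest may differ in detail (for example, it may write to a temporary file and rename it so the file appears atomically), so the body here is an assumption that only illustrates the contract the tests rely on: create "<fileName><fileIdx>" under the base path, write a few lines, and return the path together with the written content.

import java.io.IOException;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class FileCreationSketch {

    // Hypothetical stand-in for the test helper; the FileSystem parameter replaces
    // the test class's shared hdfs field.
    static Tuple2<Path, String> createFileAndFillWithData(
            FileSystem fs, String base, String fileName, int fileIdx, String sampleLine)
            throws IOException {

        Path file = new Path(base + "/" + fileName + fileIdx);
        StringBuilder content = new StringBuilder();

        try (FSDataOutputStream out = fs.create(file)) {
            // write a handful of lines so that the file produces at least one input split
            for (int i = 0; i < 10; i++) {
                String line = fileIdx + ": " + sampleLine + " " + i + "\n";
                content.append(line);
                out.write(line.getBytes());
            }
        }
        return Tuple2.of(file, content.toString());
    }
}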
Use of org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction in project flink by apache.
From the class ContinuousFileProcessingTest, method testSortingOnModTime.
@Test
public void testSortingOnModTime() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    final long[] modTimes = new long[NO_OF_FILES];
    final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];

    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        Thread.sleep(400);
        filesCreated[i] = file.f0;
        modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
    }

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());

    // this is just to verify that all splits have been forwarded later.
    FileInputSplit[] splits = format.createInputSplits(1);

    ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

    ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);

    monitoringFunction.open(new Configuration());
    monitoringFunction.run(context);
    Assert.assertEquals(splits.length, context.getCounter());

    // delete the created files.
    for (int i = 0; i < NO_OF_FILES; i++) {
        hdfs.delete(filesCreated[i], false);
    }
}
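The ModTimeVerifyingSourceContext used above is an inner class of the test that is not included in this excerpt. The sketch below only illustrates the behavior the assertion depends on: collected splits must arrive in non-decreasing modification-time order, and each collected split increments a counter returned by getCounter(). It extends the DummySourceContext base class that also appears in testInvalidPathSpecification, and all field names and the exact checks (the real class may also compare against the expected modification times captured in the test) are assumptions.

// Hypothetical sketch; intended to live inside the test class next to DummySourceContext.
private static class ModTimeVerifyingSourceContextSketch extends DummySourceContext {

    private final long[] expectedModTimes;   // captured in the test, kept here for reference
    private int splitCounter = 0;
    private long lastSeenModTime = Long.MIN_VALUE;

    ModTimeVerifyingSourceContextSketch(long[] expectedModTimes) {
        this.expectedModTimes = expectedModTimes;
    }

    int getCounter() {
        return splitCounter;
    }

    @Override
    public void collect(TimestampedFileInputSplit element) {
        long modTime = element.getModificationTime();

        // splits must be forwarded ordered by the modification time of their files
        Assert.assertTrue(modTime >= lastSeenModTime);

        lastSeenModTime = modTime;
        splitCounter++;
    }
}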
Use of org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction in project flink by apache.
From the class ContinuousFileProcessingTest, method testInvalidPathSpecification.
@Test
public void testInvalidPathSpecification() throws Exception {
    String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/";
    TextInputFormat format = new TextInputFormat(new Path(invalidPath));

    ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);

    try {
        monitoringFunction.run(new DummySourceContext() {
            @Override
            public void collect(TimestampedFileInputSplit element) {
                // we should never arrive here with an invalid path
                Assert.fail("Test passed with an invalid path.");
            }
        });

        // we should never arrive here with an invalid path
        Assert.fail("Test passed with an invalid path.");
    } catch (FileNotFoundException e) {
        Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage());
    }
}
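For context, here is a small, self-contained sketch of the kind of existence check this test exercises: the monitored directory is verified up front, and a FileNotFoundException with the message the test asserts on is thrown if it does not exist. It uses Flink's public FileSystem abstraction rather than the internal code of ContinuousFileMonitoringFunction, so read it as an illustration of the contract, not as the actual implementation; the class and method names are placeholders.

import java.io.FileNotFoundException;

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

class MonitoredPathCheck {

    // Fail fast if the path configured for monitoring does not exist.
    static void ensurePathExists(Path path) throws Exception {
        FileSystem fs = path.getFileSystem();
        if (!fs.exists(path)) {
            // same style of message that testInvalidPathSpecification expects
            throw new FileNotFoundException(
                "The provided file path " + path + " does not exist.");
        }
    }
}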
Use of org.apache.flink.streaming.api.functions.source.ContinuousFileMonitoringFunction in project flink by apache.
From the class ContinuousFileProcessingTest, method testProcessContinuously.
@Test
public void testProcessContinuously() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    final OneShotLatch latch = new OneShotLatch();

    // create a single file in the directory
    Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
        createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
    Assert.assertTrue(hdfs.exists(bootstrap.f0));

    final Set<String> filesToBeRead = new TreeSet<>();
    filesToBeRead.add(bootstrap.f0.getName());

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));
    format.setFilesFilter(FilePathFilter.createDefaultFilter());

    final ContinuousFileMonitoringFunction<String> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

    // 1 for the bootstrap + NO_OF_FILES
    final int totalNoOfFilesToBeRead = NO_OF_FILES + 1;

    final FileVerifyingSourceContext context =
        new FileVerifyingSourceContext(latch, monitoringFunction, 1, totalNoOfFilesToBeRead);

    final Thread t = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.open(new Configuration());
                monitoringFunction.run(context);
            } catch (Exception e) {
                Assert.fail(e.getMessage());
            }
        }
    };
    t.start();

    if (!latch.isTriggered()) {
        latch.await();
    }

    // create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY
    final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
    for (int i = 0; i < NO_OF_FILES; i++) {
        Tuple2<org.apache.hadoop.fs.Path, String> file =
            createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
        filesCreated[i] = file.f0;
        filesToBeRead.add(file.f0.getName());
    }

    // wait until the monitoring thread exits
    t.join();

    Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

    // finally delete the files created for the test.
    hdfs.delete(bootstrap.f0, false);
    for (org.apache.hadoop.fs.Path path : filesCreated) {
        hdfs.delete(path, false);
    }
}
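Applications normally do not instantiate ContinuousFileMonitoringFunction directly; it sits behind StreamExecutionEnvironment.readFile(...), which places the monitoring function in front of the parallel split readers. The sketch below shows a plausible end-to-end usage with PROCESS_CONTINUOUSLY, mirroring what this test exercises; the directory path, scan interval, and job name are placeholders.

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

public class ContinuousFileReadExample {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        String monitoredDir = "hdfs:///tmp/monitored-dir";   // placeholder path
        TextInputFormat format = new TextInputFormat(new Path(monitoredDir));

        // re-scan the directory every 100 ms and emit lines from newly appearing files
        DataStream<String> lines =
            env.readFile(format, monitoredDir, FileProcessingMode.PROCESS_CONTINUOUSLY, 100);

        lines.print();
        env.execute("continuous file monitoring example");
    }
}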