Search in sources :

Example 1 with FilePathFilter

Use of org.apache.flink.api.common.io.FilePathFilter in the Apache Flink project.

From the class ContinuousFileProcessingTest, method testFilePathFiltering.

// //				Monitoring Function Tests				//////
/**
 * Checks that files rejected by a custom {@link FilePathFilter} are not reported by the
 * monitoring function.
 *
 * <p>Two groups of {@code NO_OF_FILES} files are created under a fresh, randomly named base
 * path: one group whose names start with {@code "**"} (expected to be discarded) and one group
 * without that prefix (expected to be kept). The monitoring function is then run once with a
 * filter matching the {@code "**"} prefix, and the names seen by the source context are compared
 * against the names of the files expected to be kept.
 *
 * @throws Exception if creating the files, running the monitoring function or deleting the
 *     files fails
 */
@Test
public void testFilePathFiltering() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";
    Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    // sorted set of the names the monitoring function is expected to report
    Set<String> filesKept = new TreeSet<>();
    try {
        // create the files to be discarded
        for (int i = 0; i < NO_OF_FILES; i++) {
            Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "**file", i, "This is test line.");
            filesCreated.add(file.f0);
        }
        // create the files to be kept
        for (int i = 0; i < NO_OF_FILES; i++) {
            Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
            filesCreated.add(file.f0);
            filesKept.add(file.f0.getName());
        }
        TextInputFormat format = new TextInputFormat(new Path(testBasePath));
        format.setFilesFilter(new FilePathFilter() {

            private static final long serialVersionUID = 2611449927338589804L;

            @Override
            public boolean filterPath(Path filePath) {
                // matches exactly the files created with the "**file" prefix above
                return filePath.getName().startsWith("**");
            }
        });
        ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE);
        final FileVerifyingSourceContext context = new FileVerifyingSourceContext(new OneShotLatch(), monitoringFunction);
        monitoringFunction.open(new Configuration());
        monitoringFunction.run(context);
        Assert.assertArrayEquals(filesKept.toArray(), context.getSeenFiles().toArray());
    } finally {
        // delete the files created for the test, even when the run or the assertion failed,
        // so that a failing test does not leave files behind.
        for (org.apache.hadoop.fs.Path file : filesCreated) {
            hdfs.delete(file, false);
        }
    }
}
Also used: Path (org.apache.flink.core.fs.Path), Configuration (org.apache.flink.configuration.Configuration), TextInputFormat (org.apache.flink.api.java.io.TextInputFormat), TreeSet (java.util.TreeSet), FilePathFilter (org.apache.flink.api.common.io.FilePathFilter), OneShotLatch (org.apache.flink.core.testutils.OneShotLatch), HashSet (java.util.HashSet), Test (org.junit.Test)

Aggregations

HashSet (java.util.HashSet): 1, TreeSet (java.util.TreeSet): 1, FilePathFilter (org.apache.flink.api.common.io.FilePathFilter): 1, TextInputFormat (org.apache.flink.api.java.io.TextInputFormat): 1, Configuration (org.apache.flink.configuration.Configuration): 1, Path (org.apache.flink.core.fs.Path): 1, OneShotLatch (org.apache.flink.core.testutils.OneShotLatch): 1, Test (org.junit.Test): 1