Search in sources :

Example 91 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class EnumerateNestedFilesTest method testTwoNestedDirectoriesTrue.

/**
 * Checks recursive enumeration across a three-level directory chain.
 *
 * <p>One file is placed in the first level, two in the second and one in the third.
 * With {@code recursive.file.enumeration} set to {@code true}, the format is expected
 * to produce one split per file, i.e. four splits in total.
 */
@Test
public void testTwoNestedDirectoriesTrue() {
    try {
        final String levelOneName = TestFileUtils.randomFileName();
        final String levelTwoName = TestFileUtils.randomFileName();
        final String levelThreeName = TestFileUtils.randomFileName();

        // Each path extends the previous one by a separator and the next directory name.
        final String levelOnePath = tempPath + System.getProperty("file.separator") + levelOneName;
        final File levelOne = new File(levelOnePath);
        levelOne.mkdirs();
        levelOne.deleteOnExit();

        final String levelTwoPath = levelOnePath + System.getProperty("file.separator") + levelTwoName;
        final File levelTwo = new File(levelTwoPath);
        levelTwo.mkdirs();
        levelTwo.deleteOnExit();

        final String levelThreePath = levelTwoPath + System.getProperty("file.separator") + levelThreeName;
        final File levelThree = new File(levelThreePath);
        levelThree.mkdirs();
        levelThree.deleteOnExit();

        // 1 file in level one + 2 files in level two + 1 file in level three = 4 files
        TestFileUtils.createTempFileInDirectory(levelOne.getAbsolutePath(), "paella");
        TestFileUtils.createTempFileInDirectory(levelTwo.getAbsolutePath(), "kalamari");
        TestFileUtils.createTempFileInDirectory(levelTwo.getAbsolutePath(), "fideua");
        TestFileUtils.createTempFileInDirectory(levelThree.getAbsolutePath(), "bravas");

        final Path rootPath = new Path(levelOne.toURI().toString());
        this.format.setFilePath(rootPath);
        this.config.setBoolean("recursive.file.enumeration", true);
        this.format.configure(this.config);

        final FileInputSplit[] enumerated = this.format.createInputSplits(1);
        Assert.assertEquals(4, enumerated.length);
    } catch (Exception failure) {
        // Any exception is reported as a test failure carrying the exception message.
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)

Example 92 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class EnumerateNestedFilesTest method testNoNestedDirectoryTrue.

/**
 * Checks enumeration of a single file (no directory nesting) while
 * {@code recursive.file.enumeration} is {@code true}.
 *
 * <p>The format is pointed directly at one temporary file and is expected to
 * produce exactly one split.
 */
@Test
public void testNoNestedDirectoryTrue() {
    try {
        final String singleFile = TestFileUtils.createTempFile("foo");
        final Path inputPath = new Path(singleFile);

        this.format.setFilePath(inputPath);
        this.config.setBoolean("recursive.file.enumeration", true);
        this.format.configure(this.config);

        final FileInputSplit[] enumerated = this.format.createInputSplits(1);
        Assert.assertEquals(1, enumerated.length);
    } catch (Exception failure) {
        // Any exception is reported as a test failure carrying the exception message.
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) IOException(java.io.IOException) Test(org.junit.Test)

Example 93 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class EnumerateNestedFilesTest method testOneNestedDirectoryTrue.

/**
 * Checks recursive enumeration with a single nested directory.
 *
 * <p>One file is placed in the outer directory and two in the nested one. With
 * {@code recursive.file.enumeration} set to {@code true}, three splits are expected.
 */
@Test
public void testOneNestedDirectoryTrue() {
    try {
        final String outerName = TestFileUtils.randomFileName();
        final String innerName = TestFileUtils.randomFileName();

        final String outerPath = tempPath + System.getProperty("file.separator") + outerName;
        final File outerDir = new File(outerPath);
        outerDir.mkdirs();
        outerDir.deleteOnExit();

        // The inner directory lives directly below the outer one.
        final String innerPath = outerPath + System.getProperty("file.separator") + innerName;
        final File innerDir = new File(innerPath);
        innerDir.mkdirs();
        innerDir.deleteOnExit();

        // 1 file in the outer directory + 2 files in the inner directory = 3 files
        TestFileUtils.createTempFileInDirectory(outerDir.getAbsolutePath(), "paella");
        TestFileUtils.createTempFileInDirectory(innerDir.getAbsolutePath(), "kalamari");
        TestFileUtils.createTempFileInDirectory(innerDir.getAbsolutePath(), "fideua");

        final Path rootPath = new Path(outerDir.toURI().toString());
        this.format.setFilePath(rootPath);
        this.config.setBoolean("recursive.file.enumeration", true);
        this.format.configure(this.config);

        final FileInputSplit[] enumerated = this.format.createInputSplits(1);
        Assert.assertEquals(3, enumerated.length);
    } catch (Exception failure) {
        // Any exception is reported as a test failure carrying the exception message.
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)

Example 94 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class EnumerateNestedFilesTest method testTwoNestedDirectoriesWithFilteredFilesTrue.

/**
 * Checks recursive enumeration across two nested directory levels when some
 * entries carry a leading underscore.
 *
 * <p>Besides the regular three-level chain (one file in the first level, two in the
 * second, one in the third), the test creates an underscore-prefixed directory at the
 * second and at the third level, each containing files, plus an underscore-prefixed
 * file in the first level. With {@code recursive.file.enumeration} set to {@code true},
 * only the four regular files are expected to yield splits.
 */
@Test
public void testTwoNestedDirectoriesWithFilteredFilesTrue() {
    String sep = System.getProperty("file.separator");
    try {
        final String levelOneName = TestFileUtils.randomFileName();
        final String levelTwoName = TestFileUtils.randomFileName();
        final String levelThreeName = TestFileUtils.randomFileName();
        final String levelTwoHiddenName = "_" + TestFileUtils.randomFileName();
        final String levelThreeHiddenName = "_" + TestFileUtils.randomFileName();

        // Shared path prefixes for the first and second (regular) levels.
        final String levelOnePath = tempPath + sep + levelOneName;
        final String levelTwoPath = levelOnePath + sep + levelTwoName;

        final File levelOne = new File(levelOnePath);
        levelOne.mkdirs();
        levelOne.deleteOnExit();

        final File levelTwo = new File(levelTwoPath);
        levelTwo.mkdirs();
        levelTwo.deleteOnExit();

        // Underscore-prefixed sibling of the second-level directory.
        final File levelTwoHidden = new File(levelOnePath + sep + levelTwoHiddenName);
        levelTwoHidden.mkdirs();
        levelTwoHidden.deleteOnExit();

        // Underscore-prefixed plain file directly in the first level.
        final File hiddenFile = new File(levelOnePath + sep + "_IWillBeFiltered");
        hiddenFile.createNewFile();
        hiddenFile.deleteOnExit();

        final File levelThree = new File(levelTwoPath + sep + levelThreeName);
        levelThree.mkdirs();
        levelThree.deleteOnExit();

        // Underscore-prefixed sibling of the third-level directory.
        final File levelThreeHidden = new File(levelTwoPath + sep + levelThreeHiddenName);
        levelThreeHidden.mkdirs();
        levelThreeHidden.deleteOnExit();

        // Regular files: 1 in level one + 2 in level two + 1 in level three = 4
        TestFileUtils.createTempFileInDirectory(levelOne.getAbsolutePath(), "paella");
        TestFileUtils.createTempFileInDirectory(levelTwo.getAbsolutePath(), "kalamari");
        TestFileUtils.createTempFileInDirectory(levelTwo.getAbsolutePath(), "fideua");
        TestFileUtils.createTempFileInDirectory(levelThree.getAbsolutePath(), "bravas");

        // Files inside the underscore-prefixed directories; not counted in the expected total.
        TestFileUtils.createTempFileInDirectory(levelTwoHidden.getAbsolutePath(), "kalamari");
        TestFileUtils.createTempFileInDirectory(levelTwoHidden.getAbsolutePath(), "fideua");
        TestFileUtils.createTempFileInDirectory(levelThreeHidden.getAbsolutePath(), "bravas");

        final Path rootPath = new Path(levelOne.toURI().toString());
        this.format.setFilePath(rootPath);
        this.config.setBoolean("recursive.file.enumeration", true);
        this.format.configure(this.config);

        final FileInputSplit[] enumerated = this.format.createInputSplits(1);
        Assert.assertEquals(4, enumerated.length);
    } catch (Exception failure) {
        // Any exception is reported as a test failure carrying the exception message.
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)

Example 95 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class BinaryInputFormatTest method testCreateInputSplitsWithOneFile.

/**
 * Checks that a binary input format splits a single file of three blocks into
 * three splits, each exactly one block long.
 *
 * <p>The block size is the size reported by {@code new BlockInfo().getInfoSize()}
 * plus 8 bytes; the temporary file is filled with {@code blockSize * 3} bytes.
 *
 * @throws IOException if the temporary file cannot be created or written, or if
 *         split creation fails
 */
@Test
public void testCreateInputSplitsWithOneFile() throws IOException {
    // create temporary file with 3 blocks
    final File tempFile = File.createTempFile("binary_input_format_test", "tmp");
    tempFile.deleteOnExit();
    final int blockInfoSize = new BlockInfo().getInfoSize();
    final int blockSize = blockInfoSize + 8;
    final int numBlocks = 3;
    // try-with-resources guarantees the stream is closed even if a write fails
    try (FileOutputStream fileOutputStream = new FileOutputStream(tempFile)) {
        for (int i = 0; i < blockSize * numBlocks; i++) {
            // single byte with value 1; the content itself is irrelevant, only the length matters
            fileOutputStream.write(1);
        }
    }
    final Configuration config = new Configuration();
    // NOTE(review): the configured value (blockSize + 10) differs from the value passed to
    // setBlockSize below; the assertions expect blockSize to be the effective one — confirm intent.
    config.setLong("input.block_size", blockSize + 10);
    final BinaryInputFormat<Record> inputFormat = new MyBinaryInputFormat();
    inputFormat.setFilePath(tempFile.toURI().toString());
    inputFormat.setBlockSize(blockSize);
    inputFormat.configure(config);
    FileInputSplit[] inputSplits = inputFormat.createInputSplits(numBlocks);
    Assert.assertEquals("Returns requested numbers of splits.", numBlocks, inputSplits.length);
    Assert.assertEquals("1. split has block size length.", blockSize, inputSplits[0].getLength());
    Assert.assertEquals("2. split has block size length.", blockSize, inputSplits[1].getLength());
    Assert.assertEquals("3. split has block size length.", blockSize, inputSplits[2].getLength());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) FileOutputStream(java.io.FileOutputStream) Record(org.apache.flink.types.Record) File(java.io.File) Test(org.junit.Test)

Aggregations

FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 140, Test (org.junit.Test): 119, Configuration (org.apache.flink.configuration.Configuration): 93, Path (org.apache.flink.core.fs.Path): 59, IOException (java.io.IOException): 45, File (java.io.File): 36, FileOutputStream (java.io.FileOutputStream): 23, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 20, Row (org.apache.flink.types.Row): 20, OutputStreamWriter (java.io.OutputStreamWriter): 18, ParseException (org.apache.flink.api.common.io.ParseException): 17, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 17, DoubleValue (org.apache.flink.types.DoubleValue): 17, IntValue (org.apache.flink.types.IntValue): 17, LongValue (org.apache.flink.types.LongValue): 17, StringValue (org.apache.flink.types.StringValue): 17, Value (org.apache.flink.types.Value): 17, Plan (org.apache.flink.api.common.Plan): 12, ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat): 12, Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 12