Search in sources :

Example 66 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class CsvInputFormatTest method testRemovingTrailingCR.

/**
 * Writes {@code FIRST_PART} and {@code SECOND_PART} to a temporary file, each followed by
 * {@code lineBreakerInFile}, then reads the file back through a single-column CSV input format
 * configured with {@code lineBreakerSetup} as its record delimiter and checks that the two
 * records equal the two constants exactly.
 *
 * <p>Any throwable raised along the way (including assertion failures) is printed to
 * {@code System.err} and turned into a test failure.
 *
 * @param lineBreakerInFile line terminator written after each part in the file
 * @param lineBreakerSetup record delimiter handed to the input format
 */
private void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) {
    String fileContent = CsvInputFormatTest.FIRST_PART + lineBreakerInFile + CsvInputFormatTest.SECOND_PART + lineBreakerInFile;
    try {
        // create input file
        File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
        tempFile.deleteOnExit();
        tempFile.setWritable(true);
        // try-with-resources so the stream is released even if the write fails
        try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
            wrt.write(fileContent);
        }
        final TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
        final CsvInputFormat<Tuple1<String>> inputFormat = new TupleCsvInputFormat<Tuple1<String>>(new Path(tempFile.toURI().toString()), typeInfo);
        Configuration parameters = new Configuration();
        inputFormat.configure(parameters);
        inputFormat.setDelimiter(lineBreakerSetup);
        FileInputSplit[] splits = inputFormat.createInputSplits(1);
        inputFormat.open(splits[0]);
        try {
            Tuple1<String> result = inputFormat.nextRecord(new Tuple1<String>());
            assertNotNull("Expecting to not return null", result);
            assertEquals(FIRST_PART, result.f0);
            result = inputFormat.nextRecord(result);
            assertNotNull("Expecting to not return null", result);
            assertEquals(SECOND_PART, result.f0);
        } finally {
            // release the file stream held by the opened format
            inputFormat.close();
        }
    } catch (Throwable t) {
        System.err.println("test failed with exception: " + t.getMessage());
        t.printStackTrace(System.err);
        fail("Test erroneous");
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File)

Example 67 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class CsvInputFormatTest method testDoubleFields.

/**
 * Reads two pipe-delimited rows of five doubles each into a {@code Tuple5} and checks every
 * field, then checks that a third read returns {@code null} and the format reports end of input.
 *
 * <p>The first row in the fixture has no trailing field delimiter, the second one does.
 * Any exception is converted into a test failure carrying its class name and message.
 */
@Test
public void testDoubleFields() throws IOException {
    try {
        final String csv = "11.1|22.2|33.3|44.4|55.5\n66.6|77.7|88.8|99.9|00.0|\n";
        final FileInputSplit inputSplit = createTempFile(csv);

        final TupleTypeInfo<Tuple5<Double, Double, Double, Double, Double>> tupleType = TupleTypeInfo.getBasicTupleTypeInfo(Double.class, Double.class, Double.class, Double.class, Double.class);
        final CsvInputFormat<Tuple5<Double, Double, Double, Double, Double>> csvFormat = new TupleCsvInputFormat<>(PATH, tupleType);
        csvFormat.setFieldDelimiter("|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first row
        final Tuple5<Double, Double, Double, Double, Double> reuse = new Tuple5<>();
        Tuple5<Double, Double, Double, Double, Double> row = csvFormat.nextRecord(reuse);
        assertNotNull(row);
        assertEquals(Double.valueOf(11.1), row.f0);
        assertEquals(Double.valueOf(22.2), row.f1);
        assertEquals(Double.valueOf(33.3), row.f2);
        assertEquals(Double.valueOf(44.4), row.f3);
        assertEquals(Double.valueOf(55.5), row.f4);

        // second row
        row = csvFormat.nextRecord(row);
        assertNotNull(row);
        assertEquals(Double.valueOf(66.6), row.f0);
        assertEquals(Double.valueOf(77.7), row.f1);
        assertEquals(Double.valueOf(88.8), row.f2);
        assertEquals(Double.valueOf(99.9), row.f3);
        assertEquals(Double.valueOf(00.0), row.f4);

        // input exhausted
        row = csvFormat.nextRecord(row);
        assertNull(row);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception e) {
        fail("Test failed due to a " + e.getClass().getName() + ": " + e.getMessage());
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) ParseException(org.apache.flink.api.common.io.ParseException) Test(org.junit.Test)

Example 68 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class CsvInputFormatTest method testReadFirstN.

/**
 * Reads rows that contain five pipe-delimited integer columns into a {@code Tuple2} whose type
 * info declares only two {@code Integer} fields, and checks that the tuple holds the first two
 * columns of each row. A third read must return {@code null} with the format at end of input.
 *
 * <p>Any exception is converted into a test failure carrying its class name and message.
 */
@Test
public void testReadFirstN() throws IOException {
    try {
        final String csv = "111|222|333|444|555|\n666|777|888|999|000|\n";
        final FileInputSplit inputSplit = createTempFile(csv);

        final TupleTypeInfo<Tuple2<Integer, Integer>> tupleType = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class);
        final CsvInputFormat<Tuple2<Integer, Integer>> csvFormat = new TupleCsvInputFormat<>(PATH, tupleType);
        csvFormat.setFieldDelimiter("|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first row
        final Tuple2<Integer, Integer> reuse = new Tuple2<>();
        Tuple2<Integer, Integer> row = csvFormat.nextRecord(reuse);
        assertNotNull(row);
        assertEquals(Integer.valueOf(111), row.f0);
        assertEquals(Integer.valueOf(222), row.f1);

        // second row
        row = csvFormat.nextRecord(row);
        assertNotNull(row);
        assertEquals(Integer.valueOf(666), row.f0);
        assertEquals(Integer.valueOf(777), row.f1);

        // input exhausted
        row = csvFormat.nextRecord(row);
        assertNull(row);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception e) {
        fail("Test failed due to a " + e.getClass().getName() + ": " + e.getMessage());
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) ParseException(org.apache.flink.api.common.io.ParseException) Test(org.junit.Test)

Example 69 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class CsvInputFormatTest method testReadSparseWithPositionSetter.

/**
 * Reads rows of ten pipe-delimited integer columns into a {@code Tuple3}, constructing the
 * format with the position array {@code {0, 3, 7}}, and checks that the tuple fields match the
 * values found at those column indices in each row. A third read must return {@code null} with
 * the format at end of input.
 *
 * <p>The last row of the fixture is not terminated by a newline. Any exception is converted
 * into a test failure carrying its class name and message.
 */
@Test
public void testReadSparseWithPositionSetter() throws IOException {
    try {
        final String csv = "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
        final FileInputSplit inputSplit = createTempFile(csv);

        final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> tupleType = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
        final int[] selectedColumns = new int[] { 0, 3, 7 };
        final CsvInputFormat<Tuple3<Integer, Integer, Integer>> csvFormat = new TupleCsvInputFormat<>(PATH, tupleType, selectedColumns);
        csvFormat.setFieldDelimiter("|");
        csvFormat.configure(new Configuration());
        csvFormat.open(inputSplit);

        // first row
        final Tuple3<Integer, Integer, Integer> reuse = new Tuple3<>();
        Tuple3<Integer, Integer, Integer> row = csvFormat.nextRecord(reuse);
        assertNotNull(row);
        assertEquals(Integer.valueOf(111), row.f0);
        assertEquals(Integer.valueOf(444), row.f1);
        assertEquals(Integer.valueOf(888), row.f2);

        // second row
        row = csvFormat.nextRecord(row);
        assertNotNull(row);
        assertEquals(Integer.valueOf(000), row.f0);
        assertEquals(Integer.valueOf(777), row.f1);
        assertEquals(Integer.valueOf(333), row.f2);

        // input exhausted
        row = csvFormat.nextRecord(row);
        assertNull(row);
        assertTrue(csvFormat.reachedEnd());
    } catch (Exception e) {
        fail("Test failed due to a " + e.getClass().getName() + ": " + e.getMessage());
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) ParseException(org.apache.flink.api.common.io.ParseException) Test(org.junit.Test)

Example 70 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class DelimitedInputFormatTest method createTempFile.

/**
 * Writes {@code contents} to a fresh temporary file using the given charset and returns a
 * {@link FileInputSplit} built from that file's URI, offset 0, the file's length after writing,
 * and the single host name {@code "localhost"}.
 *
 * <p>The file is registered for deletion when the JVM exits.
 *
 * @param contents text to write into the file
 * @param charset name of the charset used to encode {@code contents}
 * @return a split referring to the written temporary file
 * @throws IOException if the file cannot be created or written, or the charset name is not
 *     supported
 */
static FileInputSplit createTempFile(String contents, String charset) throws IOException {
    final File file = File.createTempFile("test_contents", "tmp");
    file.deleteOnExit();

    // Declare the stream as its own resource: if the writer constructor rejects the charset
    // name, the already-opened file stream is still closed.
    try (FileOutputStream fileStream = new FileOutputStream(file);
            Writer writer = new OutputStreamWriter(fileStream, charset)) {
        writer.write(contents);
    }

    final Path location = new Path(file.toURI().toString());
    final String[] hosts = { "localhost" };
    return new FileInputSplit(0, location, 0, file.length(), hosts);
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) Writer(java.io.Writer)

Aggregations

FileInputSplit (org.apache.flink.core.fs.FileInputSplit)140 Test (org.junit.Test)119 Configuration (org.apache.flink.configuration.Configuration)93 Path (org.apache.flink.core.fs.Path)59 IOException (java.io.IOException)45 File (java.io.File)36 FileOutputStream (java.io.FileOutputStream)23 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)20 Row (org.apache.flink.types.Row)20 OutputStreamWriter (java.io.OutputStreamWriter)18 ParseException (org.apache.flink.api.common.io.ParseException)17 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)17 DoubleValue (org.apache.flink.types.DoubleValue)17 IntValue (org.apache.flink.types.IntValue)17 LongValue (org.apache.flink.types.LongValue)17 StringValue (org.apache.flink.types.StringValue)17 Value (org.apache.flink.types.Value)17 Plan (org.apache.flink.api.common.Plan)12 ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat)12 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)12