Search in sources :

Example 21 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class RowCsvInputFormatTest method testReadSparseWithPositionSetter.

/**
 * Reads a two-line, pipe-delimited file through a {@code RowCsvInputFormat} that is given
 * three INT field types together with the column positions {@code 0, 3, 7}, and verifies the
 * three values produced for each line as well as the end-of-input signalling afterwards.
 */
@Test
public void testReadSparseWithPositionSetter() throws Exception {
    String firstLine = "111|222|333|444|555|666|777|888|999|000|\n";
    String secondLine = "000|999|888|777|666|555|444|333|222|111|";
    FileInputSplit inputSplit = createTempFile(firstLine + secondLine);

    TypeInformation[] columnTypes = new TypeInformation[] {
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO
    };
    int[] selectedColumns = new int[] { 0, 3, 7 };

    RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, selectedColumns);
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    // Expected values per line: the entries found at positions 0, 3 and 7 of each input line.
    int[][] expectedRows = { { 111, 444, 888 }, { 0, 777, 333 } };

    Row row = new Row(3);
    for (int[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        assertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            assertEquals(expected[pos], row.getField(pos));
        }
    }

    // After both lines are consumed the format must report the end of the input.
    row = csvFormat.nextRecord(row);
    assertNull(row);
    assertTrue(csvFormat.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 22 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class RowCsvInputFormatTest method testEmptyFields.

/**
 * Reads lines that consist solely of field delimiters and verifies that the inspected fields
 * of the produced rows are {@code null}.
 *
 * <p>Only field {@code i} of the {@code i}-th record is checked. This relies on the number of
 * lines in the input (8) matching the arity of the row (8), so the field index stays in range.
 */
@Test
public void testEmptyFields() throws Exception {
    String fileContent = ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.BOOLEAN_TYPE_INFO, BasicTypeInfo.BYTE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.FLOAT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.SHORT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    // NOTE(review): the boolean argument presumably enables "empty column as null" handling;
    // confirm against the RowCsvInputFormat constructor.
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, true);
    format.setFieldDelimiter(",");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(8);
    int linesCnt = fileContent.split("\n").length;
    for (int i = 0; i < linesCnt; i++) {
        result = format.nextRecord(result);
        // nextRecord yields null once the input is exhausted; fail with a clear assertion
        // instead of a NullPointerException if a record is missing.
        assertNotNull("Expecting a record for line " + i, result);
        assertNull(result.getField(i));
    }
    // ensure no more rows
    assertNull(format.nextRecord(result));
    assertTrue(format.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 23 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class RowCsvInputFormatTest method testRemovingTrailingCR.

/**
 * Writes {@code FIRST_PART} and {@code SECOND_PART}, each followed by
 * {@code lineBreakerInFile}, to a temporary file, reads the file back with the record
 * delimiter set to {@code lineBreakerSetup}, and verifies that the two records equal
 * {@code FIRST_PART} and {@code SECOND_PART} exactly.
 *
 * @param lineBreakerInFile line break sequence written after each part in the file
 * @param lineBreakerSetup record delimiter configured on the input format
 * @throws IOException if the temporary file cannot be created, written or read
 */
private static void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) throws IOException {
    String fileContent = FIRST_PART + lineBreakerInFile + SECOND_PART + lineBreakerInFile;
    // create input file
    File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
    try {
        wrt.write(fileContent);
    } finally {
        // Release the file handle even when writing fails.
        wrt.close();
    }
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat inputFormat = new RowCsvInputFormat(new Path(tempFile.toURI().toString()), fieldTypes);
    inputFormat.configure(new Configuration());
    inputFormat.setDelimiter(lineBreakerSetup);
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Row result = inputFormat.nextRecord(new Row(1));
    assertNotNull("Expecting to not return null", result);
    assertEquals(FIRST_PART, result.getField(0));
    result = inputFormat.nextRecord(result);
    assertNotNull("Expecting to not return null", result);
    assertEquals(SECOND_PART, result.getField(0));
}
Also used : Path(org.apache.flink.core.fs.Path) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) Row(org.apache.flink.types.Row) File(java.io.File) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation)

Example 24 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class TextInputFormatTest method testSimpleRead.

/**
 * Writes two lines to a temporary file and verifies that a {@code TextInputFormat} returns
 * them in order as separate records and then signals the end of the input.
 */
@Test
public void testSimpleRead() {
    final String FIRST = "First line";
    final String SECOND = "Second line";
    try {
        // create input file
        File tempFile = File.createTempFile("TextInputFormatTest", "tmp");
        tempFile.deleteOnExit();
        tempFile.setWritable(true);
        PrintStream ps = new PrintStream(tempFile);
        try {
            ps.println(FIRST);
            ps.println(SECOND);
        } finally {
            // Release the file handle even when writing fails.
            ps.close();
        }
        TextInputFormat inputFormat = new TextInputFormat(new Path(tempFile.toURI().toString()));
        Configuration parameters = new Configuration();
        inputFormat.configure(parameters);
        FileInputSplit[] splits = inputFormat.createInputSplits(1);
        assertTrue("expected at least one input split", splits.length >= 1);
        inputFormat.open(splits[0]);
        assertFalse(inputFormat.reachedEnd());
        String result = inputFormat.nextRecord("");
        assertNotNull("Expecting first record here", result);
        assertEquals(FIRST, result);
        assertFalse(inputFormat.reachedEnd());
        result = inputFormat.nextRecord(result);
        assertNotNull("Expecting second record here", result);
        assertEquals(SECOND, result);
        assertTrue(inputFormat.reachedEnd() || null == inputFormat.nextRecord(result));
    } catch (AssertionError e) {
        // Let assertion failures surface unchanged so their message is not replaced by the
        // generic failure text below.
        throw e;
    } catch (Throwable t) {
        System.err.println("test failed with exception: " + t.getMessage());
        t.printStackTrace(System.err);
        fail("Test erroneous: " + t);
    }
}
Also used : Path(org.apache.flink.core.fs.Path) PrintStream(java.io.PrintStream) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) File(java.io.File) Test(org.junit.Test)

Example 25 with FileInputSplit

use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.

the class CsvInputFormatTest method testPojoTypeWithMappingInfoAndPartialField.

/**
 * Writes two four-column CSV lines to a temporary file and reads the first one into a
 * {@code PojoItem} using a {@code PojoCsvInputFormat} that is given the field names
 * {@code field1} and {@code field4} and the inclusion mask {@code {true, false, false, true}}.
 * Verifies that {@code field1} and {@code field4} hold the first and fourth column values of
 * the first line.
 */
@Test
public void testPojoTypeWithMappingInfoAndPartialField() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
    try {
        wrt.write("123,3.123,AAA,BBB\n");
        wrt.write("456,1.123,BBB,AAA\n");
    } finally {
        // Release the file handle even when writing fails.
        wrt.close();
    }
    @SuppressWarnings("unchecked") PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
    CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[] { "field1", "field4" }, new boolean[] { true, false, false, true });
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    PojoItem item = new PojoItem();
    inputFormat.nextRecord(item);
    // Only the first line is read: column 1 maps to field1, column 4 to field4.
    assertEquals(123, item.field1);
    assertEquals("BBB", item.field4);
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) PojoTypeInfo(org.apache.flink.api.java.typeutils.PojoTypeInfo) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) Test(org.junit.Test)

Aggregations

FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 140, Test (org.junit.Test): 119, Configuration (org.apache.flink.configuration.Configuration): 93, Path (org.apache.flink.core.fs.Path): 59, IOException (java.io.IOException): 45, File (java.io.File): 36, FileOutputStream (java.io.FileOutputStream): 23, TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 20, Row (org.apache.flink.types.Row): 20, OutputStreamWriter (java.io.OutputStreamWriter): 18, ParseException (org.apache.flink.api.common.io.ParseException): 17, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 17, DoubleValue (org.apache.flink.types.DoubleValue): 17, IntValue (org.apache.flink.types.IntValue): 17, LongValue (org.apache.flink.types.LongValue): 17, StringValue (org.apache.flink.types.StringValue): 17, Value (org.apache.flink.types.Value): 17, Plan (org.apache.flink.api.common.Plan): 12, ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat): 12, Tuple1 (org.apache.flink.api.java.tuple.Tuple1): 12