Search in sources :

Example 26 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testTailingEmptyFields.

/**
 * Checks that empty trailing fields are read as empty strings and that a record with too few
 * fields is rejected.
 *
 * <p>The input has five lines that must each yield a three-field row (the fourth line carries
 * one surplus trailing delimiter) followed by a two-field line, for which {@code nextRecord}
 * is expected to throw an {@link IOException}.
 *
 * @throws Exception if creating the temp file or opening/reading the format fails unexpectedly
 */
@Test
public void testTailingEmptyFields() throws Exception {
    String fileContent = "abc|def|ghijk\n" + "abc|def|\n" + "abc||\n" + "|||\n" + "||\n" + "abc|def\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat format = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH).setFieldDelimiter('|').build();
    format.configure(new Configuration());
    format.open(split);
    try {
        // One entry per well-formed input line, in file order.
        String[][] expectedRows = {
            { "abc", "def", "ghijk" },
            { "abc", "def", "" },
            { "abc", "", "" },
            { "", "", "" },
            { "", "", "" }
        };
        Row result = new Row(3);
        for (String[] expectedRow : expectedRows) {
            result = format.nextRecord(result);
            assertNotNull(result);
            for (int pos = 0; pos < expectedRow.length; pos++) {
                assertEquals(expectedRow[pos], result.getField(pos));
            }
        }
        try {
            format.nextRecord(result);
            fail("Expected an IOException: row length mismatch, 3 fields expected but was 2");
        } catch (IOException ignored) {
            // Expected: the last line provides only two of the three declared fields.
        }
    } finally {
        // Release the underlying file stream even when an assertion fails.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) IOException(java.io.IOException) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 27 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testQuotedStringParsingWithIncludeFields.

/**
 * Checks that quoted string fields are parsed correctly when only a subset of the columns is
 * selected.
 *
 * <p>A single record with five quoted, pipe-delimited fields is written to a temp file; the
 * format is configured to select columns 0 and 2, and the resulting two-field row is compared
 * against the unquoted values of those columns.
 *
 * @throws Exception if the temp file cannot be written or the format cannot be opened/read
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
    String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|" + "\"Blahblah <blah@blahblah.org>\"|\"blaaa\"|\"blubb\"";
    File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer (and its file stream) even if the write fails.
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }
    TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat inputFormat = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), new Path(tempFile.toURI().toString())).setFieldDelimiter('|').setSelectedFields(new int[] { 0, 2 }).setQuoteCharacter('"').build();
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    try {
        Row record = inputFormat.nextRecord(new Row(2));
        assertEquals("20:41:52-1-3-2015", record.getField(0));
        assertEquals("Blahblah <blah@blahblah.org>", record.getField(1));
    } finally {
        // Release the underlying file stream even when an assertion fails.
        inputFormat.close();
    }
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) Row(org.apache.flink.types.Row) File(java.io.File) Test(org.junit.Test)

Example 28 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testIntegerFields.

/**
 * Reads two pipe-delimited records of five integer columns each and checks every parsed
 * value, then checks that the format reports the end of the input.
 *
 * <p>The second input line ends with an extra delimiter and its last column is written as
 * {@code 000}, which is expected to be read as {@code 0}.
 */
@Test
public void testIntegerFields() throws Exception {
    final String csv = "111|222|333|444|555\n666|777|888|999|000|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final RowCsvInputFormat reader = RowCsvInputFormat.builder(new RowTypeInfo(columnTypes), PATH).setFieldDelimiter('|').build();
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // Expected column values per record, in file order.
    final int[][] expectedRecords = {
        { 111, 222, 333, 444, 555 },
        { 666, 777, 888, 999, 0 }
    };
    Row row = new Row(5);
    for (int[] expectedRecord : expectedRecords) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        for (int column = 0; column < expectedRecord.length; column++) {
            assertEquals(expectedRecord[column], row.getField(column));
        }
    }

    // No third record: the reader returns null and reports the end of the split.
    row = reader.nextRecord(row);
    assertNull(row);
    assertTrue(reader.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 29 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testReadSparseWithPositionSetter.

/**
 * Reads two records of ten integer columns while selecting only columns 0, 3 and 7, and
 * checks that each returned row holds exactly those three values in that order.
 *
 * <p>Afterwards the format must return {@code null} and report that the end was reached.
 */
@Test
public void testReadSparseWithPositionSetter() throws Exception {
    final String csv = "111|222|333|444|555|666|777|888|999|000|\n" + "000|999|888|777|666|555|444|333|222|111|";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final RowCsvInputFormat reader = RowCsvInputFormat.builder(new RowTypeInfo(columnTypes), PATH).setFieldDelimiter('|').setSelectedFields(new int[] { 0, 3, 7 }).build();
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // Values of the selected columns (0, 3, 7) for each input line.
    final int[][] projectedRecords = {
        { 111, 444, 888 },
        { 0, 777, 333 }
    };
    Row row = new Row(3);
    for (int[] projected : projectedRecords) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        for (int column = 0; column < projected.length; column++) {
            assertEquals(projected[column], row.getField(column));
        }
    }

    // The input is exhausted after the second record.
    row = reader.nextRecord(row);
    assertNull(row);
    assertTrue(reader.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 30 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testEmptyFields.

/**
 * Checks that empty columns are returned as {@code null} for every declared field type when
 * the null literal is configured as the empty string.
 *
 * <p>The input consists solely of delimiters (some lines carry one surplus trailing comma),
 * so every field of every row is expected to be {@code null}. All fields of each row are
 * checked, rather than a single field per line, so the check does not rely on the number of
 * lines matching the number of columns.
 *
 * @throws Exception if creating the temp file or opening/reading the format fails unexpectedly
 */
@Test
public void testEmptyFields() throws Exception {
    String fileContent = ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] { BasicTypeInfo.BOOLEAN_TYPE_INFO, BasicTypeInfo.BYTE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.FLOAT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.SHORT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat format = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH).setFieldDelimiter(',').setNullLiteral("").build();
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(fieldTypes.length);
        int linesCnt = fileContent.split("\n").length;
        for (int line = 0; line < linesCnt; line++) {
            result = format.nextRecord(result);
            // Fail with a clear message instead of a NullPointerException if a row is missing.
            assertTrue("Expected a record for input line " + line, result != null);
            for (int pos = 0; pos < fieldTypes.length; pos++) {
                assertNull(result.getField(pos));
            }
        }
        // ensure no more rows
        assertNull(format.nextRecord(result));
        assertTrue(format.reachedEnd());
    } finally {
        // Release the underlying file stream even when an assertion fails.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Aggregations

RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo): 50; Test (org.junit.Test): 34; Row (org.apache.flink.types.Row): 32; TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 26; Configuration (org.apache.flink.configuration.Configuration): 16; FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 15; ArrayList (java.util.ArrayList): 10; Transformation (org.apache.flink.api.dag.Transformation): 8; OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 8; SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation): 8; TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation): 8; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 6; PythonKeyedProcessOperator (org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator): 6; IOException (java.io.IOException): 4; MapTypeInfo (org.apache.flink.api.java.typeutils.MapTypeInfo): 4; File (java.io.File): 3; FileOutputStream (java.io.FileOutputStream): 3; OutputStreamWriter (java.io.OutputStreamWriter): 3; LocalDateTime (java.time.LocalDateTime): 3; PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo): 3