Search in sources :

Example 6 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testTailingEmptyFields.

/**
 * Reads string rows whose trailing fields are empty, using "|-" as the field delimiter.
 *
 * <p>The first five input lines are each expected to produce a three-field row in which
 * missing values are read as empty strings. The final line ("abc|-def") is expected to
 * make {@code nextRecord} throw a {@link ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
    String fileContent = "abc|-def|-ghijk\n" + "abc|-def|-\n" + "abc|-|-\n" + "|-|-|-\n" + "|-|-\n" + "abc|-def\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    // The delimiter passed to the constructor is replaced here by the two-character "|-".
    format.setFieldDelimiter("|-");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);

    // Line 1: all three fields present.
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("def", result.getField(1));
    assertEquals("ghijk", result.getField(2));

    // Line 2: last field empty.
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("def", result.getField(1));
    assertEquals("", result.getField(2));

    // Line 3: last two fields empty.
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));

    // Line 4: all fields empty, with a trailing delimiter.
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));

    // Line 5: all fields empty, without a trailing delimiter.
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));

    // Line 6 has only two fields, so reading it must fail.
    try {
        format.nextRecord(result);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
        // Expected: the row is too short for the three declared fields.
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) ParseException(org.apache.flink.api.common.io.ParseException) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 7 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testIntegerFields.

/**
 * Reads two lines of five "|"-separated integers and checks every parsed value,
 * then checks that no further record is returned and the format reports end of input.
 */
@Test
public void testIntegerFields() throws Exception {
    final String input = "111|222|333|444|555\n666|777|888|999|000|\n";
    final FileInputSplit inputSplit = createTempFile(input);

    // Five integer columns.
    final TypeInformation[] columnTypes = new TypeInformation[5];
    for (int c = 0; c < columnTypes.length; c++) {
        columnTypes[c] = BasicTypeInfo.INT_TYPE_INFO;
    }

    final RowCsvInputFormat reader = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    reader.setFieldDelimiter("|");
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // Expected values, one inner array per input line ("000" is expected to parse as 0).
    final int[][] expectedRows = {
        { 111, 222, 333, 444, 555 },
        { 666, 777, 888, 999, 0 }
    };

    Row row = new Row(5);
    for (int[] expectedRow : expectedRows) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        for (int col = 0; col < expectedRow.length; col++) {
            assertEquals(expectedRow[col], row.getField(col));
        }
    }

    // Nothing left to read.
    row = reader.nextRecord(row);
    assertNull(row);
    assertTrue(reader.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 8 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method readStringFieldsWithTrailingDelimiters.

/**
 * Reads three lines of "|-"-separated strings, including empty fields and a line that
 * ends with a delimiter, and checks each field; then checks that the input is exhausted.
 */
@Test
public void readStringFieldsWithTrailingDelimiters() throws Exception {
    final String input = "abc|-def|-ghijk\nabc|-|-hhg\n|-|-|-\n";
    final FileInputSplit inputSplit = createTempFile(input);

    // Three string columns.
    final TypeInformation[] columnTypes = new TypeInformation[3];
    for (int c = 0; c < columnTypes.length; c++) {
        columnTypes[c] = BasicTypeInfo.STRING_TYPE_INFO;
    }

    final RowCsvInputFormat reader = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    reader.setFieldDelimiter("|-");
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // Expected values, one inner array per input line.
    final String[][] expectedRows = {
        { "abc", "def", "ghijk" },
        { "abc", "", "hhg" },
        { "", "", "" }
    };

    Row row = new Row(3);
    for (String[] expectedRow : expectedRows) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        for (int col = 0; col < expectedRow.length; col++) {
            assertEquals(expectedRow[col], row.getField(col));
        }
    }

    // Nothing left to read.
    row = reader.nextRecord(row);
    assertNull(row);
    assertTrue(reader.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 9 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testReadSparseWithPositionSetter.

/**
 * Reads two ten-column lines while passing the index array {0, 3, 7} to the format,
 * and checks that each resulting row holds the values found at those positions.
 */
@Test
public void testReadSparseWithPositionSetter() throws Exception {
    final String input = "111|222|333|444|555|666|777|888|999|000|\n" + "000|999|888|777|666|555|444|333|222|111|";
    final FileInputSplit inputSplit = createTempFile(input);

    // Three integer output columns.
    final TypeInformation[] columnTypes = new TypeInformation[3];
    for (int c = 0; c < columnTypes.length; c++) {
        columnTypes[c] = BasicTypeInfo.INT_TYPE_INFO;
    }

    final int[] selectedPositions = new int[] { 0, 3, 7 };
    final RowCsvInputFormat reader = new RowCsvInputFormat(PATH, columnTypes, selectedPositions);
    reader.setFieldDelimiter("|");
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // Expected values, one inner array per input line.
    final int[][] expectedRows = {
        { 111, 444, 888 },
        { 0, 777, 333 }
    };

    Row row = new Row(3);
    for (int[] expectedRow : expectedRows) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        for (int col = 0; col < expectedRow.length; col++) {
            assertEquals(expectedRow[col], row.getField(col));
        }
    }

    // Nothing left to read.
    row = reader.nextRecord(row);
    assertNull(row);
    assertTrue(reader.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 10 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testEmptyFields.

/**
 * Reads eight lines consisting only of "," delimiters into rows of eight differently
 * typed fields and checks that empty fields come back as {@code null}.
 *
 * <p>Only field {@code i} of row {@code i} is checked, so each of the eight field types
 * is verified exactly once.
 */
@Test
public void testEmptyFields() throws Exception {
    String fileContent = ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.BOOLEAN_TYPE_INFO, BasicTypeInfo.BYTE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.FLOAT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.SHORT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    // NOTE(review): the boolean argument presumably enables "empty column as null" — confirm against the constructor.
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, true);
    format.setFieldDelimiter(",");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(8);
    // The number of input lines equals the number of fields, so i serves as both row and field index.
    int linesCnt = fileContent.split("\n").length;
    for (int i = 0; i < linesCnt; i++) {
        result = format.nextRecord(result);
        // Fail with a clear assertion instead of a NullPointerException if the input ends early.
        assertNotNull(result);
        assertNull(result.getField(i));
    }
    // ensure no more rows
    assertNull(format.nextRecord(result));
    assertTrue(format.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Aggregations

TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 51, Test (org.junit.Test): 28, Row (org.apache.flink.types.Row): 21, Configuration (org.apache.flink.configuration.Configuration): 20, FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 20, TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 10, ArrayList (java.util.ArrayList): 9, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 8, CompositeType (org.apache.flink.api.common.typeutils.CompositeType): 8, IOException (java.io.IOException): 7, Type (java.lang.reflect.Type): 7, GenericArrayType (java.lang.reflect.GenericArrayType): 6, ParameterizedType (java.lang.reflect.ParameterizedType): 6, Random (java.util.Random): 6, InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException): 6, TypeExtractionUtils.isClassType (org.apache.flink.api.java.typeutils.TypeExtractionUtils.isClassType): 6, ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo): 6, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 5, TypeVariable (java.lang.reflect.TypeVariable): 4, MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 4