Search in sources:

Example 41 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testSqlTimeFields.

/**
 * Reads two pipe-delimited records made of a date, a time and two timestamps and checks that
 * every field comes back as the matching {@code java.sql} value.
 *
 * <p>The last column is written without zero padding ({@code 1990-1-4 2:2:5}) and, in the
 * second record, with a fractional second, so both forms are exercised.
 *
 * @throws Exception if the temp file cannot be created or the format fails while reading
 */
@Test
public void testSqlTimeFields() throws Exception {
    String fileContent = "1990-10-14|02:42:25|1990-10-14 02:42:25.123|1990-1-4 2:2:5\n"
            + "1990-10-14|02:42:25|1990-10-14 02:42:25.123|1990-1-4 2:2:5.3\n";
    FileInputSplit split = createTempFile(fileContent);
    // Wildcard element type instead of a raw TypeInformation array.
    TypeInformation<?>[] fieldTypes = { SqlTimeTypeInfo.DATE, SqlTimeTypeInfo.TIME, SqlTimeTypeInfo.TIMESTAMP, SqlTimeTypeInfo.TIMESTAMP };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(4);

        // First record: unpadded timestamp without fractional seconds.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(Date.valueOf("1990-10-14"), result.getField(0));
        assertEquals(Time.valueOf("02:42:25"), result.getField(1));
        assertEquals(Timestamp.valueOf("1990-10-14 02:42:25.123"), result.getField(2));
        assertEquals(Timestamp.valueOf("1990-01-04 02:02:05"), result.getField(3));

        // Second record: same values, last timestamp carries a fractional second.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(Date.valueOf("1990-10-14"), result.getField(0));
        assertEquals(Time.valueOf("02:42:25"), result.getField(1));
        assertEquals(Timestamp.valueOf("1990-10-14 02:42:25.123"), result.getField(2));
        assertEquals(Timestamp.valueOf("1990-01-04 02:02:05.3"), result.getField(3));

        // No third record: the format must report exhaustion.
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        // Close the format even when an assertion fails so the opened split is not leaked.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 42 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testReadFirstN.

/**
 * Declares only two integer field types for a file whose lines hold five pipe-delimited
 * columns and checks that the first two columns of each line are returned.
 *
 * @throws Exception if the temp file cannot be created or the format fails while reading
 */
@Test
public void testReadFirstN() throws Exception {
    String fileContent = "111|222|333|444|555|\n666|777|888|999|000|\n";
    FileInputSplit split = createTempFile(fileContent);
    // Wildcard element type instead of a raw TypeInformation array.
    TypeInformation<?>[] fieldTypes = { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(2);

        // First line: only the leading two columns are expected in the row.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(111, result.getField(0));
        assertEquals(222, result.getField(1));

        // Second line.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(666, result.getField(0));
        assertEquals(777, result.getField(1));

        // Input is exhausted after two lines.
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        // Close the format even when an assertion fails so the opened split is not leaked.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 43 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method readMixedQuotedStringFields.

/**
 * Reads string records that mix quoted and unquoted fields, using {@code '@'} as the quote
 * character and {@code '|'} as the field delimiter.
 *
 * <p>Checks that a delimiter inside a quoted field is kept as part of the value, that empty
 * fields come back as empty strings, and that a line of only delimiters yields three empty
 * strings.
 *
 * @throws Exception if the temp file cannot be created or the format fails while reading
 */
@Test
public void readMixedQuotedStringFields() throws Exception {
    String fileContent = "@a|b|c@|def|@ghijk@\nabc||@|hhg@\n|||\n";
    FileInputSplit split = createTempFile(fileContent);
    // Wildcard element type instead of a raw TypeInformation array.
    TypeInformation<?>[] fieldTypes = { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    format.configure(new Configuration());
    format.enableQuotedStringParsing('@');
    format.open(split);
    try {
        Row result = new Row(3);

        // Quoted first and last field; the quoted delimiter stays in the value.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a|b|c", result.getField(0));
        assertEquals("def", result.getField(1));
        assertEquals("ghijk", result.getField(2));

        // Empty middle field and a quoted field that starts with the delimiter.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("|hhg", result.getField(2));

        // Line consisting only of delimiters.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("", result.getField(2));

        // Input is exhausted after three lines.
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        // Close the format even when an assertion fails so the opened split is not leaked.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 44 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method ignoreMultiCharPrefixComments.

/**
 * Configures {@code "//"} as the comment prefix and checks that lines starting with it are
 * skipped, both before and after the data lines, so only the two data records are returned.
 *
 * @throws Exception if the temp file cannot be created or the format fails while reading
 */
@Test
public void ignoreMultiCharPrefixComments() throws Exception {
    String fileContent = "//description of the data\n"
            + "//successive commented line\n"
            + "this is|1|2.0|\n"
            + "a test|3|4.0|\n"
            + "//next|5|6.0|\n";
    FileInputSplit split = createTempFile(fileContent);
    // Wildcard element type instead of a raw TypeInformation array.
    TypeInformation<?>[] fieldTypes = { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    format.setCommentPrefix("//");
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(3);

        // The two leading comment lines are skipped; this is the first data line.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.getField(0));
        assertEquals(1, result.getField(1));
        assertEquals(2.0, result.getField(2));

        // Second data line.
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.getField(0));
        assertEquals(3, result.getField(1));
        assertEquals(4.0, result.getField(2));

        // The trailing commented line must not be returned as a record.
        result = format.nextRecord(result);
        assertNull(result);
    } finally {
        // Close the format even when an assertion fails so the opened split is not leaked.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 45 with TypeInformation

use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

the class RowCsvInputFormatTest method testDoubleFields.

/**
 * Reads two pipe-delimited records of five doubles each and checks every parsed value.
 *
 * <p>The first line has no trailing delimiter while the second one does, and the last value
 * of the second line is written as {@code 00.0} and expected as {@code 0.0}.
 *
 * @throws Exception if the temp file cannot be created or the format fails while reading
 */
@Test
public void testDoubleFields() throws Exception {
    String fileContent = "11.1|22.2|33.3|44.4|55.5\n66.6|77.7|88.8|99.9|00.0|\n";
    FileInputSplit split = createTempFile(fileContent);
    // Wildcard element type instead of a raw TypeInformation array.
    TypeInformation<?>[] fieldTypes = { BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(5);

        // First line (no trailing delimiter).
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(11.1, result.getField(0));
        assertEquals(22.2, result.getField(1));
        assertEquals(33.3, result.getField(2));
        assertEquals(44.4, result.getField(3));
        assertEquals(55.5, result.getField(4));

        // Second line (with trailing delimiter).
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(66.6, result.getField(0));
        assertEquals(77.7, result.getField(1));
        assertEquals(88.8, result.getField(2));
        assertEquals(99.9, result.getField(3));
        assertEquals(0.0, result.getField(4));

        // Input is exhausted after two lines.
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        // Close the format even when an assertion fails so the opened split is not leaked.
        format.close();
    }
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Aggregations

TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 51, Test (org.junit.Test): 28, Row (org.apache.flink.types.Row): 21, Configuration (org.apache.flink.configuration.Configuration): 20, FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 20, TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 10, ArrayList (java.util.ArrayList): 9, ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 8, CompositeType (org.apache.flink.api.common.typeutils.CompositeType): 8, IOException (java.io.IOException): 7, Type (java.lang.reflect.Type): 7, GenericArrayType (java.lang.reflect.GenericArrayType): 6, ParameterizedType (java.lang.reflect.ParameterizedType): 6, Random (java.util.Random): 6, InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException): 6, TypeExtractionUtils.isClassType (org.apache.flink.api.java.typeutils.TypeExtractionUtils.isClassType): 6, ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo): 6, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 5, TypeVariable (java.lang.reflect.TypeVariable): 4, MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 4