Search in sources:

Example 16 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method ignorePrefixComments.

/**
 * Reads a pipe-delimited file that contains '#'-prefixed lines with comments allowed and
 * checks that only the two data lines come back as records, followed by {@code null}.
 */
@Test
public void ignorePrefixComments() throws Exception {
    final String csv =
            "#description of the data\n"
                    + "#successive commented line\n"
                    + "this is|1|2.0|\n"
                    + "a test|3|4.0|\n"
                    + "#next|5|6.0|\n";
    final FileInputSplit inputSplit = createTempFile(csv);

    final TypeInformation[] columnTypes = {
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.DOUBLE_TYPE_INFO
    };
    final RowCsvInputFormat reader =
            RowCsvInputFormat.builder(new RowTypeInfo(columnTypes), PATH)
                    .setFieldDelimiter('|')
                    .setAllowComments(true)
                    .build();
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // One entry per expected record, in file order; values are boxed so they compare
    // against the Object returned by Row#getField.
    final Object[][] expectedRecords = {
        {"this is", 1, 2.0},
        {"a test", 3, 4.0}
    };

    Row row = new Row(3);
    for (Object[] expected : expectedRecords) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        for (int column = 0; column < expected.length; column++) {
            assertEquals(expected[column], row.getField(column));
        }
    }

    // The trailing "#next|5|6.0|" line must not surface as a third record.
    row = reader.nextRecord(row);
    assertNull(row);
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 17 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testSqlTimeFields.

/**
 * Parses two pipe-delimited lines into DATE, TIME, TIMESTAMP, TIMESTAMP columns and checks
 * the resulting {@code java.sql} values, including timestamps written without zero padding
 * ("1990-1-4 2:2:5" and "1990-1-4 2:2:5.3").
 */
@Test
public void testSqlTimeFields() throws Exception {
    final String csv =
            "1990-10-14|02:42:25|1990-10-14 02:42:25.123|1990-1-4 2:2:5\n"
                    + "1990-10-14|02:42:25|1990-10-14 02:42:25.123|1990-1-4 2:2:5.3\n";
    final FileInputSplit inputSplit = createTempFile(csv);

    final TypeInformation[] columnTypes = {
        SqlTimeTypeInfo.DATE,
        SqlTimeTypeInfo.TIME,
        SqlTimeTypeInfo.TIMESTAMP,
        SqlTimeTypeInfo.TIMESTAMP
    };
    final RowCsvInputFormat reader =
            RowCsvInputFormat.builder(new RowTypeInfo(columnTypes), PATH)
                    .setFieldDelimiter('|')
                    .build();
    reader.configure(new Configuration());
    reader.open(inputSplit);

    // Both records share the first three columns; only the last timestamp differs.
    final String[] expectedLastTimestamps = {"1990-01-04 02:02:05", "1990-01-04 02:02:05.3"};

    Row row = new Row(4);
    for (String lastTimestamp : expectedLastTimestamps) {
        row = reader.nextRecord(row);
        assertNotNull(row);
        assertEquals(Date.valueOf("1990-10-14"), row.getField(0));
        assertEquals(Time.valueOf("02:42:25"), row.getField(1));
        assertEquals(Timestamp.valueOf("1990-10-14 02:42:25.123"), row.getField(2));
        assertEquals(Timestamp.valueOf(lastTimestamp), row.getField(3));
    }

    // After the two records the format must report exhaustion both ways.
    row = reader.nextRecord(row);
    assertNull(row);
    assertTrue(reader.reachedEnd());
}
Also used : FileInputSplit(org.apache.flink.core.fs.FileInputSplit) Configuration(org.apache.flink.configuration.Configuration) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Test(org.junit.Test)

Example 18 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testRemovingTrailingCR.

/**
 * Writes {@code FIRST_PART} and {@code SECOND_PART}, each followed by the given line break,
 * to a temporary file and checks that the input format returns them as two single-field
 * rows whose value equals the respective constant (i.e. without any line-break residue).
 *
 * @param lineBreakerInFile the line terminator appended after each part in the file
 * @throws IOException if the temporary file cannot be created, written, or read
 */
private static void testRemovingTrailingCR(String lineBreakerInFile) throws IOException {
    String fileContent = FIRST_PART + lineBreakerInFile + SECOND_PART + lineBreakerInFile;
    // create input file
    File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources guarantees the file handle is released even if write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write(fileContent);
    }
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat.Builder builder = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), new Path(tempFile.toURI().toString()));
    RowCsvInputFormat inputFormat = builder.build();
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Row result = inputFormat.nextRecord(new Row(1));
    assertNotNull("Expecting to not return null", result);
    assertEquals(FIRST_PART, result.getField(0));
    result = inputFormat.nextRecord(result);
    assertNotNull("Expecting to not return null", result);
    assertEquals(SECOND_PART, result.getField(0));
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) Row(org.apache.flink.types.Row) File(java.io.File)

Example 19 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class RowCsvInputFormatTest method testQuotedStringParsingWithEscapedQuotes.

/**
 * Checks that a quote character preceded by the escape character inside a quoted field is
 * kept as a literal quote.
 *
 * <p>The file holds the single line {@code "\"Hello\" World"|"We are\" young"}; with
 * {@code '|'} as delimiter, {@code '"'} as quote and {@code '\\'} as escape character the
 * two fields are expected to be {@code "Hello" World} and {@code We are" young}.
 */
@Test
public void testQuotedStringParsingWithEscapedQuotes() throws Exception {
    String fileContent = "\"\\\"Hello\\\" World\"|\"We are\\\" young\"";
    File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources guarantees the file handle is released even if write() throws
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat.Builder builder = RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), new Path(tempFile.toURI().toString())).setFieldDelimiter('|').setQuoteCharacter('"').setEscapeCharacter('\\');
    RowCsvInputFormat inputFormat = builder.build();
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Row record = inputFormat.nextRecord(new Row(2));
    assertEquals("\"Hello\" World", record.getField(0));
    assertEquals("We are\" young", record.getField(1));
}
Also used : Path(org.apache.flink.core.fs.Path) Configuration(org.apache.flink.configuration.Configuration) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) Row(org.apache.flink.types.Row) File(java.io.File) Test(org.junit.Test)

Example 20 with RowTypeInfo

use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.

the class SimpleCatalogFactory method createCatalog.

/**
 * Creates a {@link GenericInMemoryCatalog} named after the factory context and registers one
 * source table in it.
 *
 * <p>The default database and the table name are read from the context options via
 * {@code DEFAULT_DATABASE} and {@code TABLE_NAME}. The registered table has the columns
 * {@code id} (INT) and {@code string} (STRING) and streams {@code TABLE_CONTENTS}.
 *
 * @param context factory context supplying the catalog name and options
 * @return the in-memory catalog with the table registered
 * @throws WrappingRuntimeException if building or registering the table fails
 */
@Override
public Catalog createCatalog(Context context) {
    final Configuration options = Configuration.fromMap(context.getOptions());
    final String defaultDatabase = options.getString(DEFAULT_DATABASE);
    final String table = options.getString(TABLE_NAME);

    final GenericInMemoryCatalog catalog =
            new GenericInMemoryCatalog(context.getName(), defaultDatabase);

    final StreamTableSource<Row> source =
            new StreamTableSource<Row>() {

                @Override
                public DataStream<Row> getDataStream(StreamExecutionEnvironment execEnv) {
                    final TypeInformation[] columnTypes = {Types.INT(), Types.STRING()};
                    final String[] columnNames = {"id", "string"};
                    return execEnv.fromCollection(TABLE_CONTENTS)
                            .returns(new RowTypeInfo(columnTypes, columnNames));
                }

                @Override
                public TableSchema getTableSchema() {
                    final TableSchema.Builder schema = TableSchema.builder();
                    schema.field("id", DataTypes.INT());
                    schema.field("string", DataTypes.STRING());
                    return schema.build();
                }

                @Override
                public DataType getProducedDataType() {
                    return DataTypes.ROW(
                                    DataTypes.FIELD("id", DataTypes.INT()),
                                    DataTypes.FIELD("string", DataTypes.STRING()))
                            .notNull();
                }
            };

    // Path and catalog-table construction stay inside the try so that any failure they
    // raise is wrapped exactly like a failure of createTable itself.
    try {
        final ObjectPath tablePath = new ObjectPath(defaultDatabase, table);
        catalog.createTable(tablePath, ConnectorCatalogTable.source(source, false), false);
    } catch (Exception e) {
        throw new WrappingRuntimeException(e);
    }
    return catalog;
}
Also used : ObjectPath(org.apache.flink.table.catalog.ObjectPath) WrappingRuntimeException(org.apache.flink.util.WrappingRuntimeException) Configuration(org.apache.flink.configuration.Configuration) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Row(org.apache.flink.types.Row) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) StreamTableSource(org.apache.flink.table.sources.StreamTableSource) GenericInMemoryCatalog(org.apache.flink.table.catalog.GenericInMemoryCatalog) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) WrappingRuntimeException(org.apache.flink.util.WrappingRuntimeException)

Aggregations

- RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo): 50
- Test (org.junit.Test): 34
- Row (org.apache.flink.types.Row): 32
- TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 26
- Configuration (org.apache.flink.configuration.Configuration): 16
- FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 15
- ArrayList (java.util.ArrayList): 10
- Transformation (org.apache.flink.api.dag.Transformation): 8
- OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 8
- SourceTransformation (org.apache.flink.streaming.api.transformations.SourceTransformation): 8
- TwoInputTransformation (org.apache.flink.streaming.api.transformations.TwoInputTransformation): 8
- ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 6
- PythonKeyedProcessOperator (org.apache.flink.streaming.api.operators.python.PythonKeyedProcessOperator): 6
- IOException (java.io.IOException): 4
- MapTypeInfo (org.apache.flink.api.java.typeutils.MapTypeInfo): 4
- File (java.io.File): 3
- FileOutputStream (java.io.FileOutputStream): 3
- OutputStreamWriter (java.io.OutputStreamWriter): 3
- LocalDateTime (java.time.LocalDateTime): 3
- PrimitiveArrayTypeInfo (org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo): 3