
Example 1 with TypeInformation

Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

From the class StreamExecutionEnvironment, method addSource.

/**
 * Adds a data source with custom type information, thus opening a
 * {@link DataStream}. Only in very special cases does the user need to
 * supply type information explicitly. Otherwise use
 * {@link #addSource(org.apache.flink.streaming.api.functions.source.SourceFunction)}.
 *
 * @param function
 * 		the user defined function
 * @param sourceName
 * 		name of the data source
 * @param <OUT>
 * 		type of the returned stream
 * @param typeInfo
 * 		the user defined type information for the stream
 * @return the data stream constructed
 */
@SuppressWarnings("unchecked")
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {
    if (typeInfo == null) {
        // No type information supplied: ask the function itself first, then fall back
        // to the type extractor, recording a MissingTypeInfo if extraction fails.
        if (function instanceof ResultTypeQueryable) {
            typeInfo = ((ResultTypeQueryable<OUT>) function).getProducedType();
        } else {
            try {
                typeInfo = TypeExtractor.createTypeInfo(SourceFunction.class, function.getClass(), 0, null, null);
            } catch (final InvalidTypesException e) {
                typeInfo = (TypeInformation<OUT>) new MissingTypeInfo(sourceName, e);
            }
        }
    }
    // Clean the closure and wrap the function in the matching source operator.
    boolean isParallel = function instanceof ParallelSourceFunction;
    clean(function);
    StreamSource<OUT, ?> sourceOperator;
    if (function instanceof StoppableFunction) {
        sourceOperator = new StoppableStreamSource<>(cast2StoppableSourceFunction(function));
    } else {
        sourceOperator = new StreamSource<>(function);
    }
    return new DataStreamSource<>(this, typeInfo, sourceOperator, isParallel, sourceName);
}
Also used: ParallelSourceFunction (org.apache.flink.streaming.api.functions.source.ParallelSourceFunction), SourceFunction (org.apache.flink.streaming.api.functions.source.SourceFunction), InputFormatSourceFunction (org.apache.flink.streaming.api.functions.source.InputFormatSourceFunction), MissingTypeInfo (org.apache.flink.api.java.typeutils.MissingTypeInfo), ResultTypeQueryable (org.apache.flink.api.java.typeutils.ResultTypeQueryable), DataStreamSource (org.apache.flink.streaming.api.datastream.DataStreamSource), StoppableFunction (org.apache.flink.api.common.functions.StoppableFunction), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException)
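
For context, here is a hedged caller-side sketch of the method above. The MySource class, the source name and the Tuple2 element type are illustrative assumptions; TypeInformation.of with a TypeHint is one common way to build the explicit type information.

// Usage sketch (MySource is a hypothetical SourceFunction<Tuple2<String, Integer>>).
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
TypeInformation<Tuple2<String, Integer>> outType =
        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {});
DataStreamSource<Tuple2<String, Integer>> stream =
        env.addSource(new MySource(), "my-source", outType);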

Example 2 with TypeInformation

Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

From the class RowCsvInputFormatTest, method testReadSparseWithNullFieldsForTypes.

@Test
public void testReadSparseWithNullFieldsForTypes() throws Exception {
    String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" + "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, new int[] { 0, 3, 7 });
    format.setFieldDelimiter("|x|");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(111, result.getField(0));
    assertEquals(444, result.getField(1));
    assertEquals(888, result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(0, result.getField(0));
    assertEquals(777, result.getField(1));
    assertEquals(333, result.getField(2));
    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
Also used: FileInputSplit (org.apache.flink.core.fs.FileInputSplit), Configuration (org.apache.flink.configuration.Configuration), Row (org.apache.flink.types.Row), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), Test (org.junit.Test)
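
The test above drives the format record by record with explicit assertions; outside a test, the same API reduces to a plain read loop. A minimal sketch under the same assumptions as the test (the PATH constant, the createTempFile helper and the split it produces), with the enclosing method assumed to declare throws Exception:

// Read loop sketch (not part of the test above).
format.open(split);
Row reuse = new Row(3);
Row record;
while ((record = format.nextRecord(reuse)) != null) {
    // record.getField(0..2) holds the projected columns 0, 3 and 7 of the current line
}
format.close();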

Example 3 with TypeInformation

Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

From the class RowCsvInputFormatTest, method readStringFields.

@Test
public void readStringFields() throws Exception {
    String fileContent = "abc|def|ghijk\nabc||hhg\n|||\n||";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("def", result.getField(1));
    assertEquals("ghijk", result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("hhg", result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));
    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
Also used: FileInputSplit (org.apache.flink.core.fs.FileInputSplit), Configuration (org.apache.flink.configuration.Configuration), Row (org.apache.flink.types.Row), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), Test (org.junit.Test)

Example 4 with TypeInformation

Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

From the class RowCsvInputFormatTest, method ignoreInvalidLines.

@Test
public void ignoreInvalidLines() throws Exception {
    String fileContent = "#description of the data\n" + "header1|header2|header3|\n" + "this is|1|2.0|\n" + "//a comment\n" + "a test|3|4.0|\n" + "#next|5|6.0|\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    format.setLenient(false);
    Configuration parameters = new Configuration();
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);
    try {
        result = format.nextRecord(result);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
    }
    try {
        result = format.nextRecord(result);
        fail("Parse Exception was not thrown! (Invalid int value)");
    } catch (ParseException ignored) {
    }
    // => ok
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("this is", result.getField(0));
    assertEquals(1, result.getField(1));
    assertEquals(2.0, result.getField(2));
    try {
        result = format.nextRecord(result);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
    }
    // => ok
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("a test", result.getField(0));
    assertEquals(3, result.getField(1));
    assertEquals(4.0, result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("#next", result.getField(0));
    assertEquals(5, result.getField(1));
    assertEquals(6.0, result.getField(2));
    result = format.nextRecord(result);
    assertNull(result);
    // re-open with lenient = true
    format.setLenient(true);
    format.configure(parameters);
    format.open(split);
    result = new Row(3);
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("header1", result.getField(0));
    assertNull(result.getField(1));
    assertNull(result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("this is", result.getField(0));
    assertEquals(1, result.getField(1));
    assertEquals(2.0, result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("a test", result.getField(0));
    assertEquals(3, result.getField(1));
    assertEquals(4.0, result.getField(2));
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("#next", result.getField(0));
    assertEquals(5, result.getField(1));
    assertEquals(6.0, result.getField(2));
    result = format.nextRecord(result);
    assertNull(result);
}
Also used: FileInputSplit (org.apache.flink.core.fs.FileInputSplit), Configuration (org.apache.flink.configuration.Configuration), Row (org.apache.flink.types.Row), ParseException (org.apache.flink.api.common.io.ParseException), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), Test (org.junit.Test)
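
As the first half of the test shows, with setLenient(false) a malformed line surfaces as a ParseException from nextRecord, and the next call continues with the following line. A caller that wants to skip bad lines without switching to lenient mode can therefore catch and carry on; a short sketch under the same assumptions as the test:

// Strict parsing with explicit skipping of malformed lines (sketch, not part of the test).
format.setLenient(false);
format.configure(new Configuration());
format.open(split);
Row reuse = new Row(3);
while (!format.reachedEnd()) {
    try {
        Row record = format.nextRecord(reuse);
        if (record == null) {
            break; // end of the split
        }
        // process record
    } catch (ParseException skipped) {
        // the offending line is dropped; the next call reads the following line
    }
}
format.close();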

Example 5 with TypeInformation

Use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.

From the class RowCsvInputFormatTest, method testScanOrder.

@Test
public void testScanOrder() throws Exception {
    String fileContent =
        "111|222|333|444|555|666|777|888|999|000|\n" + // first row
        "000|999|888|777|666|555|444|333|222|111|";    // second row
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    int[] order = new int[] { 7, 3, 0 };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, order);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);
    // check first row
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(888, result.getField(0));
    assertEquals(444, result.getField(1));
    assertEquals(111, result.getField(2));
    // check second row
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(333, result.getField(0));
    assertEquals(777, result.getField(1));
    assertEquals(0, result.getField(2));
}
Also used: FileInputSplit (org.apache.flink.core.fs.FileInputSplit), Configuration (org.apache.flink.configuration.Configuration), Row (org.apache.flink.types.Row), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), Test (org.junit.Test)
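
All of the RowCsvInputFormat examples above construct and drive the format by hand; in a batch job the format is usually handed to the environment together with explicit TypeInformation for its rows. The following is a hedged sketch of that wiring, not taken from the tests: the file path is illustrative, ExecutionEnvironment, DataSet and RowTypeInfo (org.apache.flink.api.java.typeutils.RowTypeInfo) are assumed to match the Flink version of the snippets on this page, and the code is assumed to live in a main method declared to throw Exception.

// Sketch: reading the projected rows as a DataSet<Row> with explicit row type information.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
TypeInformation<?>[] fieldTypes = new TypeInformation<?>[] {
        BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
RowCsvInputFormat format = new RowCsvInputFormat(
        new Path("file:///tmp/input.csv"), fieldTypes, new int[] { 7, 3, 0 }); // illustrative path
format.setFieldDelimiter("|");
// The RowTypeInfo tells the runtime how to serialize the produced Row records.
DataSet<Row> rows = env.createInput(format, new RowTypeInfo(fieldTypes));
rows.print();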

Aggregations

TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 51 usages
Test (org.junit.Test): 28 usages
Row (org.apache.flink.types.Row): 21 usages
Configuration (org.apache.flink.configuration.Configuration): 20 usages
FileInputSplit (org.apache.flink.core.fs.FileInputSplit): 20 usages
TupleTypeInfo (org.apache.flink.api.java.typeutils.TupleTypeInfo): 10 usages
ArrayList (java.util.ArrayList): 9 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 8 usages
CompositeType (org.apache.flink.api.common.typeutils.CompositeType): 8 usages
IOException (java.io.IOException): 7 usages
Type (java.lang.reflect.Type): 7 usages
GenericArrayType (java.lang.reflect.GenericArrayType): 6 usages
ParameterizedType (java.lang.reflect.ParameterizedType): 6 usages
Random (java.util.Random): 6 usages
InvalidTypesException (org.apache.flink.api.common.functions.InvalidTypesException): 6 usages
TypeExtractionUtils.isClassType (org.apache.flink.api.java.typeutils.TypeExtractionUtils.isClassType): 6 usages
ValueTypeInfo (org.apache.flink.api.java.typeutils.ValueTypeInfo): 6 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 5 usages
TypeVariable (java.lang.reflect.TypeVariable): 4 usages
MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 4 usages