use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
the class StreamExecutionEnvironment method addSource.
/**
 * Adds a data source with a custom type information, thus opening a
 * {@link DataStream}. Only in very special cases does the user need to
 * supply type information explicitly. Otherwise use
 * {@link #addSource(org.apache.flink.streaming.api.functions.source.SourceFunction)}
 *
 * @param function
 *            the user defined function
 * @param sourceName
 *            Name of the data source
 * @param typeInfo
 *            the user defined type information for the stream
 * @param <OUT>
 *            type of the returned stream
 * @return the data stream constructed
 */
@SuppressWarnings("unchecked")
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {
    if (typeInfo == null) {
        if (function instanceof ResultTypeQueryable) {
            typeInfo = ((ResultTypeQueryable<OUT>) function).getProducedType();
        } else {
            try {
                typeInfo = TypeExtractor.createTypeInfo(SourceFunction.class, function.getClass(), 0, null, null);
            } catch (final InvalidTypesException e) {
                typeInfo = (TypeInformation<OUT>) new MissingTypeInfo(sourceName, e);
            }
        }
    }
    boolean isParallel = function instanceof ParallelSourceFunction;
    clean(function);
    StreamSource<OUT, ?> sourceOperator;
    if (function instanceof StoppableFunction) {
        sourceOperator = new StoppableStreamSource<>(cast2StoppableSourceFunction(function));
    } else {
        sourceOperator = new StreamSource<>(function);
    }
    return new DataStreamSource<>(this, typeInfo, sourceOperator, isParallel, sourceName);
}
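For context, a minimal sketch of calling this overload from user code; MyRowSource is a hypothetical SourceFunction<Row> whose element type the TypeExtractor cannot infer, so the type information is supplied explicitly via a RowTypeInfo:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<Row> rows = env.addSource(
        new MyRowSource(), // hypothetical SourceFunction<Row> with a non-extractable element type
        "row-source",
        new RowTypeInfo(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));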
use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
the class RowCsvInputFormatTest method testReadSparseWithNullFieldsForTypes.
@Test
public void testReadSparseWithNullFieldsForTypes() throws Exception {
    String fileContent =
        "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" +
        "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO };
    // project the ten source columns down to columns 0, 3 and 7
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, new int[] { 0, 3, 7 });
    format.setFieldDelimiter("|x|");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);

    // first row: columns 0, 3, 7 -> 111, 444, 888
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(111, result.getField(0));
    assertEquals(444, result.getField(1));
    assertEquals(888, result.getField(2));

    // second row: columns 0, 3, 7 -> 0, 777, 333
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(0, result.getField(0));
    assertEquals(777, result.getField(1));
    assertEquals(333, result.getField(2));

    // no further records
    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
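As a usage note (not part of the test), the same projected format could be plugged into a batch job roughly as follows; the file path is illustrative, and createInput is given a RowTypeInfo built from the same field types:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
// read only source columns 0, 3 and 7 into a three-field Row, as in the test above
RowCsvInputFormat csvFormat = new RowCsvInputFormat(new Path("/tmp/sparse.csv"), fieldTypes, new int[] { 0, 3, 7 });
csvFormat.setFieldDelimiter("|x|");
DataSet<Row> rows = env.createInput(csvFormat, new RowTypeInfo(fieldTypes));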
use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
the class RowCsvInputFormatTest method readStringFields.
@Test
public void readStringFields() throws Exception {
    String fileContent = "abc|def|ghijk\nabc||hhg\n|||\n||";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("def", result.getField(1));
    assertEquals("ghijk", result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("abc", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("hhg", result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.getField(0));
    assertEquals("", result.getField(1));
    assertEquals("", result.getField(2));

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
the class RowCsvInputFormatTest method ignoreInvalidLines.
@Test
public void ignoreInvalidLines() throws Exception {
    String fileContent =
        "#description of the data\n" +
        "header1|header2|header3|\n" +
        "this is|1|2.0|\n" +
        "//a comment\n" +
        "a test|3|4.0|\n" +
        "#next|5|6.0|\n";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.STRING_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.DOUBLE_TYPE_INFO };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, "\n", "|");
    format.setLenient(false);
    Configuration parameters = new Configuration();
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);

    try {
        result = format.nextRecord(result);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
    }
    try {
        result = format.nextRecord(result);
        fail("Parse Exception was not thrown! (Invalid int value)");
    } catch (ParseException ignored) {
    }

    // => ok
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("this is", result.getField(0));
    assertEquals(1, result.getField(1));
    assertEquals(2.0, result.getField(2));

    try {
        result = format.nextRecord(result);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
    }

    // => ok
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("a test", result.getField(0));
    assertEquals(3, result.getField(1));
    assertEquals(4.0, result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("#next", result.getField(0));
    assertEquals(5, result.getField(1));
    assertEquals(6.0, result.getField(2));

    result = format.nextRecord(result);
    assertNull(result);

    // re-open with lenient = true
    format.setLenient(true);
    format.configure(parameters);
    format.open(split);
    result = new Row(3);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("header1", result.getField(0));
    assertNull(result.getField(1));
    assertNull(result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("this is", result.getField(0));
    assertEquals(1, result.getField(1));
    assertEquals(2.0, result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("a test", result.getField(0));
    assertEquals(3, result.getField(1));
    assertEquals(4.0, result.getField(2));

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("#next", result.getField(0));
    assertEquals(5, result.getField(1));
    assertEquals(6.0, result.getField(2));

    result = format.nextRecord(result);
    assertNull(result);
}
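As a hedged sketch of the lenient behaviour outside the test harness, assuming an illustrative file path: with setLenient(true) the format tolerates malformed rows (headers, comments, short rows) instead of throwing a ParseException, as the lenient re-run above shows:

TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
RowCsvInputFormat lenientFormat = new RowCsvInputFormat(new Path("/tmp/with-header.csv"), fieldTypes, "\n", "|");
// tolerate malformed rows instead of failing the job with a ParseException
lenientFormat.setLenient(true);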
use of org.apache.flink.api.common.typeinfo.TypeInformation in project flink by apache.
the class RowCsvInputFormatTest method testScanOrder.
@Test
public void testScanOrder() throws Exception {
    String fileContent =
        // first row
        "111|222|333|444|555|666|777|888|999|000|\n" +
        // second row
        "000|999|888|777|666|555|444|333|222|111|";
    FileInputSplit split = createTempFile(fileContent);
    TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO };
    int[] order = new int[] { 7, 3, 0 };
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, order);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);
    Row result = new Row(3);

    // check first row
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(888, result.getField(0));
    assertEquals(444, result.getField(1));
    assertEquals(111, result.getField(2));

    // check second row
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(333, result.getField(0));
    assertEquals(777, result.getField(1));
    assertEquals(0, result.getField(2));
}