use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.
the class RowCsvInputFormatTest method testTailingEmptyFields.
/**
 * Reads a '|'-delimited file whose lines end in empty fields and checks each of the
 * first five rows against the expected three string values. The final line carries
 * only two fields, and the test expects {@code nextRecord} to fail on it with an
 * {@link IOException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
    String fileContent =
            "abc|def|ghijk\n"
                    + "abc|def|\n"
                    + "abc||\n"
                    + "|||\n"
                    + "||\n"
                    + "abc|def\n";
    FileInputSplit split = createTempFile(fileContent);

    // Three string columns.
    TypeInformation[] stringColumns = new TypeInformation[3];
    for (int col = 0; col < stringColumns.length; col++) {
        stringColumns[col] = BasicTypeInfo.STRING_TYPE_INFO;
    }

    RowCsvInputFormat format =
            RowCsvInputFormat.builder(new RowTypeInfo(stringColumns), PATH)
                    .setFieldDelimiter('|')
                    .build();
    format.configure(new Configuration());
    format.open(split);

    // Expected contents of the five rows that must be readable, in file order.
    String[][] expectedRows = {
        {"abc", "def", "ghijk"},
        {"abc", "def", ""},
        {"abc", "", ""},
        {"", "", ""},
        {"", "", ""}
    };

    Row row = new Row(3);
    for (String[] expectedRow : expectedRows) {
        row = format.nextRecord(row);
        assertNotNull(row);
        for (int col = 0; col < expectedRow.length; col++) {
            assertEquals(expectedRow[col], row.getField(col));
        }
    }

    // The sixth line ("abc|def") has only two fields; reading it must fail.
    try {
        format.nextRecord(row);
        fail("RuntimeException: Row length mismatch. 3 fields expected but was 2");
    } catch (IOException expected) {
        // Intentionally ignored: the exception is the outcome this test asserts.
    }
}
use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.
the class RowCsvInputFormatTest method testQuotedStringParsingWithIncludeFields.
/**
 * Writes a single '|'-delimited line of five double-quoted strings to a temporary file,
 * reads it back with only columns 0 and 2 selected, and checks that the two returned
 * values come back without their surrounding quotes.
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
    String fileContent =
            "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|"
                    + "\"Blahblah <blah@blahblah.org>\"|\"blaaa\"|\"blubb\"";

    File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);

    // try-with-resources closes the writer even when write() throws; the explicit
    // charset keeps the fixture independent of the platform default encoding.
    try (OutputStreamWriter writer =
            new OutputStreamWriter(
                    new FileOutputStream(tempFile),
                    java.nio.charset.StandardCharsets.UTF_8)) {
        writer.write(fileContent);
    }

    TypeInformation[] fieldTypes =
            new TypeInformation[] {
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(
                            new RowTypeInfo(fieldTypes), new Path(tempFile.toURI().toString()))
                    .setFieldDelimiter('|')
                    .setSelectedFields(new int[] {0, 2})
                    .setQuoteCharacter('"');
    RowCsvInputFormat inputFormat = builder.build();
    inputFormat.configure(new Configuration());

    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);

    // Two fields are selected, so the reusable row has arity 2.
    Row record = inputFormat.nextRecord(new Row(2));
    // Report a missing record as an assertion failure rather than a NullPointerException.
    assertNotNull(record);
    assertEquals("20:41:52-1-3-2015", record.getField(0));
    assertEquals("Blahblah <blah@blahblah.org>", record.getField(1));
}
use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.
the class RowCsvInputFormatTest method testIntegerFields.
/**
 * Reads two '|'-delimited lines of five integers each (the second line ends with a
 * trailing delimiter) and checks every field of both rows, then checks that no further
 * record is returned and the format reports the end of input.
 */
@Test
public void testIntegerFields() throws Exception {
    String fileContent = "111|222|333|444|555\n666|777|888|999|000|\n";
    FileInputSplit split = createTempFile(fileContent);

    // Five integer columns.
    TypeInformation[] intColumns = new TypeInformation[5];
    for (int col = 0; col < intColumns.length; col++) {
        intColumns[col] = BasicTypeInfo.INT_TYPE_INFO;
    }

    RowCsvInputFormat format =
            RowCsvInputFormat.builder(new RowTypeInfo(intColumns), PATH)
                    .setFieldDelimiter('|')
                    .build();
    format.configure(new Configuration());
    format.open(split);

    // Expected values per row; the textual "000" is expected to be read as 0.
    int[][] expectedRows = {
        {111, 222, 333, 444, 555},
        {666, 777, 888, 999, 0}
    };

    Row row = new Row(5);
    for (int[] expectedRow : expectedRows) {
        row = format.nextRecord(row);
        assertNotNull(row);
        for (int col = 0; col < expectedRow.length; col++) {
            assertEquals(expectedRow[col], row.getField(col));
        }
    }

    // No third record may be produced.
    assertNull(format.nextRecord(row));
    assertTrue(format.reachedEnd());
}
use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.
the class RowCsvInputFormatTest method testReadSparseWithPositionSetter.
/**
 * Declares ten integer columns but selects only columns 0, 3 and 7, then checks that
 * each of the two input lines yields a three-field row holding exactly those columns,
 * followed by end of input.
 */
@Test
public void testReadSparseWithPositionSetter() throws Exception {
    String fileContent =
            "111|222|333|444|555|666|777|888|999|000|\n"
                    + "000|999|888|777|666|555|444|333|222|111|";
    FileInputSplit split = createTempFile(fileContent);

    // Ten integer columns in the file schema.
    TypeInformation[] intColumns = new TypeInformation[10];
    for (int col = 0; col < intColumns.length; col++) {
        intColumns[col] = BasicTypeInfo.INT_TYPE_INFO;
    }

    RowCsvInputFormat format =
            RowCsvInputFormat.builder(new RowTypeInfo(intColumns), PATH)
                    .setFieldDelimiter('|')
                    .setSelectedFields(new int[] {0, 3, 7})
                    .build();
    format.configure(new Configuration());
    format.open(split);

    // Expected projection (source columns 0, 3, 7) for each input line.
    int[][] expectedRows = {
        {111, 444, 888},
        {0, 777, 333}
    };

    Row row = new Row(3);
    for (int[] expectedRow : expectedRows) {
        row = format.nextRecord(row);
        assertNotNull(row);
        for (int col = 0; col < expectedRow.length; col++) {
            assertEquals(expectedRow[col], row.getField(col));
        }
    }

    // The input holds exactly two lines.
    assertNull(format.nextRecord(row));
    assertTrue(format.reachedEnd());
}
use of org.apache.flink.api.java.typeutils.RowTypeInfo in project flink by apache.
the class RowCsvInputFormatTest method testEmptyFields.
/**
 * Reads eight comma-delimited lines consisting solely of empty fields, with the empty
 * string configured as the null literal, and checks that fields come back as
 * {@code null} across eight differently typed columns.
 *
 * <p>NOTE(review): row {@code i} is only checked at field {@code i}, so each column type
 * is verified on exactly one line. Checking every field of every row would be stricter;
 * confirm against the parser's behavior before tightening.
 */
@Test
public void testEmptyFields() throws Exception {
    String fileContent =
            ",,,,,,,,\n"
                    + ",,,,,,,\n"
                    + ",,,,,,,,\n"
                    + ",,,,,,,\n"
                    + ",,,,,,,,\n"
                    + ",,,,,,,,\n"
                    + ",,,,,,,\n"
                    + ",,,,,,,,\n";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation[] fieldTypes =
            new TypeInformation[] {
                BasicTypeInfo.BOOLEAN_TYPE_INFO,
                BasicTypeInfo.BYTE_TYPE_INFO,
                BasicTypeInfo.DOUBLE_TYPE_INFO,
                BasicTypeInfo.FLOAT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.SHORT_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH)
                    .setFieldDelimiter(',')
                    .setNullLiteral("");
    RowCsvInputFormat format = builder.build();
    format.configure(new Configuration());
    format.open(split);

    Row result = new Row(8);
    int linesCnt = fileContent.split("\n").length;
    for (int i = 0; i < linesCnt; i++) {
        result = format.nextRecord(result);
        // Fail with a clear assertion instead of a NullPointerException on a short read.
        assertNotNull(result);
        assertNull(result.getField(i));
    }

    // ensure no more rows
    assertNull(format.nextRecord(result));
    assertTrue(format.reachedEnd());
}
Aggregations