Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.
From the class RowCsvInputFormatTest, method ignoreInvalidLines.
/**
 * Checks how {@link RowCsvInputFormat} handles lines that do not fit the expected
 * (String, Integer, Double) row layout, first with parse errors reported and then with
 * parse errors ignored.
 *
 * <p>Strict pass ({@code setIgnoreParseErrors(false)}): the description line, the header
 * line and the {@code //a comment} line are each expected to make {@code nextRecord} throw
 * an {@link IOException}; the three remaining lines are expected to be returned as rows.
 *
 * <p>Lenient pass ({@code setIgnoreParseErrors(true)}): the same split is read again and
 * every line is expected to come back as a row. For the lines that failed in the strict
 * pass only the fields asserted below are checked (for the header line the two non-string
 * fields are expected to be {@code null}).
 *
 * <p>Each format instance is closed in a {@code finally} block so the underlying file
 * stream is released even when an assertion fails.
 *
 * @throws Exception if creating the temp file, or opening, reading or closing the format
 *     fails unexpectedly
 */
@Test
public void ignoreInvalidLines() throws Exception {
    String fileContent =
            "#description of the data\n"
                    + "header1|header2|header3|\n"
                    + "this is|1|2.0|\n"
                    + "//a comment\n"
                    + "a test|3|4.0|\n"
                    + "#next|5|6.0|\n";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes =
            new TypeInformation<?>[] {
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.DOUBLE_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH)
                    .setFieldDelimiter('|')
                    .setIgnoreParseErrors(false);

    Configuration parameters = new Configuration();

    // ---- strict pass: invalid lines must surface as IOException ----
    RowCsvInputFormat format = builder.build();
    format.configure(parameters);
    format.open(split);
    try {
        Row result = new Row(3);

        // line 1: "#description of the data"
        try {
            result = format.nextRecord(result);
            fail("RuntimeException was not thrown! (Row length mismatch. 3 fields expected but was 1)");
        } catch (IOException ignored) {
            // expected: the line cannot be converted into a 3-field row
        }

        // line 2: "header1|header2|header3|"
        try {
            result = format.nextRecord(result);
            fail("NumberFormatException was not thrown! (For input string: \"header2\")");
        } catch (IOException ignored) {
            // expected: "header2" is not a valid integer
        }

        // line 3: valid record
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.getField(0));
        assertEquals(1, result.getField(1));
        assertEquals(2.0, result.getField(2));

        // line 4: "//a comment"
        try {
            result = format.nextRecord(result);
            fail("RuntimeException was not thrown! (Row length mismatch. 3 fields expected but was 1)");
        } catch (IOException ignored) {
            // expected: the line cannot be converted into a 3-field row
        }

        // line 5: valid record
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.getField(0));
        assertEquals(3, result.getField(1));
        assertEquals(4.0, result.getField(2));

        // line 6: starts with '#' but still has three parsable fields
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("#next", result.getField(0));
        assertEquals(5, result.getField(1));
        assertEquals(6.0, result.getField(2));

        // end of input
        result = format.nextRecord(result);
        assertNull(result);
    } finally {
        format.close();
    }

    // ---- lenient pass: re-open the same split with parse errors ignored ----
    builder.setIgnoreParseErrors(true);
    format = builder.build();
    format.configure(parameters);
    format.open(split);
    try {
        Row result = new Row(3);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("#description of the data", result.getField(0));

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("header1", result.getField(0));
        assertNull(result.getField(1));
        assertNull(result.getField(2));

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.getField(0));
        assertEquals(1, result.getField(1));
        assertEquals(2.0, result.getField(2));

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("//a comment", result.getField(0));

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.getField(0));
        assertEquals(3, result.getField(1));
        assertEquals(4.0, result.getField(2));

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("#next", result.getField(0));
        assertEquals(5, result.getField(1));
        assertEquals(6.0, result.getField(2));

        // end of input
        result = format.nextRecord(result);
        assertNull(result);
    } finally {
        format.close();
    }
}
Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.
From the class RowCsvInputFormatTest, method readStringFields.
/**
 * Reads four {@code '|'}-delimited lines into rows of three string fields and checks that
 * empty fields are returned as empty strings.
 *
 * <p>The input covers a fully populated line, a line with an empty middle field, a line
 * consisting only of delimiters (with a trailing delimiter), and a final line of two
 * delimiters without a terminating newline. After the fourth row the format is expected
 * to return {@code null} and report {@code reachedEnd()}.
 *
 * <p>The format is closed in a {@code finally} block so the underlying file stream is
 * released even when an assertion fails.
 *
 * @throws Exception if creating the temp file, or opening, reading or closing the format
 *     fails unexpectedly
 */
@Test
public void readStringFields() throws Exception {
    String fileContent = "abc|def|ghijk\nabc||hhg\n|||\n||";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes =
            new TypeInformation<?>[] {
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH).setFieldDelimiter('|');

    RowCsvInputFormat format = builder.build();
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(3);

        // "abc|def|ghijk"
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.getField(0));
        assertEquals("def", result.getField(1));
        assertEquals("ghijk", result.getField(2));

        // "abc||hhg" - empty middle field
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("hhg", result.getField(2));

        // "|||" - all fields empty, trailing delimiter present
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("", result.getField(2));

        // "||" - all fields empty, last line without newline
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("", result.getField(2));

        // end of input
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        format.close();
    }
}
Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.
From the class RowCsvInputFormatTest, method readMixedQuotedStringFields.
/**
 * Reads lines that mix quoted and unquoted string fields, using {@code '|'} as the field
 * delimiter and {@code '@'} as the quote character.
 *
 * <p>The assertions expect the quote characters to be stripped and a delimiter inside a
 * quoted field (e.g. {@code @a|b|c@}) to be kept as part of the field value. After the
 * third row the format is expected to return {@code null} and report {@code reachedEnd()}.
 *
 * <p>The format is closed in a {@code finally} block so the underlying file stream is
 * released even when an assertion fails.
 *
 * @throws Exception if creating the temp file, or opening, reading or closing the format
 *     fails unexpectedly
 */
@Test
public void readMixedQuotedStringFields() throws Exception {
    String fileContent = "@a|b|c@|def|@ghijk@\nabc||@|hhg@\n|||\n";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes =
            new TypeInformation<?>[] {
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH)
                    .setFieldDelimiter('|')
                    .setQuoteCharacter('@');

    RowCsvInputFormat format = builder.build();
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(3);

        // "@a|b|c@|def|@ghijk@" - delimiter inside the first quoted field
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a|b|c", result.getField(0));
        assertEquals("def", result.getField(1));
        assertEquals("ghijk", result.getField(2));

        // "abc||@|hhg@" - empty middle field, quoted field starting with the delimiter
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("|hhg", result.getField(2));

        // "|||" - all fields empty
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.getField(0));
        assertEquals("", result.getField(1));
        assertEquals("", result.getField(2));

        // end of input
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        format.close();
    }
}
Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.
From the class RowCsvInputFormatTest, method testDoubleFields.
/**
 * Reads two {@code '|'}-delimited lines into rows of five double fields.
 *
 * <p>The first line has no trailing delimiter and the second one does; both are expected
 * to yield five values. The text {@code 00.0} is expected to be read as {@code 0.0}. After
 * the second row the format is expected to return {@code null} and report
 * {@code reachedEnd()}.
 *
 * <p>The format is closed in a {@code finally} block so the underlying file stream is
 * released even when an assertion fails.
 *
 * @throws Exception if creating the temp file, or opening, reading or closing the format
 *     fails unexpectedly
 */
@Test
public void testDoubleFields() throws Exception {
    String fileContent = "11.1|22.2|33.3|44.4|55.5\n66.6|77.7|88.8|99.9|00.0|\n";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes =
            new TypeInformation<?>[] {
                BasicTypeInfo.DOUBLE_TYPE_INFO,
                BasicTypeInfo.DOUBLE_TYPE_INFO,
                BasicTypeInfo.DOUBLE_TYPE_INFO,
                BasicTypeInfo.DOUBLE_TYPE_INFO,
                BasicTypeInfo.DOUBLE_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH).setFieldDelimiter('|');

    RowCsvInputFormat format = builder.build();
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(5);

        // first line (no trailing delimiter)
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(11.1, result.getField(0));
        assertEquals(22.2, result.getField(1));
        assertEquals(33.3, result.getField(2));
        assertEquals(44.4, result.getField(3));
        assertEquals(55.5, result.getField(4));

        // second line (with trailing delimiter)
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(66.6, result.getField(0));
        assertEquals(77.7, result.getField(1));
        assertEquals(88.8, result.getField(2));
        assertEquals(99.9, result.getField(3));
        assertEquals(0.0, result.getField(4));

        // end of input
        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } finally {
        format.close();
    }
}
Use of org.apache.flink.api.java.typeutils.RowTypeInfo in the Apache Flink project.
From the class RowCsvInputFormatTest, method testScanOrder.
/**
 * Checks that selected fields are emitted in the order given to
 * {@code setSelectedFields}, not in file column order.
 *
 * <p>The input has ten integer columns per line and the selection is {@code {7, 3, 0}},
 * so each resulting row is expected to hold column 7, then column 3, then column 0. The
 * text {@code 000} is expected to be read as {@code 0}.
 *
 * <p>Only the two data rows are read; the format is closed in a {@code finally} block so
 * the underlying file stream is released even though the input is not read to its end.
 *
 * @throws Exception if creating the temp file, or opening, reading or closing the format
 *     fails unexpectedly
 */
@Test
public void testScanOrder() throws Exception {
    String fileContent =
            // first row
            "111|222|333|444|555|666|777|888|999|000|\n"
                    // second row
                    + "000|999|888|777|666|555|444|333|222|111|";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation<?>[] fieldTypes =
            new TypeInformation<?>[] {
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.INT_TYPE_INFO
            };

    RowCsvInputFormat.Builder builder =
            RowCsvInputFormat.builder(new RowTypeInfo(fieldTypes), PATH)
                    .setFieldDelimiter('|')
                    .setSelectedFields(new int[] {7, 3, 0});

    RowCsvInputFormat format = builder.build();
    format.configure(new Configuration());
    format.open(split);
    try {
        Row result = new Row(3);

        // check first row: columns 7, 3, 0
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(888, result.getField(0));
        assertEquals(444, result.getField(1));
        assertEquals(111, result.getField(2));

        // check second row: columns 7, 3, 0
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals(333, result.getField(0));
        assertEquals(777, result.getField(1));
        assertEquals(0, result.getField(2));
    } finally {
        format.close();
    }
}
Aggregations