Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class RowCsvInputFormatTest, method testReadSparseWithPositionSetter.
@Test
public void testReadSparseWithPositionSetter() throws Exception {
    String fileContent =
        "111|222|333|444|555|666|777|888|999|000|\n" +
        "000|999|888|777|666|555|444|333|222|111|";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };

    // read only columns 0, 3 and 7 of each line into the three row fields
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, new int[] { 0, 3, 7 });
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);

    Row result = new Row(3);

    // first line
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(111, result.getField(0));
    assertEquals(444, result.getField(1));
    assertEquals(888, result.getField(2));

    // second line
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(0, result.getField(0));
    assertEquals(777, result.getField(1));
    assertEquals(333, result.getField(2));

    // end of input
    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
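The test refers to a createTempFile helper and a PATH constant that are defined elsewhere in RowCsvInputFormatTest and not shown here. A minimal sketch of what they could look like, assuming the helper writes the content to a temporary file and wraps it in a single split covering the whole file (the names and the dummy path are illustrative, not copied from the Flink sources):

private static final Path PATH = new Path("an/ignored/file/path");

private static FileInputSplit createTempFile(String content) throws IOException {
    // write the test content to a temporary file that is removed on JVM exit
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write(content);
    }
    // a single split spanning the whole file, with no host preference
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[0]);
}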
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class RowCsvInputFormatTest, method testEmptyFields.
@Test
public void testEmptyFields() throws Exception {
    String fileContent =
        ",,,,,,,,\n" +
        ",,,,,,,\n" +
        ",,,,,,,,\n" +
        ",,,,,,,\n" +
        ",,,,,,,,\n" +
        ",,,,,,,,\n" +
        ",,,,,,,\n" +
        ",,,,,,,,\n";
    FileInputSplit split = createTempFile(fileContent);

    TypeInformation[] fieldTypes = new TypeInformation[] {
        BasicTypeInfo.BOOLEAN_TYPE_INFO, BasicTypeInfo.BYTE_TYPE_INFO,
        BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.FLOAT_TYPE_INFO,
        BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO,
        BasicTypeInfo.SHORT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };

    // the boolean flag makes empty columns come back as null fields
    RowCsvInputFormat format = new RowCsvInputFormat(PATH, fieldTypes, true);
    format.setFieldDelimiter(",");
    format.configure(new Configuration());
    format.open(split);

    Row result = new Row(8);
    int linesCnt = fileContent.split("\n").length;
    for (int i = 0; i < linesCnt; i++) {
        result = format.nextRecord(result);
        assertNull(result.getField(i));
    }

    // ensure no more rows
    assertNull(format.nextRecord(result));
    assertTrue(format.reachedEnd());
}
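The loop above only inspects field i of row i. Under the same assumption that every column in the file is empty, a stricter (hypothetical) variant of the loop could check every field of every row:

for (int i = 0; i < linesCnt; i++) {
    result = format.nextRecord(result);
    assertNotNull(result);
    for (int field = 0; field < fieldTypes.length; field++) {
        assertNull(result.getField(field));
    }
}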
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class RowCsvInputFormatTest, method testRemovingTrailingCR.
private static void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) throws IOException {
    String fileContent = FIRST_PART + lineBreakerInFile + SECOND_PART + lineBreakerInFile;

    // create input file
    File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
    wrt.write(fileContent);
    wrt.close();

    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat inputFormat = new RowCsvInputFormat(new Path(tempFile.toURI().toString()), fieldTypes);
    inputFormat.configure(new Configuration());
    // configure the record delimiter independently of the line breaker written to the file
    inputFormat.setDelimiter(lineBreakerSetup);

    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);

    Row result = inputFormat.nextRecord(new Row(1));
    assertNotNull("Expecting to not return null", result);
    assertEquals(FIRST_PART, result.getField(0));

    result = inputFormat.nextRecord(result);
    assertNotNull("Expecting to not return null", result);
    assertEquals(SECOND_PART, result.getField(0));
}
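FIRST_PART and SECOND_PART are string constants declared elsewhere in the test class, and the private helper above is meant to be invoked from an actual @Test method. A hedged sketch of how it might be driven, with illustrative constant values and line-breaker combinations that are not taken verbatim from the Flink sources:

private static final String FIRST_PART = "That is the first part";
private static final String SECOND_PART = "That is the second part";

@Test
public void testRemovingTrailingCR() throws IOException {
    // same line breaker in the file and in the format configuration
    testRemovingTrailingCR("\n", "\n");
    testRemovingTrailingCR("\r\n", "\r\n");
    // Windows line breaker in the file but only "\n" configured:
    // the trailing '\r' should be stripped from the parsed field
    testRemovingTrailingCR("\r\n", "\n");
}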
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class TextInputFormatTest, method testSimpleRead.
@Test
public void testSimpleRead() {
    final String FIRST = "First line";
    final String SECOND = "Second line";
    try {
        // create input file
        File tempFile = File.createTempFile("TextInputFormatTest", "tmp");
        tempFile.deleteOnExit();
        tempFile.setWritable(true);
        PrintStream ps = new PrintStream(tempFile);
        ps.println(FIRST);
        ps.println(SECOND);
        ps.close();

        TextInputFormat inputFormat = new TextInputFormat(new Path(tempFile.toURI().toString()));
        Configuration parameters = new Configuration();
        inputFormat.configure(parameters);

        FileInputSplit[] splits = inputFormat.createInputSplits(1);
        assertTrue("expected at least one input split", splits.length >= 1);
        inputFormat.open(splits[0]);

        String result = "";

        // first line
        assertFalse(inputFormat.reachedEnd());
        result = inputFormat.nextRecord("");
        assertNotNull("Expecting first record here", result);
        assertEquals(FIRST, result);

        // second line
        assertFalse(inputFormat.reachedEnd());
        result = inputFormat.nextRecord(result);
        assertNotNull("Expecting second record here", result);
        assertEquals(SECOND, result);

        // end of input
        assertTrue(inputFormat.reachedEnd() || null == inputFormat.nextRecord(result));
    } catch (Throwable t) {
        System.err.println("test failed with exception: " + t.getMessage());
        t.printStackTrace(System.err);
        fail("Test erroneous");
    }
}
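The open/nextRecord/reachedEnd cycle used in the test generalizes to reading several splits in sequence. A minimal, hypothetical driver (not part of the Flink test suite; the input path is illustrative) that prints every line from every split produced by a TextInputFormat:

TextInputFormat format = new TextInputFormat(new Path("file:///tmp/input.txt")); // hypothetical input path
format.configure(new Configuration());

for (FileInputSplit split : format.createInputSplits(2)) { // request at least two splits
    format.open(split);
    String line;
    while (!format.reachedEnd() && (line = format.nextRecord("")) != null) {
        System.out.println(line);
    }
    format.close();
}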
Use of org.apache.flink.core.fs.FileInputSplit in project flink by apache.
From the class CsvInputFormatTest, method testPojoTypeWithMappingInfoAndPartialField.
@Test
public void testPojoTypeWithMappingInfoAndPartialField() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);

    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile));
    wrt.write("123,3.123,AAA,BBB\n");
    wrt.write("456,1.123,BBB,AAA\n");
    wrt.close();

    @SuppressWarnings("unchecked")
    PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);

    // read only CSV columns 0 and 3, mapped to the POJO fields field1 and field4
    CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(
        new Path(tempFile.toURI().toString()),
        typeInfo,
        new String[] { "field1", "field4" },
        new boolean[] { true, false, false, true });
    inputFormat.configure(new Configuration());

    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);

    PojoItem item = new PojoItem();
    inputFormat.nextRecord(item);

    assertEquals(123, item.field1);
    assertEquals("BBB", item.field4);
}
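PojoItem is declared elsewhere in CsvInputFormatTest. From the assertions, field1 holds an int and field4 a String; the types of field2 and field3 in the sketch below are guesses chosen to match the CSV columns and are not taken from the Flink sources:

public static class PojoItem {
    public int field1;     // CSV column 0 in this test
    public String field2;  // assumed type, unused here
    public Double field3;  // assumed type, unused here
    public String field4;  // CSV column 3 in this test
}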