use of org.apache.flink.configuration.Configuration in project flink by apache.
the class GenericCsvInputFormatTest method testSparseParseWithIndicesMultiCharDelimiter.
/**
 * Parses a four-line file whose fields are separated by the multi-character delimiter
 * {@code "|-|"}, selecting only the columns at positions 0, 3 and 7 as {@code IntValue}s, and
 * compares the three selected integers of every line against the expected numbers.
 *
 * <p>Lines one and three of the input end with a trailing delimiter, lines two and four do not.
 * After the fourth record the format must return {@code null} and report that it reached the end.
 */
@Test
public void testSparseParseWithIndicesMultiCharDelimiter() {
    try {
        final String csvText =
                "111|-|222|-|333|-|444|-|555|-|666|-|777|-|888|-|999|-|000|-|\n"
                        + "000|-|999|-|888|-|777|-|666|-|555|-|444|-|333|-|222|-|111\n"
                        + "555|-|999|-|888|-|111|-|666|-|555|-|444|-|777|-|222|-|111|-|\n"
                        + "22222|-|99999|-|8|-|99999999|-|6666666|-|5|-|4444|-|8|-|22222|-|1\n";
        final FileInputSplit inputSplit = createTempFile(csvText);

        // The generic array cast is the only unchecked operation, so the suppression lives here.
        @SuppressWarnings("unchecked")
        final Class<? extends Value>[] columnTypes =
                (Class<? extends Value>[]) new Class[] {IntValue.class, IntValue.class, IntValue.class};
        final int[] columnPositions = {0, 3, 7};

        format.setFieldDelimiter("|-|");
        format.setFieldsGeneric(columnPositions, columnTypes);
        format.configure(new Configuration());
        format.open(inputSplit);

        // One row per input line: the values found at field positions 0, 3 and 7.
        final int[][] expectedRows = {
            {111, 444, 888},
            {0, 777, 333},
            {555, 111, 777},
            {22222, 99999999, 8},
        };

        Value[] record = createIntValues(3);
        for (int[] expectedRow : expectedRows) {
            record = format.nextRecord(record);
            assertNotNull(record);
            for (int col = 0; col < expectedRow.length; col++) {
                assertEquals(expectedRow[col], ((IntValue) record[col]).getValue());
            }
        }

        // The input is exhausted after the fourth line.
        assertNull(format.nextRecord(record));
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
        ex.printStackTrace();
        fail("Test erroneous");
    }
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class GenericCsvInputFormatTest method testReadInvalidContentsLenient.
/**
 * Reads a two-line, {@code '|'}-delimited file as (String, Int, String, Int) with lenient mode
 * switched on and expects the first {@code nextRecord} call to return a record and the second
 * one to return {@code null}.
 *
 * <p>The last field of the second line ({@code hhg}) is not numeric even though that column is
 * configured as an {@code IntValue}.
 */
@Test
public void testReadInvalidContentsLenient() {
    try {
        final FileInputSplit inputSplit = createTempFile("abc|222|def|444\nkkz|777|888|hhg");

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(
                StringValue.class, IntValue.class, StringValue.class, IntValue.class);
        format.setLenient(true);
        format.configure(new Configuration());
        format.open(inputSplit);

        // Reusable target record, one holder per configured column type.
        final Value[] reuse = new Value[4];
        reuse[0] = new StringValue();
        reuse[1] = new IntValue();
        reuse[2] = new StringValue();
        reuse[3] = new IntValue();

        assertNotNull(format.nextRecord(reuse));
        assertNull(format.nextRecord(reuse));
    } catch (Exception ex) {
        final String exceptionName = ex.getClass().getSimpleName();
        fail("Test failed due to a " + exceptionName + ": " + ex.getMessage());
    }
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class DelimitedInputFormatSamplingTest method testNumSamplesMultipleFiles.
/**
 * Checks how many streams are opened while gathering statistics for an input created from four
 * copies of {@code TEST_DATA1}.
 *
 * <p>With an untouched format, {@code DEFAULT_NUM_SAMPLES} stream openings are expected; after
 * {@code setNumLineSamples(8)} exactly 8 are expected. The open counter of
 * {@code TestFileSystem} is reset before each {@code getStatistics} call so that each assertion
 * only sees the openings caused by that call.
 */
@Test
public void testNumSamplesMultipleFiles() {
    try {
        final String tempFile = TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA1, TEST_DATA1, TEST_DATA1);
        // Swap only the first "file" (presumably the URI scheme of the returned location) for
        // "test". String.replace would also rewrite any later "file" inside a directory or file
        // name and thereby point the format at a path that does not exist.
        final String testPath = tempFile.replaceFirst("file", "test");
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(testPath);
        format.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        format.getStatistics(null);
        Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());

        // Second run with an explicitly configured number of line samples.
        TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
        format2.setFilePath(testPath);
        format2.setNumLineSamples(8);
        format2.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        format2.getStatistics(null);
        Assert.assertEquals("Wrong number of samples taken.", 8, TestFileSystem.getNumtimeStreamOpened());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class DelimitedInputFormatSamplingTest method testDifferentDelimiter.
/**
 * Verifies the statistics computed for data whose records are separated by a custom
 * multi-character delimiter instead of a newline.
 *
 * <p>Every {@code "\n"} in {@code TEST_DATA1} is replaced by {@code "12345678-"}, the result is
 * written to a temp file, and the format is configured with that delimiter. The reported number
 * of records must lie strictly within one of {@code TEST_DATA_1_LINES}, and the reported average
 * record width strictly within one of the data length divided by that line count.
 */
@Test
public void testDifferentDelimiter() {
    try {
        final String delimiter = "12345678-";
        String testData = TEST_DATA1.replace("\n", delimiter);
        final String tempFile = TestFileUtils.createTempFile(testData);
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(tempFile);
        format.setDelimiter(delimiter);
        format.configure(conf);

        BaseStatistics stats = format.getStatistics(null);

        final int numLines = TEST_DATA_1_LINES;
        final float avgWidth = ((float) testData.length()) / numLines;

        // Logical && instead of bitwise &: these are boolean range checks, so short-circuit
        // evaluation is the intended operator.
        Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
        Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
use of org.apache.flink.configuration.Configuration in project flink by apache.
the class DelimitedInputFormatSamplingTest method testNumSamplesOneFile.
// ========================================================================
// Tests
// ========================================================================
/**
 * Checks how many streams are opened while gathering statistics for a single temp file holding
 * {@code TEST_DATA1}.
 *
 * <p>With an untouched format, {@code DEFAULT_NUM_SAMPLES} stream openings are expected; after
 * {@code setNumLineSamples(8)} exactly 8 are expected. The open counter of
 * {@code TestFileSystem} is reset before each {@code getStatistics} call so that each assertion
 * only sees the openings caused by that call.
 */
@Test
public void testNumSamplesOneFile() {
    try {
        final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
        // Swap only the first "file" (presumably the URI scheme of the returned location) for
        // "test". String.replace would also rewrite any later "file" inside a directory or file
        // name and thereby point the format at a path that does not exist.
        final String testPath = tempFile.replaceFirst("file", "test");
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(testPath);
        format.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        format.getStatistics(null);
        Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());

        // Second run with an explicitly configured number of line samples.
        TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
        format2.setFilePath(testPath);
        format2.setNumLineSamples(8);
        format2.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        format2.getStatistics(null);
        Assert.assertEquals("Wrong number of samples taken.", 8, TestFileSystem.getNumtimeStreamOpened());
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Aggregations