use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class FileInputFormatTest method testGetStatisticsOneFileNoCachedVersion.
/**
 * Gathers statistics for a single temporary file without supplying cached statistics
 * ({@code getStatistics(null)}) and checks that the reported total input size equals the
 * size that was handed to {@code TestFileUtils.createTempFile}.
 */
@Test
public void testGetStatisticsOneFileNoCachedVersion() {
    try {
        final long expectedSize = 1024 * 500;
        final String path = TestFileUtils.createTempFile(expectedSize);

        final DummyFileInputFormat inputFormat = new DummyFileInputFormat();
        inputFormat.setFilePath(path);
        inputFormat.configure(new Configuration());

        // null means: no previously cached statistics are offered to the format
        final BaseStatistics gathered = inputFormat.getStatistics(null);
        final long reportedSize = gathered.getTotalInputSize();
        Assert.assertEquals("The file size from the statistics is wrong.", expectedSize, reportedSize);
    } catch (Exception failure) {
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class FileInputFormatTest method testGetStatisticsMultipleFilesNoCachedVersion.
/**
 * Gathers statistics, without cached statistics, for the path returned by
 * {@code TestFileUtils.createTempFileDir} and checks that the reported total input size
 * equals the sum of the three sizes passed to that helper.
 */
@Test
public void testGetStatisticsMultipleFilesNoCachedVersion() {
    try {
        final long firstSize = 2077;
        final long secondSize = 31909;
        final long thirdSize = 10;
        final long expectedTotal = firstSize + secondSize + thirdSize;

        final String directory = TestFileUtils.createTempFileDir(firstSize, secondSize, thirdSize);

        final DummyFileInputFormat inputFormat = new DummyFileInputFormat();
        inputFormat.setFilePath(directory);
        inputFormat.configure(new Configuration());

        // null means: no previously cached statistics are offered to the format
        final BaseStatistics gathered = inputFormat.getStatistics(null);
        final long reportedSize = gathered.getTotalInputSize();
        Assert.assertEquals("The file size from the statistics is wrong.", expectedTotal, reportedSize);
    } catch (Exception failure) {
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class FileInputFormatTest method testGetStatisticsOneFileWithCachedVersion.
/**
 * Exercises {@code getStatistics} for a single file when cached statistics are passed in.
 *
 * <p>The test runs four phases, each on a freshly created and configured format:
 * <ol>
 *   <li>no cached statistics: the reported size must equal the real file size;</li>
 *   <li>the statistics from phase 1 are passed back: the very same object must be returned;</li>
 *   <li>fake statistics carrying the same modification time but a different size are passed:
 *       the fake size must be reported;</li>
 *   <li>fake statistics whose modification time is one less than the real one are passed:
 *       the real file size must be reported again.</li>
 * </ol>
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() {
    try {
        final long SIZE = 50873;
        final long FAKE_SIZE = 10065;
        String tempFile = TestFileUtils.createTempFile(SIZE);

        // phase 1: nothing cached, statistics are gathered from the file
        DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());

        // phase 2: hand the gathered statistics back; the identical instance is expected.
        // assertSame reports both objects on failure, unlike assertTrue(a == b).
        format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics newStats = format.getStatistics(stats);
        Assert.assertSame("Statistics object was changed", stats, newStats);

        // phase 3: insert fake stats with the correct modification time. the call should return the fake stats
        format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics latest = format.getStatistics(fakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

        // phase 4: insert fake stats with the expired modification time. the call should return new accurate stats
        format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());
    } catch (Exception ex) {
        ex.printStackTrace();
        Assert.fail(ex.getMessage());
    }
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class DelimitedInputFormatSamplingTest method testDifferentDelimiter.
/**
 * Checks the sampled statistics of a delimited input whose records are separated by a
 * multi-character delimiter instead of a newline.
 *
 * <p>The test data is {@code TEST_DATA1} with every {@code "\n"} replaced by the custom
 * delimiter. The record count and the average record width reported by the statistics must
 * each lie strictly within 1 of the expected value.
 */
@Test
public void testDifferentDelimiter() {
    try {
        final String DELIMITER = "12345678-";
        String testData = TEST_DATA1.replace("\n", DELIMITER);
        final String tempFile = TestFileUtils.createTempFile(testData);
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(tempFile);
        format.setDelimiter(DELIMITER);
        format.configure(conf);

        BaseStatistics stats = format.getStatistics(null);

        // expected average width: total characters of the rewritten data divided by the line count
        final float avgWidth = ((float) testData.length()) / TEST_DATA_1_LINES;

        // both bounds are exclusive; '&&' is the conditional (short-circuit) boolean operator
        Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < TEST_DATA_1_LINES + 1 && stats.getNumberOfRecords() > TEST_DATA_1_LINES - 1);
        Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class DelimitedInputFormatSamplingTest method testCachedStatistics.
/**
 * Checks that passing previously gathered statistics to {@code getStatistics} avoids
 * re-sampling the file.
 *
 * <p>The file is addressed through the {@code test://} scheme so that
 * {@code TestFileSystem} can count how often a stream is opened. The first call (no cached
 * statistics) must open {@code DEFAULT_NUM_SAMPLES} streams; the second call, on a fresh
 * format that receives the first result, must open none and must return the same object.
 */
@Test
public void testCachedStatistics() {
    try {
        final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
        final Configuration conf = new Configuration();

        // first pass: nothing cached, so the format has to sample the file
        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath("test://" + tempFile);
        format.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        BaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());

        // second pass: a fresh format is handed the statistics gathered above
        final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
        format2.setFilePath("test://" + tempFile);
        format2.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        BaseStatistics stats2 = format2.getStatistics(stats);

        // assertEquals / assertSame report the offending values on failure,
        // which assertTrue on a pre-evaluated comparison cannot do
        Assert.assertEquals("Using cached statistics should cicumvent sampling.", 0, TestFileSystem.getNumtimeStreamOpened());
        Assert.assertSame("Using cached statistics should cicumvent sampling.", stats, stats2);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Aggregations