Search in sources :

Example 1 with BaseStatistics

use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.

In the class FileInputFormatTest, method testGetStatisticsOneFileNoCachedVersion:

/**
 * Checks the total input size reported for a single file when no cached
 * statistics are supplied.
 *
 * <p>A temp file is requested via {@code TestFileUtils.createTempFile(SIZE)}, the
 * format is pointed at it and configured, and {@code getStatistics(null)} must
 * report a total input size equal to {@code SIZE}.
 */
@Test
public void testGetStatisticsOneFileNoCachedVersion() {
    try {
        final long SIZE = 1024 * 500;
        String tempFile = TestFileUtils.createTempFile(SIZE);

        final DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());

        // null = no previously cached statistics are handed to the format
        BaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());
    } catch (Exception ex) {
        // Attach the original exception as the cause so the test report keeps its
        // type and stack trace (a bare fail(ex.getMessage()) loses both, and the
        // message itself may be null).
        throw new AssertionError("Unexpected exception: " + ex, ex);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) IOException(java.io.IOException) Test(org.junit.Test)

Example 2 with BaseStatistics

use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.

In the class FileInputFormatTest, method testGetStatisticsMultipleFilesNoCachedVersion:

/**
 * Checks the total input size reported for a directory of files when no cached
 * statistics are supplied.
 *
 * <p>A temp directory is requested via
 * {@code TestFileUtils.createTempFileDir(SIZE1, SIZE2, SIZE3)}; the statistics
 * returned by {@code getStatistics(null)} must report a total input size equal
 * to the sum of the three sizes.
 */
@Test
public void testGetStatisticsMultipleFilesNoCachedVersion() {
    try {
        final long SIZE1 = 2077;
        final long SIZE2 = 31909;
        final long SIZE3 = 10;
        final long TOTAL = SIZE1 + SIZE2 + SIZE3;

        String tempDir = TestFileUtils.createTempFileDir(SIZE1, SIZE2, SIZE3);

        final DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath(tempDir);
        format.configure(new Configuration());

        // null = no previously cached statistics are handed to the format
        BaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize());
    } catch (Exception ex) {
        // Attach the original exception as the cause so the test report keeps its
        // type and stack trace (a bare fail(ex.getMessage()) loses both, and the
        // message itself may be null).
        throw new AssertionError("Unexpected exception: " + ex, ex);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) IOException(java.io.IOException) Test(org.junit.Test)

Example 3 with BaseStatistics

use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.

In the class FileInputFormatTest, method testGetStatisticsOneFileWithCachedVersion:

/**
 * Exercises {@code getStatistics} on a single file when previously gathered
 * statistics are passed back in.
 *
 * <p>Four steps, each on a fresh {@code DummyFileInputFormat} for the same file:
 * <ol>
 *   <li>no cached statistics: the reported size must equal {@code SIZE};</li>
 *   <li>the statistics from step 1 as cache: the very same object must be returned;</li>
 *   <li>fake statistics carrying the same modification time as step 1 and
 *       {@code FAKE_SIZE}: the reported size must be {@code FAKE_SIZE};</li>
 *   <li>fake statistics whose modification time is one less than step 1's:
 *       the reported size must be {@code SIZE} again.</li>
 * </ol>
 */
@Test
public void testGetStatisticsOneFileWithCachedVersion() {
    try {
        final long SIZE = 50873;
        final long FAKE_SIZE = 10065;

        String tempFile = TestFileUtils.createTempFile(SIZE);

        // Step 1: no cached statistics.
        DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize());

        // Step 2: feed the statistics from step 1 back in; the same instance must come back.
        format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics newStats = format.getStatistics(stats);
        Assert.assertSame("Statistics object was changed", stats, newStats);

        // Step 3: insert fake stats with the correct modification time.
        // The call should return the fake stats.
        format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics latest = format.getStatistics(fakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize());

        // Step 4: insert fake stats with an expired modification time.
        // The call should return new, accurate stats.
        format = new DummyFileInputFormat();
        format.setFilePath(tempFile);
        format.configure(new Configuration());
        FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
        BaseStatistics reGathered = format.getStatistics(outDatedFakeStats);
        Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());
    } catch (Exception ex) {
        // Attach the original exception as the cause so the test report keeps its
        // type and stack trace (a bare fail(ex.getMessage()) loses both, and the
        // message itself may be null).
        throw new AssertionError("Unexpected exception: " + ex, ex);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) FileBaseStatistics(org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics) IOException(java.io.IOException) Test(org.junit.Test)

Example 4 with BaseStatistics

use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.

In the class DelimitedInputFormatSamplingTest, method testDifferentDelimiter:

/**
 * Checks the sampled statistics when records are separated by a custom
 * multi-character delimiter instead of a newline.
 *
 * <p>Every {@code "\n"} in {@code TEST_DATA1} is replaced by {@code DELIMITER}
 * before the temp file is created. The record count must lie strictly within
 * one of {@code TEST_DATA_1_LINES}, and the average record width strictly within
 * one of {@code testData.length() / TEST_DATA_1_LINES}.
 */
@Test
public void testDifferentDelimiter() {
    try {
        final String DELIMITER = "12345678-";
        String testData = TEST_DATA1.replace("\n", DELIMITER);
        final String tempFile = TestFileUtils.createTempFile(testData);
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(tempFile);
        format.setDelimiter(DELIMITER);
        format.configure(conf);

        // null = no previously cached statistics are handed to the format
        BaseStatistics stats = format.getStatistics(null);

        final int numLines = TEST_DATA_1_LINES;
        final float avgWidth = ((float) testData.length()) / TEST_DATA_1_LINES;

        // Logical && rather than bitwise & for the two-sided (exclusive) range checks.
        Assert.assertTrue("Wrong record count.", stats.getNumberOfRecords() < numLines + 1 && stats.getNumberOfRecords() > numLines - 1);
        Assert.assertTrue("Wrong avg record size.", stats.getAverageRecordWidth() < avgWidth + 1 && stats.getAverageRecordWidth() > avgWidth - 1);
    } catch (Exception e) {
        // Attach the original exception as the cause so the test report keeps its
        // type and stack trace (a bare fail(e.getMessage()) loses both, and the
        // message itself may be null).
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) Test(org.junit.Test)

Example 5 with BaseStatistics

use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.

In the class DelimitedInputFormatSamplingTest, method testCachedStatistics:

/**
 * Checks that passing previously gathered statistics back in avoids re-sampling.
 *
 * <p>The first {@code getStatistics(null)} call must open
 * {@code DEFAULT_NUM_SAMPLES} streams, as counted by {@code TestFileSystem}.
 * A second format for the same path, given the first result as cache, must open
 * no streams and return the very same statistics object.
 */
@Test
public void testCachedStatistics() {
    try {
        final String tempFile = TestFileUtils.createTempFile(TEST_DATA1);
        final Configuration conf = new Configuration();

        // First pass: no cache, so the stream-open counter must match the sample count.
        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath("test://" + tempFile);
        format.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        BaseStatistics stats = format.getStatistics(null);
        Assert.assertEquals("Wrong number of samples taken.", DEFAULT_NUM_SAMPLES, TestFileSystem.getNumtimeStreamOpened());

        // Second pass: the first result is supplied as cache; no stream may be opened.
        final TestDelimitedInputFormat format2 = new TestDelimitedInputFormat(CONFIG);
        format2.setFilePath("test://" + tempFile);
        format2.configure(conf);
        TestFileSystem.resetStreamOpenCounter();
        BaseStatistics stats2 = format2.getStatistics(stats);
        Assert.assertEquals("Using cached statistics should cicumvent sampling.", 0, TestFileSystem.getNumtimeStreamOpened());
        Assert.assertSame("Using cached statistics should cicumvent sampling.", stats, stats2);
    } catch (Exception e) {
        // Attach the original exception as the cause so the test report keeps its
        // type and stack trace (a bare fail(e.getMessage()) loses both, and the
        // message itself may be null).
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) BaseStatistics(org.apache.flink.api.common.io.statistics.BaseStatistics) Test(org.junit.Test)

Aggregations

BaseStatistics (org.apache.flink.api.common.io.statistics.BaseStatistics): 14, Test (org.junit.Test): 13, Configuration (org.apache.flink.configuration.Configuration): 11, IOException (java.io.IOException): 8, FileBaseStatistics (org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics): 6, File (java.io.File): 3, Path (org.apache.flink.core.fs.Path): 2