use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class FileInputFormatTest method testGetStatisticsNonExistingFile.
// ------------------------------------------------------------------------
// Statistics
// ------------------------------------------------------------------------
/**
 * Checks that asking the format for statistics of a path that does not exist
 * yields {@code null} rather than a statistics object.
 */
@Test
public void testGetStatisticsNonExistingFile() {
    try {
        final DummyFileInputFormat format = new DummyFileInputFormat();
        format.setFilePath("file:///some/none/existing/directory/");
        format.configure(new Configuration());

        // no cached statistics are supplied, so the format has to inspect the path itself
        BaseStatistics stats = format.getStatistics(null);
        Assert.assertNull("The file statistics should be null.", stats);
    } catch (Exception ex) {
        // Fail with the original exception attached as the cause, so the test report
        // carries the full stack trace and the message is never null.
        throw new AssertionError("Unexpected exception while obtaining statistics: " + ex, ex);
    }
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class DelimitedInputFormatSamplingTest method testSamplingDirectory.
/**
 * Samples a temporary directory built from the two test data sets and checks that
 * the estimated record count and average record width lie within the bounds
 * derived from the total size and the line widths of the two data sets.
 */
@Test
public void testSamplingDirectory() {
    try {
        final String tempFile = TestFileUtils.createTempFileDir(TEST_DATA1, TEST_DATA2);
        final Configuration conf = new Configuration();

        final TestDelimitedInputFormat format = new TestDelimitedInputFormat(CONFIG);
        format.setFilePath(tempFile);
        format.configure(conf);

        BaseStatistics stats = format.getStatistics(null);
        // fail with a clear message instead of a NullPointerException further down
        Assert.assertNotNull("The sampled statistics must not be null.", stats);

        // the narrowest line width gives the most lines, the widest gives the fewest
        final int maxNumLines = (int) Math.ceil(TOTAL_SIZE / ((double) Math.min(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));
        final int minNumLines = (int) (TOTAL_SIZE / ((double) Math.max(TEST_DATA_1_LINEWIDTH, TEST_DATA_2_LINEWIDTH)));

        final float maxAvgWidth = ((float) (TOTAL_SIZE)) / minNumLines;
        final float minAvgWidth = ((float) (TOTAL_SIZE)) / maxNumLines;

        final long numRecords = stats.getNumberOfRecords();
        if (numRecords < minNumLines || numRecords > maxNumLines) {
            Assert.fail("Wrong record count. Records: " + numRecords
                    + " out of (" + minNumLines + ", " + maxNumLines + ").");
        }

        // keep the negated "within bounds" form so that a NaN width also fails the test
        if (!(stats.getAverageRecordWidth() <= maxAvgWidth && stats.getAverageRecordWidth() >= minAvgWidth)) {
            Assert.fail("Wrong avg record size. Width: " + stats.getAverageRecordWidth()
                    + " out of (" + minAvgWidth + ", " + maxAvgWidth + ").");
        }
    } catch (Exception e) {
        // Fail with the original exception attached as the cause, so the test report
        // carries the full stack trace and the message is never null.
        throw new AssertionError("Unexpected exception while sampling the directory: " + e, e);
    }
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class SequentialFormatTestBase method checkStatistics.
/**
 * Verifies that the number of records reported by the statistics of the
 * created input format matches the expected number of tuples.
 */
@Test
public void checkStatistics() {
    final BinaryInputFormat<T> format = this.createInputFormat();
    // no cached statistics are passed in
    final long reportedRecords = format.getStatistics(null).getNumberOfRecords();
    Assert.assertEquals(this.numberOfTuples, reportedRecords);
}
use of org.apache.flink.api.common.io.statistics.BaseStatistics in project flink by apache.
the class DataSourceNode method computeOperatorSpecificDefaultEstimates.
/**
 * Derives default size and cardinality estimates for this data source from the
 * statistics reported by its input format.
 *
 * <p>If no statistics object is given, or the input format cannot be obtained and
 * configured, the estimates are left untouched. Otherwise the format is asked for its
 * statistics (handing it any cached entry for this operator's statistics key), and
 * the known values are copied into {@code estimatedOutputSize} and
 * {@code estimatedNumRecords}.
 *
 * @param statistics the statistics cache to consult; may be {@code null}
 */
@Override
protected void computeOperatorSpecificDefaultEstimates(DataStatistics statistics) {
    // nothing to look up without a statistics object
    if (statistics == null) {
        return;
    }

    // the input format supplies the statistics, so it has to be obtained and configured first
    InputFormat<?, ?> inputFormat;
    try {
        inputFormat = getOperator().getFormatWrapper().getUserCodeObject();
        inputFormat.configure(getOperator().getParameters());
    } catch (Throwable t) {
        if (Optimizer.LOG.isWarnEnabled()) {
            Optimizer.LOG.warn("Could not instantiate InputFormat to obtain statistics." + " Limited statistics will be available.", t);
        }
        return;
    }

    // the description is only used in a log message, so a failing toString() is tolerated
    String formatLabel = "<unknown>";
    try {
        formatLabel = inputFormat.toString();
    } catch (Throwable ignored) {
        // deliberately ignored: only a cosmetic string is lost
    }

    // look up the cached entry and let the format produce its statistics from it
    final BaseStatistics cached = statistics.getBaseStatistics(getOperator().getStatisticsKey());

    BaseStatistics formatStats = null;
    try {
        formatStats = inputFormat.getStatistics(cached);
    } catch (Throwable t) {
        if (Optimizer.LOG.isWarnEnabled()) {
            Optimizer.LOG.warn("Error obtaining statistics from input format: " + t.getMessage(), t);
        }
    }

    if (formatStats == null) {
        return;
    }

    // adopt the input size only when it is known and non-negative
    final long totalSize = formatStats.getTotalInputSize();
    if (totalSize == BaseStatistics.SIZE_UNKNOWN) {
        if (Optimizer.LOG.isInfoEnabled()) {
            Optimizer.LOG.info("Compiler could not determine the size of input '" + formatLabel + "'. Using default estimates.");
        }
    } else if (totalSize >= 0) {
        this.estimatedOutputSize = totalSize;
    }

    // adopt the record count unless it is flagged as unknown
    final long recordCount = formatStats.getNumberOfRecords();
    if (recordCount != BaseStatistics.NUM_RECORDS_UNKNOWN) {
        this.estimatedNumRecords = recordCount;
    }
}
Aggregations