Use of org.apache.drill.exec.vector.accessor.TupleReader in the Apache Drill project.
From the class MetadataControllerBatch, method getColumnStatistics.
/**
 * Builds per-column statistics from the statistics fields found in the given schema.
 *
 * <p>Every column of {@code columnMetadata} whose name is recognized by
 * {@link AnalyzeColumnUtils#isColumnStatisticsField} contributes one
 * {@link StatisticsHolder} to the column it refers to. The value type of a column is
 * taken from the first MIN_VALUE or MAX_VALUE statistics field seen for it.
 *
 * @param reader         reader from which each statistics column is obtained by name
 * @param columnMetadata schema whose columns are scanned for statistics fields
 * @param rowCount       total row count; when {@code null}, no NULLS_COUNT is derived
 * @return statistics keyed by the schema path parsed from the column name
 */
private Map<SchemaPath, ColumnStatistics<?>> getColumnStatistics(TupleReader reader, TupleMetadata columnMetadata, Long rowCount) {
  Multimap<String, StatisticsHolder<?>> holdersByField = ArrayListMultimap.create();
  Map<String, TypeProtos.MinorType> typesByField = new HashMap<>();

  for (ColumnMetadata column : columnMetadata) {
    String rawName = column.name();
    if (!AnalyzeColumnUtils.isColumnStatisticsField(rawName)) {
      continue;
    }

    String fieldName = AnalyzeColumnUtils.getColumnName(rawName);
    StatisticsKind<?> kind = AnalyzeColumnUtils.getStatisticsKind(rawName);
    holdersByField.put(
        fieldName,
        new StatisticsHolder<>(getConvertedColumnValue(reader.column(rawName)), kind));

    // Only min / max statistics fields determine the recorded type of the column.
    String kindName = kind.getName();
    boolean isMinOrMax = kindName.equalsIgnoreCase(ColumnStatisticsKind.MIN_VALUE.getName())
        || kindName.equalsIgnoreCase(ColumnStatisticsKind.MAX_VALUE.getName());
    if (isMinOrMax) {
      typesByField.putIfAbsent(fieldName, column.type());
    }
  }

  // Derives NULLS_COUNT (rowCount minus NON_NULL_VALUES_COUNT) for use during filter pushdown.
  if (rowCount != null) {
    // Collected separately and added afterwards so that the multimap
    // is not modified while it is being iterated.
    Map<String, StatisticsHolder<?>> derivedNullsCounts = new HashMap<>();
    for (String fieldName : holdersByField.keySet()) {
      holdersByField.get(fieldName).stream()
          .filter(holder -> holder.getStatisticsKind() == ColumnStatisticsKind.NON_NULL_VALUES_COUNT)
          .findAny()
          .map(holder -> (Long) holder.getStatisticsValue())
          .ifPresent(nonNullCount -> derivedNullsCounts.put(
              fieldName,
              new StatisticsHolder<>(rowCount - nonNullCount, ColumnStatisticsKind.NULLS_COUNT)));
    }
    for (Map.Entry<String, StatisticsHolder<?>> derived : derivedNullsCounts.entrySet()) {
      holdersByField.put(derived.getKey(), derived.getValue());
    }
  }

  Map<SchemaPath, ColumnStatistics<?>> statsByPath = new HashMap<>();
  for (String fieldName : holdersByField.keySet()) {
    statsByPath.put(
        SchemaPath.parseFromString(fieldName),
        new ColumnStatistics<>(holdersByField.get(fieldName), typesByField.get(fieldName)));
  }
  return statsByPath;
}
Aggregations