Search in sources :

Example 1 with Histogram

use of org.apache.drill.metastore.statistics.Histogram in project drill by apache.

In class DrillStatsTable, method getEstimatedColumnStats:

/**
 * Collects the statistics that the given {@link DrillStatsTable} holds for a single column.
 *
 * <p>Each statistic (NDV, non-null count, histogram, row count) is included only when the
 * provider returns a non-null value for it, in that order. If the provider is {@code null}
 * or reports that it is not materialized, an empty list is returned.
 *
 * @param statsProvider the source of statistics; may be {@code null}
 * @param fieldName     name of the column whose statistics should be obtained
 * @return list of {@link StatisticsHolder} entries pairing each available statistics value
 *         with its {@link StatisticsKind}
 */
public static List<StatisticsHolder<?>> getEstimatedColumnStats(DrillStatsTable statsProvider, SchemaPath fieldName) {
    // Nothing to report without a materialized statistics source.
    if (statsProvider == null || !statsProvider.isMaterialized()) {
        return Collections.emptyList();
    }

    List<StatisticsHolder<?>> holders = new ArrayList<>();

    Double distinctValues = statsProvider.getNdv(fieldName);
    if (distinctValues != null) {
        holders.add(new StatisticsHolder<>(distinctValues, ColumnStatisticsKind.NDV));
    }

    Double nonNullRows = statsProvider.getNNRowCount(fieldName);
    if (nonNullRows != null) {
        holders.add(new StatisticsHolder<>(nonNullRows, ColumnStatisticsKind.NON_NULL_COUNT));
    }

    Histogram columnHistogram = statsProvider.getHistogram(fieldName);
    if (columnHistogram != null) {
        holders.add(new StatisticsHolder<>(columnHistogram, ColumnStatisticsKind.HISTOGRAM));
    }

    // The row count is requested from the provider without a column argument.
    Double totalRows = statsProvider.getRowCount();
    if (totalRows != null) {
        holders.add(new StatisticsHolder<>(totalRows, ColumnStatisticsKind.ROWCOUNT));
    }

    return holders;
}
Also used : StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) Histogram(org.apache.drill.metastore.statistics.Histogram) ArrayList(java.util.ArrayList)

Example 2 with Histogram

use of org.apache.drill.metastore.statistics.Histogram in project drill by apache.

In class DrillRelMdSelectivity, method computeRangeSelectivity:

/**
 * Estimates the selectivity of a range predicate, using the column histogram if available.
 *
 * <p>Falls back to {@code guessSelectivity(orPred)} when the predicate's column cannot be
 * resolved, no column statistics or histogram are present, the row count or NDV statistic
 * is missing, or the histogram does not produce an estimate.
 *
 * @param tableMetadata table metadata to read column statistics from; may be {@code null}
 * @param orPred        predicate whose selectivity is estimated
 * @param fieldNames    field names passed to {@code getColumn} to resolve the predicate's column
 * @return the histogram-based selectivity when obtainable, otherwise the guessed selectivity
 */
private double computeRangeSelectivity(TableMetadata tableMetadata, RexNode orPred, List<SchemaPath> fieldNames) {
    SchemaPath col = getColumn(orPred, fieldNames);
    if (col != null) {
        ColumnStatistics<?> columnStatistics = tableMetadata != null ? tableMetadata.getColumnStatistics(col) : null;
        Histogram histogram = columnStatistics != null ? ColumnStatisticsKind.HISTOGRAM.getFrom(columnStatistics) : null;
        if (histogram != null) {
            Double totalCount = ColumnStatisticsKind.ROWCOUNT.getFrom(columnStatistics);
            Double ndv = ColumnStatisticsKind.NDV.getFrom(columnStatistics);
            // A histogram may be present without a row count or NDV; unboxing a null
            // Double would throw, so use the guessed selectivity in that case instead.
            if (totalCount != null && ndv != null) {
                Double sel = histogram.estimatedSelectivity(orPred, totalCount.longValue(), ndv.longValue());
                if (sel != null) {
                    return sel;
                }
            }
        }
    }
    return guessSelectivity(orPred);
}
Also used : Histogram(org.apache.drill.metastore.statistics.Histogram) SchemaPath(org.apache.drill.common.expression.SchemaPath)

Aggregations

Histogram (org.apache.drill.metastore.statistics.Histogram): 2, ArrayList (java.util.ArrayList): 1, SchemaPath (org.apache.drill.common.expression.SchemaPath): 1, StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder): 1