Search in sources :

Example 1 with Estimate

Use of com.facebook.presto.spi.statistics.Estimate in the project presto by prestodb.

From the class MetastoreHiveStatisticsProvider, the method calculateDataSize.

/**
 * Estimates the total data size of {@code column} by extrapolating the average
 * number of bytes per row, measured over the partitions that report both a row
 * count and a total size for the column, to {@code totalRowCount} rows.
 *
 * @param column name of the column whose statistics are looked up in each partition
 * @param partitionStatistics statistics of the partitions to aggregate
 * @param totalRowCount row count the per-row average is scaled by
 * @return {@code Estimate.unknown()} when no partition carries both values or when
 *         the usable partitions sum to zero rows; {@code Estimate.zero()} when
 *         {@code totalRowCount} is zero; otherwise the scaled size
 * @throws VerifyException if a usable partition reports a negative row count or
 *         a negative data size
 */
@VisibleForTesting
static Estimate calculateDataSize(String column, Collection<PartitionStatistics> partitionStatistics, double totalRowCount) {
    // Keep only the partitions that can contribute to the bytes-per-row average.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> partition.getBasicStatistics().getRowCount().isPresent())
            .filter(partition -> {
                HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
                return stats != null && stats.getTotalSizeInBytes().isPresent();
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long rowsSeen = 0;
    long bytesSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        // The filter above guarantees presence; the checks below guard against regressions.
        long partitionRows = partition.getBasicStatistics().getRowCount().orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
        verify(stats != null, "columnStatistics is null");

        long partitionBytes = stats.getTotalSizeInBytes().orElseThrow(() -> new VerifyException("totalSizeInBytes is not present"));
        verify(partitionBytes >= 0, "dataSize must be greater than or equal to zero");

        rowsSeen += partitionRows;
        bytesSeen += partitionBytes;
    }

    if (totalRowCount == 0) {
        return Estimate.zero();
    }
    if (rowsSeen == 0) {
        // No rows to average over, so a per-row size cannot be derived.
        return Estimate.unknown();
    }

    double bytesPerRow = ((double) bytesSeen) / rowsSeen;
    return Estimate.of(bytesPerRow * totalRowCount);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) Collections.unmodifiableList(java.util.Collections.unmodifiableList) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) Double.parseDouble(java.lang.Double.parseDouble) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) List(java.util.List) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) HiveSessionProperties.isIgnoreCorruptedStatistics(com.facebook.presto.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) LocalDate(java.time.LocalDate) 
MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashFunction(com.google.common.hash.HashFunction) Logger(com.facebook.airlift.log.Logger) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) OptionalLong(java.util.OptionalLong) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Double.isFinite(java.lang.Double.isFinite) HIVE_CORRUPTED_COLUMN_STATISTICS(com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Decimals(com.facebook.presto.common.type.Decimals) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) HiveSessionProperties.getPartitionStatisticsSampleSize(com.facebook.presto.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) 
HivePartition(com.facebook.presto.hive.HivePartition) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Double.isNaN(java.lang.Double.isNaN) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Estimate(com.facebook.presto.spi.statistics.Estimate) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) VerifyException(com.google.common.base.VerifyException) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with Estimate

Use of com.facebook.presto.spi.statistics.Estimate in the project presto by prestodb.

From the class MetastoreHiveStatisticsProvider, the method calculateNullsFraction.

/**
 * Computes the fraction of null values in {@code column} as the summed nulls
 * count divided by the summed row count, taken over the partitions that report
 * both a row count and a nulls count for the column.
 *
 * @param column name of the column whose statistics are looked up in each partition
 * @param partitionStatistics statistics of the partitions to aggregate
 * @return {@code Estimate.unknown()} when no partition carries both values;
 *         {@code Estimate.zero()} when the usable partitions sum to zero rows;
 *         otherwise the nulls-to-rows ratio
 * @throws VerifyException if a usable partition reports a negative row count, a
 *         negative nulls count, or more nulls than rows
 */
@VisibleForTesting
static Estimate calculateNullsFraction(String column, Collection<PartitionStatistics> partitionStatistics) {
    // Keep only the partitions that report both quantities needed for the ratio.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> partition.getBasicStatistics().getRowCount().isPresent())
            .filter(partition -> {
                HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
                return stats != null && stats.getNullsCount().isPresent();
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long nullsSeen = 0;
    long rowsSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        // The filter above guarantees presence; the checks below guard against regressions.
        long partitionRows = partition.getBasicStatistics().getRowCount().orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
        verify(stats != null, "columnStatistics is null");

        long partitionNulls = stats.getNullsCount().orElseThrow(() -> new VerifyException("nullsCount is not present"));
        verify(partitionNulls >= 0, "nullsCount must be greater than or equal to zero");
        verify(partitionNulls <= partitionRows, "nullsCount must be less than or equal to rowCount. nullsCount: %s. rowCount: %s.", partitionNulls, partitionRows);

        nullsSeen += partitionNulls;
        rowsSeen += partitionRows;
    }

    if (rowsSeen == 0) {
        return Estimate.zero();
    }

    verify(nullsSeen <= rowsSeen, "totalNullsCount must be less than or equal to totalRowCount. totalNullsCount: %s. totalRowCount: %s.", nullsSeen, rowsSeen);
    return Estimate.of(((double) nullsSeen) / rowsSeen);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) Collections.unmodifiableList(java.util.Collections.unmodifiableList) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) Double.parseDouble(java.lang.Double.parseDouble) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) List(java.util.List) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) HiveSessionProperties.isIgnoreCorruptedStatistics(com.facebook.presto.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) LocalDate(java.time.LocalDate) 
MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashFunction(com.google.common.hash.HashFunction) Logger(com.facebook.airlift.log.Logger) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) OptionalLong(java.util.OptionalLong) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Double.isFinite(java.lang.Double.isFinite) HIVE_CORRUPTED_COLUMN_STATISTICS(com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Decimals(com.facebook.presto.common.type.Decimals) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) HiveSessionProperties.getPartitionStatisticsSampleSize(com.facebook.presto.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) 
HivePartition(com.facebook.presto.hive.HivePartition) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Double.isNaN(java.lang.Double.isNaN) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Estimate(com.facebook.presto.spi.statistics.Estimate) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) VerifyException(com.google.common.base.VerifyException) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

Logger (com.facebook.airlift.log.Logger)2 NullableValue (com.facebook.presto.common.predicate.NullableValue)2 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)2 Chars.isCharType (com.facebook.presto.common.type.Chars.isCharType)2 DATE (com.facebook.presto.common.type.DateType.DATE)2 DecimalType (com.facebook.presto.common.type.DecimalType)2 Decimals (com.facebook.presto.common.type.Decimals)2 Decimals.isLongDecimal (com.facebook.presto.common.type.Decimals.isLongDecimal)2 Decimals.isShortDecimal (com.facebook.presto.common.type.Decimals.isShortDecimal)2 DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE)2 INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)2 REAL (com.facebook.presto.common.type.RealType.REAL)2 SMALLINT (com.facebook.presto.common.type.SmallintType.SMALLINT)2 TINYINT (com.facebook.presto.common.type.TinyintType.TINYINT)2 Type (com.facebook.presto.common.type.Type)2 Varchars.isVarcharType (com.facebook.presto.common.type.Varchars.isVarcharType)2 HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)2 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)2 HIVE_CORRUPTED_COLUMN_STATISTICS (com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS)2 HivePartition (com.facebook.presto.hive.HivePartition)2