Search in sources:

Example 31 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class TestMetastoreHiveStatisticsProvider method testCreateDataColumnStatistics.

/**
 * Checks that {@code createDataColumnStatistics} returns {@code ColumnStatistics.empty()} for three inputs:
 * no partition statistics, only empty partition statistics, and partition statistics whose sole
 * column entry is keyed by {@code "column2"}.
 */
@Test
public void testCreateDataColumnStatistics() {
    ColumnStatistics expected = ColumnStatistics.empty();

    // Case 1: nothing to aggregate.
    assertEquals(createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of()), expected);

    // Case 2: two partitions, both with empty statistics.
    assertEquals(
            createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of(PartitionStatistics.empty(), PartitionStatistics.empty())),
            expected);

    // Case 3: one partition with zero basic statistics and column statistics only under "column2".
    PartitionStatistics otherColumnOnly = new PartitionStatistics(
            HiveBasicStatistics.createZeroStatistics(),
            ImmutableMap.of("column2", HiveColumnStatistics.empty()));
    assertEquals(createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of(otherColumnOnly)), expected);
}
Also used : MetastoreHiveStatisticsProvider.validatePartitionStatistics(com.facebook.presto.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Test(org.testng.annotations.Test)

Example 32 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class AbstractTestHiveClient method testUpdateTableStatistics.

/**
 * Applies each entry of {@code statistics} to the table in turn through the metastore client and
 * asserts, at every step, both the value handed to the update function and the value read back.
 * Finally writes {@code initialStatistics} back and asserts it is what the metastore returns.
 *
 * @param tableName table whose statistics are updated
 * @param initialStatistics statistics the table is expected to have on entry, and which are written back at the end
 * @param statistics statistics to apply, in order
 */
protected void testUpdateTableStatistics(SchemaTableName tableName, PartitionStatistics initialStatistics, PartitionStatistics... statistics) {
    ExtendedHiveMetastore metastore = getMetastoreClient();
    String schema = tableName.getSchemaName();
    String table = tableName.getTableName();

    // Precondition: the table starts out with the expected statistics.
    assertThat(metastore.getTableStatistics(METASTORE_CONTEXT, schema, table)).isEqualTo(initialStatistics);

    // Held in an AtomicReference because it is reassigned in the loop yet read from inside lambdas.
    AtomicReference<PartitionStatistics> lastWritten = new AtomicReference<>(initialStatistics);
    for (PartitionStatistics next : statistics) {
        metastore.updateTableStatistics(METASTORE_CONTEXT, schema, table, current -> {
            // The update function must be handed whatever was written previously.
            assertThat(current).isEqualTo(lastWritten.get());
            return next;
        });
        assertThat(metastore.getTableStatistics(METASTORE_CONTEXT, schema, table)).isEqualTo(next);
        lastWritten.set(next);
    }

    assertThat(metastore.getTableStatistics(METASTORE_CONTEXT, schema, table)).isEqualTo(lastWritten.get());

    // Write the initial statistics back and confirm they are read back.
    metastore.updateTableStatistics(METASTORE_CONTEXT, schema, table, current -> {
        assertThat(current).isEqualTo(lastWritten.get());
        return initialStatistics;
    });
    assertThat(metastore.getTableStatistics(METASTORE_CONTEXT, schema, table)).isEqualTo(initialStatistics);
}
Also used : PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) AtomicReference(java.util.concurrent.atomic.AtomicReference) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore)

Example 33 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class AbstractTestHiveClient method eraseStatistics.

/**
 * Replaces the table-level statistics of the given table with empty statistics and, when the table
 * has partition columns, does the same for every partition the metastore returns for it.
 *
 * @param schemaTableName table whose statistics are overwritten
 * @throws TableNotFoundException if the metastore has no such table
 */
private void eraseStatistics(SchemaTableName schemaTableName) {
    ExtendedHiveMetastore metastore = getMetastoreClient();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    // Table-level statistics first; the previous value is discarded.
    metastore.updateTableStatistics(METASTORE_CONTEXT, schemaName, tableName, ignored -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));

    Table table = metastore.getTable(METASTORE_CONTEXT, schemaName, tableName)
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    List<String> partitionColumnNames = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    if (table.getPartitionColumns().isEmpty()) {
        // No partition columns: nothing more to overwrite.
        return;
    }

    List<String> partitionNames = metastore.getPartitionNames(METASTORE_CONTEXT, schemaName, tableName).orElse(ImmutableList.of());
    // Keep only the partitions the metastore actually resolved.
    List<Partition> partitions = metastore.getPartitionsByNames(METASTORE_CONTEXT, schemaName, tableName, partitionNames)
            .values().stream()
            .filter(Optional::isPresent)
            .map(Optional::get)
            .collect(toImmutableList());
    for (Partition partition : partitions) {
        String partitionName = makePartName(partitionColumnNames, partition.getValues());
        metastore.updatePartitionStatistics(METASTORE_CONTEXT, schemaName, tableName, partitionName, ignored -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
    }
}
Also used : TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Partition(com.facebook.presto.hive.metastore.Partition) Table(com.facebook.presto.hive.metastore.Table) Optional(java.util.Optional) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 34 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class MetastoreHiveStatisticsProvider method calculateDataSize.

/**
 * Estimates the total data size of {@code column} by computing the average bytes per row over the
 * partitions that report both a row count and a total size for the column, then scaling that
 * average by {@code totalRowCount}.
 *
 * @param column name of the column whose statistics are looked up in each partition
 * @param partitionStatistics statistics of the partitions to consider
 * @param totalRowCount row count the per-row average is multiplied by
 * @return {@code Estimate.unknown()} when no partition has both values or the known row count sums to
 *         zero; {@code Estimate.zero()} when {@code totalRowCount} is zero; otherwise the scaled estimate
 * @throws VerifyException if a considered partition reports a negative row count or data size
 */
@VisibleForTesting
static Estimate calculateDataSize(String column, Collection<PartitionStatistics> partitionStatistics, double totalRowCount) {
    // Phase 1: keep only partitions for which both the row count and the column's size are known.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> {
                if (partition.getBasicStatistics().getRowCount().isPresent()) {
                    HiveColumnStatistics candidate = partition.getColumnStatistics().get(column);
                    return candidate != null && candidate.getTotalSizeInBytes().isPresent();
                }
                return false;
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    // Phase 2: validate and accumulate the known totals.
    long rowsSeen = 0;
    long bytesSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRows = partition.getBasicStatistics().getRowCount()
                .orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics columnStatistics = partition.getColumnStatistics().get(column);
        verify(columnStatistics != null, "columnStatistics is null");

        long partitionBytes = columnStatistics.getTotalSizeInBytes()
                .orElseThrow(() -> new VerifyException("totalSizeInBytes is not present"));
        verify(partitionBytes >= 0, "dataSize must be greater than or equal to zero");

        rowsSeen += partitionRows;
        bytesSeen += partitionBytes;
    }

    if (totalRowCount == 0) {
        return Estimate.zero();
    }
    if (rowsSeen == 0) {
        // No rows to average over.
        return Estimate.unknown();
    }

    double bytesPerRow = ((double) bytesSeen) / rowsSeen;
    return Estimate.of(bytesPerRow * totalRowCount);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) Collections.unmodifiableList(java.util.Collections.unmodifiableList) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) Double.parseDouble(java.lang.Double.parseDouble) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) List(java.util.List) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) HiveSessionProperties.isIgnoreCorruptedStatistics(com.facebook.presto.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) LocalDate(java.time.LocalDate) 
MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashFunction(com.google.common.hash.HashFunction) Logger(com.facebook.airlift.log.Logger) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) OptionalLong(java.util.OptionalLong) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Double.isFinite(java.lang.Double.isFinite) HIVE_CORRUPTED_COLUMN_STATISTICS(com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Decimals(com.facebook.presto.common.type.Decimals) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) HiveSessionProperties.getPartitionStatisticsSampleSize(com.facebook.presto.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) 
HivePartition(com.facebook.presto.hive.HivePartition) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Double.isNaN(java.lang.Double.isNaN) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Estimate(com.facebook.presto.spi.statistics.Estimate) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) VerifyException(com.google.common.base.VerifyException) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 35 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class MetastoreHiveStatisticsProvider method calculateNullsFraction.

/**
 * Estimates the fraction of null values in {@code column} as the summed nulls count divided by the
 * summed row count, taken over the partitions that report both values.
 *
 * @param column name of the column whose statistics are looked up in each partition
 * @param partitionStatistics statistics of the partitions to consider
 * @return {@code Estimate.unknown()} when no partition has both values; {@code Estimate.zero()} when the
 *         summed row count is zero; otherwise the nulls-to-rows ratio
 * @throws VerifyException if a considered partition reports a negative row count, a negative nulls
 *         count, or a nulls count larger than its row count
 */
@VisibleForTesting
static Estimate calculateNullsFraction(String column, Collection<PartitionStatistics> partitionStatistics) {
    // Phase 1: keep only partitions for which both the row count and the column's nulls count are known.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> {
                if (partition.getBasicStatistics().getRowCount().isPresent()) {
                    HiveColumnStatistics candidate = partition.getColumnStatistics().get(column);
                    return candidate != null && candidate.getNullsCount().isPresent();
                }
                return false;
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    // Phase 2: validate each partition and accumulate the totals.
    long nullsSeen = 0;
    long rowsSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRows = partition.getBasicStatistics().getRowCount()
                .orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics columnStatistics = partition.getColumnStatistics().get(column);
        verify(columnStatistics != null, "columnStatistics is null");

        long partitionNulls = columnStatistics.getNullsCount()
                .orElseThrow(() -> new VerifyException("nullsCount is not present"));
        verify(partitionNulls >= 0, "nullsCount must be greater than or equal to zero");
        verify(partitionNulls <= partitionRows, "nullsCount must be less than or equal to rowCount. nullsCount: %s. rowCount: %s.", partitionNulls, partitionRows);

        nullsSeen += partitionNulls;
        rowsSeen += partitionRows;
    }

    if (rowsSeen == 0) {
        return Estimate.zero();
    }

    verify(nullsSeen <= rowsSeen, "totalNullsCount must be less than or equal to totalRowCount. totalNullsCount: %s. totalRowCount: %s.", nullsSeen, rowsSeen);
    return Estimate.of(((double) nullsSeen) / rowsSeen);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) Collections.unmodifiableList(java.util.Collections.unmodifiableList) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) Double.parseDouble(java.lang.Double.parseDouble) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) List(java.util.List) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) HiveSessionProperties.isIgnoreCorruptedStatistics(com.facebook.presto.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) LocalDate(java.time.LocalDate) 
MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashFunction(com.google.common.hash.HashFunction) Logger(com.facebook.airlift.log.Logger) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) OptionalLong(java.util.OptionalLong) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Double.isFinite(java.lang.Double.isFinite) HIVE_CORRUPTED_COLUMN_STATISTICS(com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Decimals(com.facebook.presto.common.type.Decimals) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) HiveSessionProperties.getPartitionStatisticsSampleSize(com.facebook.presto.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) 
HivePartition(com.facebook.presto.hive.HivePartition) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Double.isNaN(java.lang.Double.isNaN) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Estimate(com.facebook.presto.spi.statistics.Estimate) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) VerifyException(com.google.common.base.VerifyException) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics)35 SchemaTableName (com.facebook.presto.spi.SchemaTableName)20 PrestoException (com.facebook.presto.spi.PrestoException)19 ImmutableMap (com.google.common.collect.ImmutableMap)15 HiveColumnStatistics (com.facebook.presto.hive.metastore.HiveColumnStatistics)14 TableNotFoundException (com.facebook.presto.spi.TableNotFoundException)14 HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)13 MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext)13 Table (com.facebook.presto.hive.metastore.Table)13 Map (java.util.Map)12 Optional (java.util.Optional)12 OptionalLong (java.util.OptionalLong)11 Type (com.facebook.presto.common.type.Type)10 Column (com.facebook.presto.hive.metastore.Column)10 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)10 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)10 List (java.util.List)10 Objects.requireNonNull (java.util.Objects.requireNonNull)10 Set (java.util.Set)10 Domain (com.facebook.presto.common.predicate.Domain)8