Search in sources :

Example 1 with HivePartition

Use of com.facebook.presto.hive.HivePartition in the presto project by prestodb.

From class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsUnpartitioned.

/**
 * Checks the table statistics produced for an unpartitioned table.
 *
 * <p>The provider is backed by a stub that always returns a single
 * {@code UNPARTITIONED_ID} entry. The expected result carries a row count of
 * 1000, a total size of 5000 and, for the single column, a range of
 * [-100, 100], a nulls fraction of 0.5 and 300 distinct values.
 */
@Test
public void testGetTableStatisticsUnpartitioned() {
    // Metastore-side statistics handed back by the stubbed statistics source.
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(
            new HiveClientConfig(),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig(),
            new CacheConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());

    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());

    // Connector-level statistics the provider is expected to derive.
    ColumnStatistics expectedColumnStatistics = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setTotalSize(Estimate.of(5000))
            .setColumnStatistics(columnHandle, expectedColumnStatistics)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));
    assertEquals(actual, expected);
}
Also used : TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(com.facebook.presto.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) CacheConfig(com.facebook.presto.cache.CacheConfig) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) HivePartition(com.facebook.presto.hive.HivePartition) Test(org.testng.annotations.Test)

Example 2 with HivePartition

Use of com.facebook.presto.hive.HivePartition in the presto project by prestodb.

From class MetastoreHiveStatisticsProvider, method calculateRangeForPartitioningKey.

/**
 * Computes the numeric range spanned by a partitioning column's key values.
 *
 * <p>Each partition's key for {@code column} is looked up, null keys are
 * dropped, and the remaining values are converted to doubles with
 * {@code convertPartitionValueToDouble}.
 *
 * @param column the partitioning column whose keys are inspected
 * @param type the column type, used for the range-support check and the conversion
 * @param partitions the partitions supplying the key values
 * @return the smallest and largest converted key as a {@link DoubleRange}, or
 *         empty when {@code isRangeSupported(type)} is false or no partition
 *         has a non-null key for the column
 */
@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions) {
    if (!isRangeSupported(type)) {
        return Optional.empty();
    }

    List<Double> keyValues = partitions.stream()
            .map(HivePartition::getKeys)
            .map(keys -> keys.get(column))
            .filter(key -> !key.isNull())
            .map(NullableValue::getValue)
            .map(raw -> convertPartitionValueToDouble(type, raw))
            .collect(toImmutableList());
    if (keyValues.isEmpty()) {
        return Optional.empty();
    }

    // Seed both bounds with the first value, then widen them with plain
    // comparisons while scanning the whole list.
    double lowest = keyValues.get(0);
    double highest = lowest;
    for (double candidate : keyValues) {
        highest = candidate > highest ? candidate : highest;
        lowest = candidate < lowest ? candidate : lowest;
    }
    return Optional.of(new DoubleRange(lowest, highest));
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) Collections.unmodifiableList(java.util.Collections.unmodifiableList) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) Double.parseDouble(java.lang.Double.parseDouble) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) List(java.util.List) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) HiveSessionProperties.isIgnoreCorruptedStatistics(com.facebook.presto.hive.HiveSessionProperties.isIgnoreCorruptedStatistics) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) LocalDate(java.time.LocalDate) 
MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashFunction(com.google.common.hash.HashFunction) Logger(com.facebook.airlift.log.Logger) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) OptionalDouble(java.util.OptionalDouble) Shorts(com.google.common.primitives.Shorts) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) OptionalLong(java.util.OptionalLong) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Double.isFinite(java.lang.Double.isFinite) HIVE_CORRUPTED_COLUMN_STATISTICS(com.facebook.presto.hive.HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) SignedBytes(com.google.common.primitives.SignedBytes) Decimals(com.facebook.presto.common.type.Decimals) Hashing.murmur3_128(com.google.common.hash.Hashing.murmur3_128) Ints(com.google.common.primitives.Ints) HiveSessionProperties.getPartitionStatisticsSampleSize(com.facebook.presto.hive.HiveSessionProperties.getPartitionStatisticsSampleSize) 
HivePartition(com.facebook.presto.hive.HivePartition) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Double.isNaN(java.lang.Double.isNaN) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Estimate(com.facebook.presto.spi.statistics.Estimate) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) NullableValue(com.facebook.presto.common.predicate.NullableValue) Double.parseDouble(java.lang.Double.parseDouble) OptionalDouble(java.util.OptionalDouble) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with HivePartition

Use of com.facebook.presto.hive.HivePartition in the presto project by prestodb.

From class MetastoreHiveStatisticsProvider, method calculateDataSizeForPartitioningKey.

/**
 * Estimates the total data size contributed by a partitioning column.
 *
 * <p>For every partition, the size of its key for {@code column} (as reported
 * by {@code getSize}) is multiplied by that partition's row count, and the
 * products are added up. When {@code getPartitionRowCount} has no value for a
 * partition, {@code averageRowsPerPartition} is used instead.
 *
 * @param column the partitioning column being measured
 * @param type the column type, consulted through {@code hasDataSize}
 * @param partitions the partitions to include in the estimate
 * @param statistics statistics passed to {@code getPartitionRowCount} together with each partition id
 * @param averageRowsPerPartition fallback row count for partitions without one
 * @return the summed estimate, or {@code Estimate.unknown()} when
 *         {@code hasDataSize(type)} is false
 */
@VisibleForTesting
static Estimate calculateDataSizeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics, double averageRowsPerPartition) {
    if (hasDataSize(type)) {
        double accumulatedSize = 0;
        for (HivePartition current : partitions) {
            int keySize = getSize(current.getKeys().get(column));
            double rows = getPartitionRowCount(current.getPartitionId(), statistics)
                    .orElse(averageRowsPerPartition);
            accumulatedSize += keySize * rows;
        }
        return Estimate.of(accumulatedSize);
    }
    return Estimate.unknown();
}
Also used : HivePartition(com.facebook.presto.hive.HivePartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with HivePartition

Use of com.facebook.presto.hive.HivePartition in the presto project by prestodb.

From class TestMetastoreHiveStatisticsProvider, method testNullablePartitionValue.

/**
 * Checks how partition names map to null and non-null key values.
 *
 * <p>A partition built from a name containing {@code __HIVE_DEFAULT_PARTITION__}
 * must expose a null VARCHAR key, whereas a name containing the literal
 * {@code \N} must expose that text as a regular value and no null key.
 */
@Test
public void testNullablePartitionValue() {
    NullableValue nullVarchar = new NullableValue(VARCHAR, null);
    NullableValue slashNVarchar = new NullableValue(VARCHAR, Slices.utf8Slice("\\N"));

    HivePartition partitionWithNull = partition("p1=__HIVE_DEFAULT_PARTITION__/p2=1");
    assertTrue(partitionWithNull.getKeys().containsValue(nullVarchar));

    HivePartition partitionNameWithSlashN = partition("p1=\\N/p2=2");
    assertTrue(partitionNameWithSlashN.getKeys().containsValue(slashNVarchar));
    assertFalse(partitionNameWithSlashN.getKeys().containsValue(nullVarchar));
}
Also used : NullableValue(com.facebook.presto.common.predicate.NullableValue) HivePartition(com.facebook.presto.hive.HivePartition) Test(org.testng.annotations.Test)

Example 5 with HivePartition

Use of com.facebook.presto.hive.HivePartition in the presto project by prestodb.

From class TestMetastoreHiveStatisticsProvider, method testGetPartitionsSample.

/**
 * Exercises {@code getPartitionsSample} with several list sizes and sample sizes.
 *
 * <p>The cases cover a sample size at or above the list size, samples smaller
 * than the list with a pinned expected selection, and two cases that only
 * compare repeated calls on equal input with each other.
 */
@Test
public void testGetPartitionsSample() {
    HivePartition p1 = partition("p1=string1/p2=1234");
    HivePartition p2 = partition("p1=string2/p2=2345");
    HivePartition p3 = partition("p1=string3/p2=3456");
    HivePartition p4 = partition("p1=string4/p2=4567");
    HivePartition p5 = partition("p1=string5/p2=5678");

    ImmutableList<HivePartition> single = ImmutableList.of(p1);
    ImmutableList<HivePartition> pair = ImmutableList.of(p1, p2);
    ImmutableList<HivePartition> four = ImmutableList.of(p1, p2, p3, p4);

    // Sample size covers the whole input: the input comes back unchanged.
    assertEquals(getPartitionsSample(single, 1), single);
    assertEquals(getPartitionsSample(single, 2), single);
    assertEquals(getPartitionsSample(pair, 2), pair);

    // Sample smaller than the input with a fixed expected selection.
    assertEquals(getPartitionsSample(ImmutableList.of(p1, p2, p3), 2), ImmutableList.of(p1, p3));

    // No expected selection is pinned here; repeated calls are only compared with each other.
    assertEquals(getPartitionsSample(four, 1), getPartitionsSample(four, 1));
    assertEquals(getPartitionsSample(four, 3), getPartitionsSample(four, 3));

    assertEquals(getPartitionsSample(ImmutableList.of(p1, p2, p3, p4, p5), 3), ImmutableList.of(p1, p5, p4));
}
Also used : HivePartition(com.facebook.presto.hive.HivePartition) Test(org.testng.annotations.Test)

Aggregations

HivePartition (com.facebook.presto.hive.HivePartition)6 NullableValue (com.facebook.presto.common.predicate.NullableValue)3 HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)3 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)3 PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics)3 Logger (com.facebook.airlift.log.Logger)2 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)2 Chars.isCharType (com.facebook.presto.common.type.Chars.isCharType)2 DATE (com.facebook.presto.common.type.DateType.DATE)2 DecimalType (com.facebook.presto.common.type.DecimalType)2 Decimals (com.facebook.presto.common.type.Decimals)2 Decimals.isLongDecimal (com.facebook.presto.common.type.Decimals.isLongDecimal)2 Decimals.isShortDecimal (com.facebook.presto.common.type.Decimals.isShortDecimal)2 DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE)2 INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)2 REAL (com.facebook.presto.common.type.RealType.REAL)2 SMALLINT (com.facebook.presto.common.type.SmallintType.SMALLINT)2 TINYINT (com.facebook.presto.common.type.TinyintType.TINYINT)2 Type (com.facebook.presto.common.type.Type)2 Varchars.isVarcharType (com.facebook.presto.common.type.Varchars.isVarcharType)2