Search in sources :

Example 1 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.

/**
 * Feeds the provider statistics for one partition ({@code p1=string1/p2=1234}) and checks the
 * table statistics it produces for the two partition columns and the regular data column.
 */
@Test
public void testGetTableStatistics() {
    String partitionName = "p1=string1/p2=1234";

    // Statistics the stubbed metastore lookup hands back for the partition. The 1000 and 5000
    // values reappear below as the expected row count and total size.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();

    // The provider ignores its lookup arguments and always answers with the single partition above.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (ignoredSession, ignoredTable, ignoredPartitions) -> ImmutableMap.of(partitionName, statistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());

    // Expected per-column statistics, built one column at a time.
    ColumnStatistics expectedFirstPartitionColumn = ColumnStatistics.builder()
            .setDataSize(Estimate.of(7000))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedSecondPartitionColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(1234, 1234))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedDataColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setTotalSize(Estimate.of(5000))
            .setColumnStatistics(PARTITION_COLUMN_1, expectedFirstPartitionColumn)
            .setColumnStatistics(PARTITION_COLUMN_2, expectedSecondPartitionColumn)
            .setColumnStatistics(columnHandle, expectedDataColumn)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
            ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
            ImmutableList.of(partition(partitionName)));
    assertEquals(actual, expected);
}
Also used : TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(com.facebook.presto.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) CacheConfig(com.facebook.presto.cache.CacheConfig) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) Test(org.testng.annotations.Test)

Example 2 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.

/**
 * Checks the table statistics produced when the statistics lookup is keyed by
 * {@code UNPARTITIONED_ID} and the only partition passed in is {@code new HivePartition(TABLE)}.
 */
@Test
public void testGetTableStatisticsUnpartitioned() {
    // Statistics returned by the stubbed lookup. The 1000 and 5000 values reappear below as the
    // expected row count and total size.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();

    // The lookup ignores its arguments and always reports the statistics under UNPARTITIONED_ID.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (ignoredSession, ignoredTable, ignoredPartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());

    ColumnStatistics expectedDataColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setTotalSize(Estimate.of(5000))
            .setColumnStatistics(columnHandle, expectedDataColumn)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));
    assertEquals(actual, expected);
}
Also used : TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(com.facebook.presto.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) CacheConfig(com.facebook.presto.cache.CacheConfig) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) HivePartition(com.facebook.presto.hive.HivePartition) Test(org.testng.annotations.Test)

Example 3 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsValidationFailure.

/**
 * Exercises both settings of {@code ignoreCorruptedStatistics} against partition statistics
 * built from {@code new HiveBasicStatistics(-1, 0, 0, 0)}: with the flag off the provider must
 * throw a {@link PrestoException} carrying {@code HIVE_CORRUPTED_COLUMN_STATISTICS}; with the
 * flag on it must return {@code TableStatistics.empty()}.
 */
@Test
public void testGetTableStatisticsValidationFailure() {
    HiveBasicStatistics corruptedBasicStatistics = new HiveBasicStatistics(-1, 0, 0, 0);
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder()
            .setBasicStatistics(corruptedBasicStatistics)
            .build();
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (ignoredSession, ignoredTable, ignoredPartitions) -> ImmutableMap.of(partitionName, corruptedStatistics));

    // Strict session: corrupted statistics must surface as an error.
    HiveSessionProperties strictProperties = new HiveSessionProperties(new HiveClientConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig());
    TestingConnectorSession session = new TestingConnectorSession(strictProperties.getSessionProperties());
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName))))
            .isInstanceOf(PrestoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());

    // Lenient session: the same input must yield empty table statistics instead of failing.
    HiveSessionProperties lenientProperties = new HiveSessionProperties(new HiveClientConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig());
    TestingConnectorSession ignoreSession = new TestingConnectorSession(lenientProperties.getSessionProperties());
    TableStatistics lenientResult = statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)));
    assertEquals(lenientResult, TableStatistics.empty());
}
Also used : MetastoreHiveStatisticsProvider.validatePartitionStatistics(com.facebook.presto.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) PrestoException(com.facebook.presto.spi.PrestoException) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) CacheConfig(com.facebook.presto.cache.CacheConfig) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) Test(org.testng.annotations.Test)

Example 4 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class AlluxioHiveMetastore method getPartitionStatistics.

/**
 * Collects basic and column-level statistics for the requested partitions of a table.
 *
 * <p>Basic statistics are derived from the parameters of each partition that
 * {@code getPartitionsByNames} reports as present; partitions that are absent are skipped.
 * Column statistics are then requested from the Alluxio client for exactly those partitions
 * and for the table's data columns. A partition with no column statistics gets an empty
 * column-statistics map.
 *
 * @param metastoreContext context forwarded to the table and partition lookups
 * @param databaseName database containing the table
 * @param tableName table whose partitions are inspected
 * @param partitionNames names of the partitions to look up
 * @return statistics keyed by the partition name rebuilt via {@code MetastoreUtil.makePartName}
 * @throws TableNotFoundException if the table does not exist
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the Alluxio client call fails
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Set<String> partitionNames) {
    Table table = getTable(metastoreContext, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));

    // Basic statistics, keyed by the partition name recomputed from the partition's own values.
    Map<String, HiveBasicStatistics> partitionBasicStatistics = getPartitionsByNames(metastoreContext, databaseName, tableName, ImmutableList.copyOf(partitionNames)).entrySet().stream()
            .filter(entry -> entry.getValue().isPresent())
            .collect(toImmutableMap(
                    entry -> MetastoreUtil.makePartName(table.getPartitionColumns(), entry.getValue().get().getValues()),
                    entry -> getHiveBasicStatistics(entry.getValue().get().getParameters())));
    Map<String, OptionalLong> partitionRowCounts = partitionBasicStatistics.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));

    List<String> dataColumns = table.getDataColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());

    Map<String, List<ColumnStatisticsInfo>> columnStatisticsByPartition;
    try {
        // Copy the key set directly instead of streaming it into a list.
        columnStatisticsByPartition = client.getPartitionColumnStatistics(table.getDatabaseName(), table.getTableName(), ImmutableList.copyOf(partitionBasicStatistics.keySet()), dataColumns);
    } catch (AlluxioStatusException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }

    // Partitions with an empty statistics list are dropped here and fall back to an empty map below.
    Map<String, Map<String, HiveColumnStatistics>> partitionColumnStatistics = columnStatisticsByPartition.entrySet().stream()
            .filter(entry -> !entry.getValue().isEmpty())
            .collect(toImmutableMap(
                    Map.Entry::getKey,
                    entry -> groupStatisticsByColumn(metastoreContext, entry.getValue(), partitionRowCounts.getOrDefault(entry.getKey(), OptionalLong.empty()))));

    ImmutableMap.Builder<String, PartitionStatistics> result = ImmutableMap.builder();
    // Iterate entries so each partition's basic statistics need no second map lookup.
    for (Map.Entry<String, HiveBasicStatistics> basicEntry : partitionBasicStatistics.entrySet()) {
        String partitionName = basicEntry.getKey();
        Map<String, HiveColumnStatistics> columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of());
        result.put(partitionName, new PartitionStatistics(basicEntry.getValue(), columnStatistics));
    }
    return result.build();
}
Also used : ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) Table(com.facebook.presto.hive.metastore.Table) NotFoundException(com.facebook.presto.spi.NotFoundException) Column(com.facebook.presto.hive.metastore.Column) Database(com.facebook.presto.hive.metastore.Database) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics) HiveMetastore(com.facebook.presto.hive.metastore.thrift.HiveMetastore) PrestoPrincipal(com.facebook.presto.spi.security.PrestoPrincipal) Inject(com.google.inject.Inject) HiveType(com.facebook.presto.hive.HiveType) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) PrestoException(com.facebook.presto.spi.PrestoException) MetastoreUtil.convertPredicateToParts(com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts) Function(java.util.function.Function) Partition(com.facebook.presto.hive.metastore.Partition) Duration(io.airlift.units.Duration) OptionalLong(java.util.OptionalLong) MetastoreUtil(com.facebook.presto.hive.metastore.MetastoreUtil) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HIVE_METASTORE_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR) ImmutableList(com.google.common.collect.ImmutableList) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) TableMasterClient(alluxio.client.table.TableMasterClient) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Constraint(alluxio.grpc.table.Constraint) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) Type(com.facebook.presto.common.type.Type) PartitionNameWithVersion(com.facebook.presto.hive.metastore.PartitionNameWithVersion) 
ImmutableMap(com.google.common.collect.ImmutableMap) PrincipalPrivileges(com.facebook.presto.hive.metastore.PrincipalPrivileges) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Domain(com.facebook.presto.common.predicate.Domain) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) RoleGrant(com.facebook.presto.spi.security.RoleGrant) ColumnStatisticType(com.facebook.presto.spi.statistics.ColumnStatisticType) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) Optional(java.util.Optional) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) Collections(java.util.Collections) Table(com.facebook.presto.hive.metastore.Table) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) PrestoException(com.facebook.presto.spi.PrestoException) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap)

Example 5 with PartitionStatistics

use of com.facebook.presto.hive.metastore.PartitionStatistics in project presto by prestodb.

the class AbstractTestHiveClient method testUpdatePartitionStatistics.

/**
 * Drives a sequence of statistics updates against the partitions {@code ds=2016-01-01} and
 * {@code ds=2016-01-02} of the given table and verifies the metastore state after each step.
 *
 * <p>Both partitions are expected to start with {@code initialStatistics}. For every index
 * {@code i}, the i-th element of each list is written to its partition; the update callback
 * asserts that the statistics passed to it match what the previous step stored. At the end
 * both partitions are reset to {@code initialStatistics}.
 *
 * @param tableName table holding the two partitions
 * @param initialStatistics statistics both partitions must have before and after the test
 * @param firstPartitionStatistics successive statistics to store for the first partition
 * @param secondPartitionStatistics successive statistics to store for the second partition;
 *        must have the same size as {@code firstPartitionStatistics}
 */
protected void testUpdatePartitionStatistics(SchemaTableName tableName, PartitionStatistics initialStatistics, List<PartitionStatistics> firstPartitionStatistics, List<PartitionStatistics> secondPartitionStatistics) {
    verify(firstPartitionStatistics.size() == secondPartitionStatistics.size());

    String firstPartitionName = "ds=2016-01-01";
    String secondPartitionName = "ds=2016-01-02";
    ImmutableSet<String> bothPartitionNames = ImmutableSet.of(firstPartitionName, secondPartitionName);
    String schemaName = tableName.getSchemaName();
    String relationName = tableName.getTableName();

    ExtendedHiveMetastore metastoreClient = getMetastoreClient();

    // Precondition: both partitions start out with the initial statistics.
    assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, bothPartitionNames))
            .isEqualTo(ImmutableMap.of(firstPartitionName, initialStatistics, secondPartitionName, initialStatistics));

    // Held in AtomicReferences so the update lambdas can read values that change per iteration.
    AtomicReference<PartitionStatistics> expectedStatisticsPartition1 = new AtomicReference<>(initialStatistics);
    AtomicReference<PartitionStatistics> expectedStatisticsPartition2 = new AtomicReference<>(initialStatistics);

    int rounds = firstPartitionStatistics.size();
    for (int round = 0; round < rounds; round++) {
        PartitionStatistics nextForPartition1 = firstPartitionStatistics.get(round);
        PartitionStatistics nextForPartition2 = secondPartitionStatistics.get(round);

        metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, firstPartitionName, observed -> {
            assertThat(observed).isEqualTo(expectedStatisticsPartition1.get());
            return nextForPartition1;
        });
        metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, secondPartitionName, observed -> {
            assertThat(observed).isEqualTo(expectedStatisticsPartition2.get());
            return nextForPartition2;
        });

        assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, bothPartitionNames))
                .isEqualTo(ImmutableMap.of(firstPartitionName, nextForPartition1, secondPartitionName, nextForPartition2));

        expectedStatisticsPartition1.set(nextForPartition1);
        expectedStatisticsPartition2.set(nextForPartition2);
    }

    // After the loop the metastore must still report the last values written.
    assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, bothPartitionNames))
            .isEqualTo(ImmutableMap.of(firstPartitionName, expectedStatisticsPartition1.get(), secondPartitionName, expectedStatisticsPartition2.get()));

    // Reset both partitions to the initial statistics and confirm the reset took effect.
    metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, firstPartitionName, observed -> {
        assertThat(observed).isEqualTo(expectedStatisticsPartition1.get());
        return initialStatistics;
    });
    metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, secondPartitionName, observed -> {
        assertThat(observed).isEqualTo(expectedStatisticsPartition2.get());
        return initialStatistics;
    });
    assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, schemaName, relationName, bothPartitionNames))
            .isEqualTo(ImmutableMap.of(firstPartitionName, initialStatistics, secondPartitionName, initialStatistics));
}
Also used : PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) AtomicReference(java.util.concurrent.atomic.AtomicReference) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) Constraint(com.facebook.presto.spi.Constraint)

Aggregations

PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics)35 SchemaTableName (com.facebook.presto.spi.SchemaTableName)20 PrestoException (com.facebook.presto.spi.PrestoException)19 ImmutableMap (com.google.common.collect.ImmutableMap)15 HiveColumnStatistics (com.facebook.presto.hive.metastore.HiveColumnStatistics)14 TableNotFoundException (com.facebook.presto.spi.TableNotFoundException)14 HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)13 MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext)13 Table (com.facebook.presto.hive.metastore.Table)13 Map (java.util.Map)12 Optional (java.util.Optional)12 OptionalLong (java.util.OptionalLong)11 Type (com.facebook.presto.common.type.Type)10 Column (com.facebook.presto.hive.metastore.Column)10 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)10 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)10 List (java.util.List)10 Objects.requireNonNull (java.util.Objects.requireNonNull)10 Set (java.util.Set)10 Domain (com.facebook.presto.common.predicate.Domain)8