Search in sources :

Example 6 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project hetu-core by openlookeng.

the class ThriftHiveMetastore method updatePartitionsStatistics.

/**
 * Applies a statistics update function to each named partition of one table.
 *
 * <p>The partitions named by the keys of {@code partNamesUpdateFunctionMap} and their current
 * statistics are fetched, each partition's update function is applied to its current statistics,
 * the resulting basic statistics are written into the partition parameters, the modified
 * partitions are altered in one {@code alterPartitionsWithoutStatistics} call, and finally
 * {@code updatePartitionColumnStatistics} is invoked once per partition.
 *
 * <p>The method is {@code synchronized} on this metastore instance.
 *
 * @param identity identity passed through to the metastore lookups and the column statistics update
 * @param databaseName database containing the table
 * @param tableName table whose partitions are updated
 * @param partNamesUpdateFunctionMap partition name mapped to the function that turns the current
 *         statistics of that partition into the updated statistics
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the table cannot be found, or if the
 *         number of partitions or partition statistics returned differs from the number requested
 * @throws NullPointerException if statistics or an update function are missing for a returned partition
 */
@Override
public synchronized void updatePartitionsStatistics(HiveIdentity identity, String databaseName, String tableName, Map<String, Function<PartitionStatistics, PartitionStatistics>> partNamesUpdateFunctionMap) {
    ImmutableList.Builder<Partition> modifiedPartitionBuilder = ImmutableList.builder();
    ImmutableMap.Builder<String, PartitionInfo> partitionInfoMapBuilder = ImmutableMap.builder();
    Optional<Table> table = getTable(identity, databaseName, tableName);
    List<Partition> partitions = getPartitionsByNames(identity, databaseName, tableName, ImmutableList.copyOf(partNamesUpdateFunctionMap.keySet()));
    // Unwrapped only after the partition lookup so the sequence of metastore calls stays as before;
    // a missing table now fails with a descriptive error instead of a bare NoSuchElementException.
    Table existingTable = table.orElseThrow(() -> new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Table not found: " + databaseName + "." + tableName));
    Map<String, PartitionStatistics> partitionsStatistics = getPartitionStatistics(identity, existingTable, partitions);
    if (partitions.size() != partitionsStatistics.size() || partitions.size() != partNamesUpdateFunctionMap.size()) {
        throw new PrestoException(
                HiveErrorCode.HIVE_METASTORE_ERROR,
                "Metastore returned an unexpected number of partitions: requested " + partNamesUpdateFunctionMap.size()
                        + ", partitions returned " + partitions.size()
                        + ", partition statistics returned " + partitionsStatistics.size());
    }
    List<String> partColumns = existingTable.getPartitionKeys().stream().map(FieldSchema::getName).collect(toImmutableList());
    for (Partition originalPartition : partitions) {
        String partitionName = makePartName(partColumns, originalPartition.getValues());
        PartitionStatistics currentStatistics = requireNonNull(partitionsStatistics.get(partitionName), "getPartitionStatistics() returned null");
        // Matching sizes do not guarantee matching names, so name the partition if no function was supplied for it.
        Function<PartitionStatistics, PartitionStatistics> updateFunction = requireNonNull(
                partNamesUpdateFunctionMap.get(partitionName),
                "no update function supplied for partition " + partitionName);
        PartitionStatistics updatedStatistics = updateFunction.apply(currentStatistics);
        Partition modifiedPartition = originalPartition.deepCopy();
        HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
        // The same basic statistics are written into both the copy (sent to alterPartitionsWithoutStatistics)
        // and the original partition object (handed to PartitionInfo below).
        modifiedPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
        originalPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(originalPartition.getParameters(), basicStatistics));
        modifiedPartitionBuilder.add(modifiedPartition);
        // NOTE(review): PartitionInfo receives originalPartition here but is read back through its
        // modifiedPartition field below; the constructor is not visible from this method - confirm the mapping.
        partitionInfoMapBuilder.put(partitionName, new PartitionInfo(basicStatistics, currentStatistics, originalPartition, updatedStatistics));
    }
    alterPartitionsWithoutStatistics(databaseName, tableName, modifiedPartitionBuilder.build());
    ImmutableMap<String, PartitionInfo> partitionInfoMap = partitionInfoMapBuilder.build();
    partitionInfoMap.forEach((partName, partInfo) -> updatePartitionColumnStatistics(identity, partInfo.modifiedPartition, databaseName, tableName, partName, partInfo.basicStatistics, partInfo.currentStatistics, partInfo.updatedStatistics));
}
Also used : HivePartition(io.prestosql.plugin.hive.HivePartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics)

Example 7 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project hetu-core by openlookeng.

the class TestStatistics method testReduce.

/**
 * Exercises {@code reduce} with both the {@code ADD} and {@code SUBTRACT} operators:
 * every combination that involves empty statistics is expected to equal empty statistics,
 * and fully populated statistics are expected to be combined field by field.
 */
@Test
public void testReduce() {
    // ADD with at least one empty operand
    assertThat(reduce(createEmptyStatistics(), createEmptyStatistics(), ADD))
            .isEqualTo(createEmptyStatistics());
    assertThat(reduce(createZeroStatistics(), createEmptyStatistics(), ADD))
            .isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createZeroStatistics(), ADD))
            .isEqualTo(createEmptyStatistics());

    // SUBTRACT with at least one empty operand
    assertThat(reduce(createEmptyStatistics(), createEmptyStatistics(), SUBTRACT))
            .isEqualTo(createEmptyStatistics());
    assertThat(reduce(createZeroStatistics(), createEmptyStatistics(), SUBTRACT))
            .isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createZeroStatistics(), SUBTRACT))
            .isEqualTo(createEmptyStatistics());

    // Fully populated operands
    HiveBasicStatistics expectedSum = new HiveBasicStatistics(12, 11, 10, 9);
    assertThat(reduce(new HiveBasicStatistics(11, 9, 7, 5), new HiveBasicStatistics(1, 2, 3, 4), ADD))
            .isEqualTo(expectedSum);

    HiveBasicStatistics expectedDifference = new HiveBasicStatistics(10, 7, 4, 1);
    assertThat(reduce(new HiveBasicStatistics(11, 9, 7, 5), new HiveBasicStatistics(1, 2, 3, 4), SUBTRACT))
            .isEqualTo(expectedDifference);
}
Also used : HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) Test(org.testng.annotations.Test)

Example 8 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project hetu-core by openlookeng.

the class FileHiveMetastore method getTableStatistics.

/**
 * Builds the statistics of a table from its stored metadata.
 *
 * <p>The "table" schema file is read from the directory returned by
 * {@code getTableMetadataDirectory}; the basic statistics are derived from the metadata
 * parameters and paired with the column statistics held in the same metadata.
 *
 * @param identity not used by this method
 * @param databaseName database containing the table
 * @param tableName name of the table
 * @return basic and column statistics of the table
 * @throws TableNotFoundException if no schema file can be read for the table
 */
private PartitionStatistics getTableStatistics(HiveIdentity identity, String databaseName, String tableName) {
    TableMetadata metadata = readSchemaFile("table", getTableMetadataDirectory(databaseName, tableName), tableCodec)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    return new PartitionStatistics(
            getHiveBasicStatistics(metadata.getParameters()),
            metadata.getColumnStatistics());
}
Also used : Path(org.apache.hadoop.fs.Path) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ThriftMetastoreUtil.getHiveBasicStatistics(io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Example 9 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project hetu-core by openlookeng.

the class TestThriftMetastoreUtil method testBasicStatisticsRoundTrip.

/**
 * Runs the single-argument round-trip check against three samples: all fields absent,
 * a mix of present and absent fields, and all fields present.
 */
@Test
public void testBasicStatisticsRoundTrip() {
    OptionalLong absent = OptionalLong.empty();
    HiveBasicStatistics[] samples = {
            new HiveBasicStatistics(absent, absent, absent, absent),
            new HiveBasicStatistics(OptionalLong.of(1), absent, OptionalLong.of(2), absent),
            new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4)),
    };
    for (HiveBasicStatistics sample : samples) {
        testBasicStatisticsRoundTrip(sample);
    }
}
Also used : HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ThriftMetastoreUtil.getHiveBasicStatistics(io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) Test(org.testng.annotations.Test)

Example 10 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project hetu-core by openlookeng.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.

/**
 * Verifies {@code MetastoreHiveStatisticsProvider.getTableStatistics} for a single partition
 * {@code p1=string1/p2=1234} whose statistics carry a row count of 1000 and integer column
 * statistics for {@code COLUMN}; the expected result holds the row count plus column
 * statistics for both partition columns and the regular column.
 */
@Test
public void testGetTableStatistics() {
    String partitionName = "p1=string1/p2=1234";

    // Statistics the stubbed metastore lookup returns for the partition
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (connectorSession, schemaTableName, hivePartitions, metastoreTable) -> ImmutableMap.of(partitionName, statistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    // Expected per-column statistics
    ColumnStatistics expectedPartitionColumn1 = ColumnStatistics.builder()
            .setDataSize(Estimate.of(7000))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedPartitionColumn2 = ColumnStatistics.builder()
            .setRange(new DoubleRange(1234, 1234))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedRegularColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(PARTITION_COLUMN_1, expectedPartitionColumn1)
            .setColumnStatistics(PARTITION_COLUMN_2, expectedPartitionColumn2)
            .setColumnStatistics(columnHandle, expectedRegularColumn)
            .build();

    assertEquals(
            statisticsProvider.getTableStatistics(
                    session,
                    TABLE,
                    ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
                    ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
                    ImmutableList.of(partition(partitionName)),
                    true,
                    table),
            expected);
}
Also used : DoubleRange(io.prestosql.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Test(org.testng.annotations.Test)

Aggregations

HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics)33 PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics)23 Test (org.testng.annotations.Test)12 PrestoException (io.prestosql.spi.PrestoException)11 HivePartition (io.prestosql.plugin.hive.HivePartition)10 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)9 HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)8 TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException)8 OptionalLong (java.util.OptionalLong)8 Partition (org.apache.hadoop.hive.metastore.api.Partition)8 Table (org.apache.hadoop.hive.metastore.api.Table)8 ImmutableList (com.google.common.collect.ImmutableList)6 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)6 ImmutableMap (com.google.common.collect.ImmutableMap)6 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)6 PartitionNotFoundException (io.prestosql.plugin.hive.PartitionNotFoundException)6 ThriftMetastoreUtil.getHiveBasicStatistics (io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics)6 HiveConfig (io.prestosql.plugin.hive.HiveConfig)5 HiveSessionProperties (io.prestosql.plugin.hive.HiveSessionProperties)5 OrcFileWriterConfig (io.prestosql.plugin.hive.OrcFileWriterConfig)5