Search in sources :

Example 16 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.

In the class SemiTransactionalHiveMetastore, method updatePartitionStatistics:

// For HiveBasicStatistics, we only overwrite the original statistics if the new one is not empty.
// For HiveColumnStatistics, we always overwrite every statistics.
// TODO: Collect file count, on-disk size and in-memory size during ANALYZE
private PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats) {
    HiveBasicStatistics oldBasicStatistics = oldPartitionStats.getBasicStatistics();
    HiveBasicStatistics newBasicStatistics = newPartitionStats.getBasicStatistics();
    HiveBasicStatistics updatedBasicStatistics = new HiveBasicStatistics(firstPresent(newBasicStatistics.getFileCount(), oldBasicStatistics.getFileCount()), firstPresent(newBasicStatistics.getRowCount(), oldBasicStatistics.getRowCount()), firstPresent(newBasicStatistics.getInMemoryDataSizeInBytes(), oldBasicStatistics.getInMemoryDataSizeInBytes()), firstPresent(newBasicStatistics.getOnDiskDataSizeInBytes(), oldBasicStatistics.getOnDiskDataSizeInBytes()));
    return new PartitionStatistics(updatedBasicStatistics, newPartitionStats.getColumnStatistics());
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics)

Example 17 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.

In the class ThriftMetastoreUtil, method getHiveBasicStatistics:

public static HiveBasicStatistics getHiveBasicStatistics(Map<String, String> parameters) {
    OptionalLong numFiles = parse(parameters.get(NUM_FILES));
    OptionalLong numRows = parse(parameters.get(NUM_ROWS));
    OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE));
    OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE));
    return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes);
}
Also used : OptionalLong(java.util.OptionalLong) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics)

Example 18 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.

In the class TestStatistics, method testReduce:

@Test
public void testReduce() {
    assertThat(reduce(createEmptyStatistics(), createEmptyStatistics(), ADD)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createZeroStatistics(), createEmptyStatistics(), ADD)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createZeroStatistics(), ADD)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createEmptyStatistics(), SUBTRACT)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createZeroStatistics(), createEmptyStatistics(), SUBTRACT)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createZeroStatistics(), SUBTRACT)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(new HiveBasicStatistics(11, 9, 7, 5), new HiveBasicStatistics(1, 2, 3, 4), ADD)).isEqualTo(new HiveBasicStatistics(12, 11, 10, 9));
    assertThat(reduce(new HiveBasicStatistics(11, 9, 7, 5), new HiveBasicStatistics(1, 2, 3, 4), SUBTRACT)).isEqualTo(new HiveBasicStatistics(10, 7, 4, 1));
}
Also used : HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) Test(org.testng.annotations.Test)

Example 19 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.

In the class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsValidationFailure:

@Test
public void testGetTableStatisticsValidationFailure() {
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build();
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(partitionName, corruptedStatistics));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table)).isInstanceOf(PrestoException.class).hasFieldOrPropertyWithValue("errorCode", HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
    TestingConnectorSession ignoreSession = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertEquals(statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table), TableStatistics.empty());
}
Also used : MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Test(org.testng.annotations.Test)

Example 20 with HiveBasicStatistics

use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.

In the class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsUnpartitioned:

@Test
public void testGetTableStatisticsUnpartitioned() {
    PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
    assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE)), true, table), expected);
}
Also used : DoubleRange(io.prestosql.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) HiveConfig(io.prestosql.plugin.hive.HiveConfig) HivePartition(io.prestosql.plugin.hive.HivePartition) Test(org.testng.annotations.Test)

Aggregations

HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics): 33; PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics): 23; Test (org.testng.annotations.Test): 12; PrestoException (io.prestosql.spi.PrestoException): 11; HivePartition (io.prestosql.plugin.hive.HivePartition): 10; SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 9; HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics): 8; TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException): 8; OptionalLong (java.util.OptionalLong): 8; Partition (org.apache.hadoop.hive.metastore.api.Partition): 8; Table (org.apache.hadoop.hive.metastore.api.Table): 8; ImmutableList (com.google.common.collect.ImmutableList): 6; ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 6; ImmutableMap (com.google.common.collect.ImmutableMap): 6; ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 6; PartitionNotFoundException (io.prestosql.plugin.hive.PartitionNotFoundException): 6; ThriftMetastoreUtil.getHiveBasicStatistics (io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics): 6; HiveConfig (io.prestosql.plugin.hive.HiveConfig): 5; HiveSessionProperties (io.prestosql.plugin.hive.HiveSessionProperties): 5; OrcFileWriterConfig (io.prestosql.plugin.hive.OrcFileWriterConfig): 5