use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.
the class SemiTransactionalHiveMetastore method updatePartitionStatistics.
// For HiveBasicStatistics, we only overwrite the original statistics if the new one is not empty.
// For HiveColumnStatistics, we always overwrite all statistics.
// TODO: Collect file count, on-disk size and in-memory size during ANALYZE
private PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats)
{
    HiveBasicStatistics oldBasicStatistics = oldPartitionStats.getBasicStatistics();
    HiveBasicStatistics newBasicStatistics = newPartitionStats.getBasicStatistics();
    HiveBasicStatistics updatedBasicStatistics = new HiveBasicStatistics(
            firstPresent(newBasicStatistics.getFileCount(), oldBasicStatistics.getFileCount()),
            firstPresent(newBasicStatistics.getRowCount(), oldBasicStatistics.getRowCount()),
            firstPresent(newBasicStatistics.getInMemoryDataSizeInBytes(), oldBasicStatistics.getInMemoryDataSizeInBytes()),
            firstPresent(newBasicStatistics.getOnDiskDataSizeInBytes(), oldBasicStatistics.getOnDiskDataSizeInBytes()));
    return new PartitionStatistics(updatedBasicStatistics, newPartitionStats.getColumnStatistics());
}
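The merge above leans on a firstPresent helper that the snippet does not show. A minimal sketch, assuming it simply prefers the new OptionalLong whenever it carries a value and otherwise falls back to the old one:

// Sketch of the assumed helper: take the new value when present, else keep the old one.
private static OptionalLong firstPresent(OptionalLong first, OptionalLong second)
{
    return first.isPresent() ? first : second;
}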
use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.
the class ThriftMetastoreUtil method getHiveBasicStatistics.
public static HiveBasicStatistics getHiveBasicStatistics(Map<String, String> parameters)
{
    OptionalLong numFiles = parse(parameters.get(NUM_FILES));
    OptionalLong numRows = parse(parameters.get(NUM_ROWS));
    OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE));
    OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE));
    return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes);
}
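A hedged usage sketch (not part of the project): the parameter map normally comes from a Thrift Table or Partition object, and NUM_FILES, NUM_ROWS, RAW_DATA_SIZE and TOTAL_SIZE are assumed to be the standard Hive statistics keys shown below; each value is parsed into an OptionalLong, so a missing or unparseable entry is expected to come back empty rather than fail.

// Hypothetical input: standard Hive statistics parameters as stored in the metastore.
Map<String, String> parameters = ImmutableMap.of(
        "numFiles", "4",         // NUM_FILES
        "numRows", "1000",       // NUM_ROWS
        "rawDataSize", "80000",  // RAW_DATA_SIZE (decoded, in-memory size)
        "totalSize", "20000");   // TOTAL_SIZE (on-disk size)
HiveBasicStatistics statistics = getHiveBasicStatistics(parameters);
// statistics.getRowCount() is OptionalLong.of(1000); an absent key would yield OptionalLong.empty().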
use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.
the class TestStatistics method testReduce.
@Test
public void testReduce()
{
    assertThat(reduce(createEmptyStatistics(), createEmptyStatistics(), ADD)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createZeroStatistics(), createEmptyStatistics(), ADD)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createZeroStatistics(), ADD)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createEmptyStatistics(), SUBTRACT)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createZeroStatistics(), createEmptyStatistics(), SUBTRACT)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(createEmptyStatistics(), createZeroStatistics(), SUBTRACT)).isEqualTo(createEmptyStatistics());
    assertThat(reduce(new HiveBasicStatistics(11, 9, 7, 5), new HiveBasicStatistics(1, 2, 3, 4), ADD)).isEqualTo(new HiveBasicStatistics(12, 11, 10, 9));
    assertThat(reduce(new HiveBasicStatistics(11, 9, 7, 5), new HiveBasicStatistics(1, 2, 3, 4), SUBTRACT)).isEqualTo(new HiveBasicStatistics(10, 7, 4, 1));
}
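The first six assertions hinge on how reduce handles missing values: each of the four counters is combined independently, and a counter is empty in the result whenever it is empty on either side, which is why even zero statistics merged with empty ones collapse to empty. A minimal per-field sketch assuming that behavior (reduceField is an illustrative name, not the project's helper):

// Illustrative per-counter merge: only combine when both sides are present.
private static OptionalLong reduceField(OptionalLong first, OptionalLong second, ReduceOperator operator)
{
    if (first.isPresent() && second.isPresent()) {
        long left = first.getAsLong();
        long right = second.getAsLong();
        return OptionalLong.of(operator == ADD ? left + right : left - right);
    }
    return OptionalLong.empty();
}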
use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsValidationFailure.
@Test
public void testGetTableStatisticsValidationFailure()
{
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build();
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(partitionName, corruptedStatistics));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table))
            .isInstanceOf(PrestoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
    TestingConnectorSession ignoreSession = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    assertEquals(
            statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)), true, table),
            TableStatistics.empty());
}
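The corrupted statistics are rejected because the -1 file count fails basic-statistics validation before any estimates are computed. A minimal sketch of that kind of check, assuming the provider verifies each counter is non-negative (validateBasicStatistics is an illustrative name, not the project's exact code):

// Illustrative check: reject negative counters and report the offending partition.
private static void validateBasicStatistics(HiveBasicStatistics statistics, String partitionName)
{
    statistics.getFileCount().ifPresent(fileCount -> {
        if (fileCount < 0) {
            throw new PrestoException(
                    HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS,
                    String.format("fileCount must not be negative: %s for partition %s", fileCount, partitionName));
        }
    });
    // Analogous checks would cover row count and data sizes; with
    // setIgnoreCorruptedStatistics(true) the bad statistics are skipped instead and the
    // provider degrades to TableStatistics.empty(), as the second assertion shows.
}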
use of io.prestosql.plugin.hive.HiveBasicStatistics in project boostkit-bigdata by kunpengcompute.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.
@Test
public void testGetTableStatisticsUnpartitioned()
{
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, schemaTableName, hivePartitions, table) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build())
            .build();
    assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE)), true, table), expected);
}
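The expected figures follow directly from the metastore numbers: 500 nulls out of 1,000 rows give a nulls fraction of 0.5, the 300 distinct values pass through unchanged, and the integer min/max of -100/100 becomes the DoubleRange. A hedged sketch of that arithmetic (variable names are illustrative):

// Illustrative derivation of the expected column statistics from the inputs above.
long rowCount = 1000;
long nullsCount = 500;           // third argument to createIntegerColumnStatistics
long distinctValuesCount = 300;  // fourth argument to createIntegerColumnStatistics
double nullsFraction = (double) nullsCount / rowCount; // 0.5
ColumnStatistics derived = ColumnStatistics.builder()
        .setRange(new DoubleRange(-100, 100))
        .setNullsFraction(Estimate.of(nullsFraction))
        .setDistinctValuesCount(Estimate.of(distinctValuesCount))
        .build();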