Use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.
Class ThriftHiveMetastore, method storePartitionColumnStatistics.
/**
 * Stores the column statistics carried by {@code partitionWithStatistics}.
 *
 * <p>Nothing is written when the partition has no column statistics. Otherwise the
 * partition's columns are mapped from name to type and the work is delegated to
 * {@code setPartitionColumnStatistics}, together with the row count taken from the
 * partition's basic statistics.
 *
 * @param identity identity forwarded to {@code setPartitionColumnStatistics}
 * @param databaseName database name forwarded to {@code setPartitionColumnStatistics}
 * @param tableName table name forwarded to {@code setPartitionColumnStatistics}
 * @param partitionName partition name forwarded to {@code setPartitionColumnStatistics}
 * @param partitionWithStatistics the partition together with the statistics to store
 */
private void storePartitionColumnStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, PartitionWithStatistics partitionWithStatistics) {
    PartitionStatistics partitionStatistics = partitionWithStatistics.getStatistics();
    Map<String, HiveColumnStatistics> statisticsByColumn = partitionStatistics.getColumnStatistics();
    if (!statisticsByColumn.isEmpty()) {
        // Column name -> Hive type for every column declared on the partition.
        Map<String, HiveType> typesByColumn = partitionWithStatistics.getPartition()
                .getColumns()
                .stream()
                .collect(toImmutableMap(Column::getName, Column::getType));
        setPartitionColumnStatistics(
                identity,
                databaseName,
                tableName,
                partitionName,
                typesByColumn,
                statisticsByColumn,
                partitionStatistics.getBasicStatistics().getRowCount());
    }
}
Use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.
Class ThriftHiveMetastore, method getTableStatistics.
/**
 * Returns the statistics of {@code table}.
 *
 * <p>Basic statistics are derived from the table parameters; column statistics are
 * fetched through {@code getTableColumnStatistics} for the columns listed in the
 * table's storage descriptor, using the row count from the basic statistics.
 *
 * @param identity identity forwarded to {@code getTableColumnStatistics}
 * @param table the table whose statistics are requested
 * @return the basic and column statistics combined into one {@code PartitionStatistics}
 */
@Override
public PartitionStatistics getTableStatistics(HiveIdentity identity, Table table) {
    List<String> columnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    HiveBasicStatistics tableBasicStatistics = ThriftMetastoreUtil.getHiveBasicStatistics(table.getParameters());
    return new PartitionStatistics(
            tableBasicStatistics,
            getTableColumnStatistics(
                    identity,
                    table.getDbName(),
                    table.getTableName(),
                    columnNames,
                    tableBasicStatistics.getRowCount()));
}
Use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.
Class ThriftHiveMetastore, method getPartitionStatistics.
/**
 * Returns the statistics of each given partition, keyed by partition name.
 *
 * <p>Partition names are built with {@code makePartName} from the table's partition key
 * names and each partition's values. Basic statistics come from the partition
 * parameters; column statistics are fetched in one call to
 * {@code getPartitionColumnStatistics}. A partition missing from that result gets an
 * empty column-statistics map.
 *
 * @param identity identity forwarded to {@code getPartitionColumnStatistics}
 * @param table the table the partitions belong to
 * @param partitions the partitions whose statistics are requested
 * @return an immutable map from partition name to its statistics
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionKeyNames = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    Map<String, HiveBasicStatistics> basicStatisticsByPartition = partitions.stream()
            .collect(toImmutableMap(
                    partition -> makePartName(partitionKeyNames, partition.getValues()),
                    partition -> ThriftMetastoreUtil.getHiveBasicStatistics(partition.getParameters())));
    Map<String, OptionalLong> rowCountByPartition = basicStatisticsByPartition.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));

    Map<String, Map<String, HiveColumnStatistics>> columnStatisticsByPartition = getPartitionColumnStatistics(
            identity,
            table.getDbName(),
            table.getTableName(),
            basicStatisticsByPartition.keySet(),
            dataColumnNames,
            rowCountByPartition);

    // Pair every partition's basic statistics with its column statistics (empty when none were returned).
    ImmutableMap.Builder<String, PartitionStatistics> statisticsByPartition = ImmutableMap.builder();
    basicStatisticsByPartition.forEach((partitionName, partitionBasicStatistics) -> {
        Map<String, HiveColumnStatistics> partitionColumns = columnStatisticsByPartition.getOrDefault(partitionName, ImmutableMap.of());
        statisticsByPartition.put(partitionName, new PartitionStatistics(partitionBasicStatistics, partitionColumns));
    });
    return statisticsByPartition.build();
}
Use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.
Class Statistics, method createColumnStatisticsForEmptyPartition.
/**
 * Builds the column statistics used for an empty partition.
 *
 * <p>Each requested statistic type is filled with a zero value; min/max handling is
 * delegated to {@code setMinMaxForEmptyPartition}.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes the statistic types to populate
 * @return statistics with every requested type populated
 * @throws PrestoException with {@code HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE} when a
 *         requested type is not handled here
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();
    for (ColumnStatisticType requestedType : columnStatisticTypes) {
        switch (requestedType) {
            case MAX_VALUE_SIZE_IN_BYTES:
                builder.setMaxValueSizeInBytes(0);
                break;
            case TOTAL_SIZE_IN_BYTES:
                builder.setTotalSizeInBytes(0);
                break;
            case NUMBER_OF_DISTINCT_VALUES:
                builder.setDistinctValuesCount(0);
                break;
            case NUMBER_OF_NON_NULL_VALUES:
                // The non-null count is recorded through the nulls count, which is zero here.
                builder.setNullsCount(0);
                break;
            case NUMBER_OF_TRUE_VALUES: {
                OptionalLong zeroTrue = OptionalLong.of(0L);
                OptionalLong zeroFalse = OptionalLong.of(0L);
                builder.setBooleanStatistics(new BooleanStatistics(zeroTrue, zeroFalse));
                break;
            }
            case MIN_VALUE:
            case MAX_VALUE:
                // Both bounds are handled by the same helper.
                setMinMaxForEmptyPartition(columnType, builder);
                break;
            default:
                throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + requestedType.name());
        }
    }
    return builder.build();
}
Use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.
Class Statistics, method createHiveColumnStatistics.
/**
 * Converts the statistics computed for one column into {@code HiveColumnStatistics}.
 *
 * <p>Only the statistic types present in {@code computedStatistics} are populated.
 * The nulls count, the distinct-values count and the boolean statistics all depend on
 * the non-null count, so they are set only when {@code NUMBER_OF_NON_NULL_VALUES} is
 * present.
 *
 * @param session session forwarded to {@code setMinMax} and {@code getIntegerValue}
 * @param computedStatistics computed statistic blocks keyed by statistic type; the
 *        value at position 0 of each block is read
 * @param columnType type of the column, forwarded to {@code setMinMax}
 * @param rowCount total number of rows, used to derive the nulls count
 * @return the populated column statistics
 */
private static HiveColumnStatistics createHiveColumnStatistics(ConnectorSession session, Map<ColumnStatisticType, Block> computedStatistics, Type columnType, long rowCount) {
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();

    // MIN_VALUE / MAX_VALUE: the engine is asked to compute either both or neither.
    boolean hasMinValue = computedStatistics.containsKey(MIN_VALUE);
    verify(hasMinValue == computedStatistics.containsKey(MAX_VALUE));
    if (hasMinValue) {
        Block minBlock = computedStatistics.get(MIN_VALUE);
        Block maxBlock = computedStatistics.get(MAX_VALUE);
        setMinMax(session, columnType, minBlock, maxBlock, builder);
    }

    // MAX_VALUE_SIZE_IN_BYTES
    if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) {
        Block maxSizeBlock = computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES);
        builder.setMaxValueSizeInBytes(getIntegerValue(session, BIGINT, maxSizeBlock));
    }

    // TOTAL_SIZE_IN_BYTES
    if (computedStatistics.containsKey(TOTAL_SIZE_IN_BYTES)) {
        Block totalSizeBlock = computedStatistics.get(TOTAL_SIZE_IN_BYTES);
        builder.setTotalSizeInBytes(getIntegerValue(session, BIGINT, totalSizeBlock));
    }

    // Everything below is derived from the non-null count.
    if (computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) {
        long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);

        // NUMBER OF NULLS
        builder.setNullsCount(rowCount - nonNullCount);

        // NDV
        if (computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) {
            // The distinct count is an HLL estimate and can exceed the non-null count, so cap it.
            long distinctEstimate = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0);
            builder.setDistinctValuesCount(Math.min(distinctEstimate, nonNullCount));
        }

        // NUMBER OF TRUE, NUMBER OF FALSE
        if (computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) {
            long trueCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0);
            long falseCount = nonNullCount - trueCount;
            builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(trueCount), OptionalLong.of(falseCount)));
        }
    }

    return builder.build();
}
Aggregations