use of io.prestosql.spi.statistics.ColumnStatisticType in project hetu-core by openlookeng.
the class TestStatisticAggregationsDescriptor method testColumnStatisticMetadataKeySerializationRoundTrip.
@Test
public void testColumnStatisticMetadataKeySerializationRoundTrip() {
for (String column : COLUMNS) {
for (ColumnStatisticType type : ColumnStatisticType.values()) {
ColumnStatisticMetadata expected = new ColumnStatisticMetadata(column, type);
assertEquals(deserialize(serialize(expected)), expected);
}
}
}
use of io.prestosql.spi.statistics.ColumnStatisticType in project hetu-core by openlookeng.
the class StatisticsAggregationPlanner method createStatisticsAggregation.
public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map<String, Symbol> columnToSymbolMap) {
StatisticAggregationsDescriptor.Builder<Symbol> descriptor = StatisticAggregationsDescriptor.builder();
List<String> groupingColumns = statisticsMetadata.getGroupingColumns();
List<Symbol> groupingSymbols = groupingColumns.stream().map(columnToSymbolMap::get).collect(toImmutableList());
for (int i = 0; i < groupingSymbols.size(); i++) {
descriptor.addGrouping(groupingColumns.get(i), groupingSymbols.get(i));
}
ImmutableMap.Builder<Symbol, AggregationNode.Aggregation> aggregations = ImmutableMap.builder();
StandardFunctionResolution functionResolution = new FunctionResolution(metadata.getFunctionAndTypeManager());
for (TableStatisticType type : statisticsMetadata.getTableStatistics()) {
if (type != ROW_COUNT) {
throw new PrestoException(NOT_SUPPORTED, "Table-wide statistic type not supported: " + type);
}
AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation(new CallExpression("count", functionResolution.countFunction(), BIGINT, ImmutableList.of(), Optional.empty()), ImmutableList.of(), false, Optional.empty(), Optional.empty(), Optional.empty());
Symbol symbol = planSymbolAllocator.newSymbol("rowCount", BIGINT);
aggregations.put(symbol, aggregation);
descriptor.addTableStatistic(ROW_COUNT, symbol);
}
for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) {
String columnName = columnStatisticMetadata.getColumnName();
ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
Symbol inputSymbol = columnToSymbolMap.get(columnName);
verify(inputSymbol != null, "inputSymbol is null");
Type inputType = planSymbolAllocator.getTypes().get(inputSymbol);
verify(inputType != null, "inputType is null for symbol: %s", inputSymbol);
ColumnStatisticsAggregation aggregation = createColumnAggregation(statisticType, inputSymbol, inputType);
Symbol symbol = planSymbolAllocator.newSymbol(statisticType + ":" + columnName, aggregation.getOutputType());
aggregations.put(symbol, aggregation.getAggregation());
descriptor.addColumnStatistic(columnStatisticMetadata, symbol);
}
StatisticAggregations aggregation = new StatisticAggregations(aggregations.build(), groupingSymbols);
return new TableStatisticAggregation(aggregation, descriptor.build());
}
use of io.prestosql.spi.statistics.ColumnStatisticType in project hetu-core by openlookeng.
the class Statistics method createColumnStatisticsForEmptyPartition.
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
requireNonNull(columnType, "columnType is null");
HiveColumnStatistics.Builder result = HiveColumnStatistics.builder();
for (ColumnStatisticType columnStatisticType : columnStatisticTypes) {
switch(columnStatisticType) {
case MAX_VALUE_SIZE_IN_BYTES:
result.setMaxValueSizeInBytes(0);
break;
case TOTAL_SIZE_IN_BYTES:
result.setTotalSizeInBytes(0);
break;
case NUMBER_OF_DISTINCT_VALUES:
result.setDistinctValuesCount(0);
break;
case NUMBER_OF_NON_NULL_VALUES:
result.setNullsCount(0);
break;
case NUMBER_OF_TRUE_VALUES:
result.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
break;
case MIN_VALUE:
case MAX_VALUE:
setMinMaxForEmptyPartition(columnType, result);
break;
default:
throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + columnStatisticType.name());
}
}
return result.build();
}
use of io.prestosql.spi.statistics.ColumnStatisticType in project hetu-core by openlookeng.
the class HiveMetadata method finishStatisticsCollection.
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
HiveIdentity identity = new HiveIdentity(session);
HiveTableHandle handle = (HiveTableHandle) tableHandle;
SchemaTableName tableName = handle.getSchemaTableName();
Table table = metastore.getTable(identity, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
List<Column> partitionColumns = table.getPartitionColumns();
List<String> partitionColumnNames = partitionColumns.stream().map(Column::getName).collect(toImmutableList());
List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
Map<String, Type> columnTypes = hiveColumnHandles.stream().filter(columnHandle -> !columnHandle.isHidden()).collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
Map<List<String>, ComputedStatistics> computedStatisticsMap = Statistics.createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);
if (partitionColumns.isEmpty()) {
// commit analyze to unpartitioned table
metastore.setTableStatistics(identity, table, createPartitionStatistics(session, columnTypes, computedStatisticsMap.get(ImmutableList.<String>of())));
} else {
List<List<String>> partitionValuesList;
if (handle.getAnalyzePartitionValues().isPresent()) {
partitionValuesList = handle.getAnalyzePartitionValues().get();
} else {
partitionValuesList = metastore.getPartitionNames(identity, handle.getSchemaName(), handle.getTableName()).orElseThrow(() -> new TableNotFoundException(((HiveTableHandle) tableHandle).getSchemaTableName())).stream().map(HiveUtil::toPartitionValues).collect(toImmutableList());
}
ImmutableMap.Builder<List<String>, PartitionStatistics> partitionStatistics = ImmutableMap.builder();
Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream().filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName())).filter(column -> !column.isHidden()).collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(typeManager.getType(column.getTypeSignature())))));
Supplier<PartitionStatistics> emptyPartitionStatistics = Suppliers.memoize(() -> Statistics.createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
int usedComputedStatistics = 0;
for (List<String> partitionValues : partitionValuesList) {
ComputedStatistics collectedStatistics = computedStatisticsMap.get(partitionValues);
if (collectedStatistics == null) {
partitionStatistics.put(partitionValues, emptyPartitionStatistics.get());
} else {
usedComputedStatistics++;
partitionStatistics.put(partitionValues, createPartitionStatistics(session, columnTypes, collectedStatistics));
}
}
verify(usedComputedStatistics == computedStatistics.size(), "All computed statistics must be used");
metastore.setPartitionStatistics(identity, table, partitionStatistics.build());
}
}
use of io.prestosql.spi.statistics.ColumnStatisticType in project boostkit-bigdata by kunpengcompute.
the class Statistics method createColumnStatisticsForEmptyPartition.
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
requireNonNull(columnType, "columnType is null");
HiveColumnStatistics.Builder result = HiveColumnStatistics.builder();
for (ColumnStatisticType columnStatisticType : columnStatisticTypes) {
switch(columnStatisticType) {
case MAX_VALUE_SIZE_IN_BYTES:
result.setMaxValueSizeInBytes(0);
break;
case TOTAL_SIZE_IN_BYTES:
result.setTotalSizeInBytes(0);
break;
case NUMBER_OF_DISTINCT_VALUES:
result.setDistinctValuesCount(0);
break;
case NUMBER_OF_NON_NULL_VALUES:
result.setNullsCount(0);
break;
case NUMBER_OF_TRUE_VALUES:
result.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
break;
case MIN_VALUE:
case MAX_VALUE:
setMinMaxForEmptyPartition(columnType, result);
break;
default:
throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + columnStatisticType.name());
}
}
return result.build();
}
Aggregations