use of com.facebook.presto.spi.statistics.ColumnStatisticType in project presto by prestodb.
the class TestStatisticAggregationsDescriptor method testColumnStatisticMetadataKeySerializationRoundTrip.
@Test
public void testColumnStatisticMetadataKeySerializationRoundTrip() {
    for (String column : COLUMNS) {
        for (ColumnStatisticType type : ColumnStatisticType.values()) {
            ColumnStatisticMetadata expected = new ColumnStatisticMetadata(column, type);
            assertEquals(deserialize(serialize(expected)), expected);
        }
    }
}
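For context, StatisticAggregationsDescriptor serializes each ColumnStatisticMetadata to a string key and back, which is what the round trip above exercises. A self-contained sketch of the same idea follows; the "TYPE:column" key format and the helper names are assumptions made for illustration, not Presto's actual serializer.
// Illustrative sketch only: encodes a (statistic type, column) pair as "TYPE:column"
// and parses it back. The format and helper names are assumptions, not Presto's serializer.
public class ColumnStatisticKeySketch {
    enum StatType { MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES }

    static String serialize(StatType type, String column) {
        return type.name() + ":" + column;
    }

    static StatType deserializeType(String key) {
        return StatType.valueOf(key.substring(0, key.indexOf(':')));
    }

    static String deserializeColumn(String key) {
        return key.substring(key.indexOf(':') + 1);
    }

    public static void main(String[] args) {
        for (StatType type : StatType.values()) {
            String key = serialize(type, "order_key");
            if (deserializeType(key) != type || !deserializeColumn(key).equals("order_key")) {
                throw new AssertionError("round trip failed for " + key);
            }
        }
    }
}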
use of com.facebook.presto.spi.statistics.ColumnStatisticType in project presto by prestodb.
the class Statistics method createColumnStatisticsForEmptyPartition.
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");
    HiveColumnStatistics.Builder result = HiveColumnStatistics.builder();
    for (ColumnStatisticType columnStatisticType : columnStatisticTypes) {
        switch (columnStatisticType) {
            case MAX_VALUE_SIZE_IN_BYTES:
                result.setMaxValueSizeInBytes(0);
                break;
            case TOTAL_SIZE_IN_BYTES:
                result.setTotalSizeInBytes(0);
                break;
            case NUMBER_OF_DISTINCT_VALUES:
                result.setDistinctValuesCount(0);
                break;
            case NUMBER_OF_NON_NULL_VALUES:
                result.setNullsCount(0);
                break;
            case NUMBER_OF_TRUE_VALUES:
                result.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
                break;
            case MIN_VALUE:
            case MAX_VALUE:
                setMinMaxForEmptyPartition(columnType, result);
                break;
            default:
                throw new PrestoException(HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + columnStatisticType.name());
        }
    }
    return result.build();
}
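Callers such as createEmptyPartitionStatistics (used in the snippets further down) invoke this helper once per column and pair the result with a zero row count. A hedged, standalone sketch of that shape follows; ColumnStats, PartitionStats, and the method names are placeholders rather than Presto types.
// Standalone sketch: assembling per-column empty statistics into partition-level
// statistics for a partition with zero rows. ColumnStats, PartitionStats and the
// statistic names are simplified placeholders, not Presto's API.
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

public class EmptyPartitionStatisticsSketch {
    enum StatType { NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES }

    static final class ColumnStats {
        final Map<StatType, Long> values = new EnumMap<>(StatType.class);
    }

    static final class PartitionStats {
        final long rowCount;
        final Map<String, ColumnStats> columnStatistics;

        PartitionStats(long rowCount, Map<String, ColumnStats> columnStatistics) {
            this.rowCount = rowCount;
            this.columnStatistics = columnStatistics;
        }
    }

    static ColumnStats emptyColumnStatistics(Set<StatType> requested) {
        // for an empty partition every requested counter is simply zero
        ColumnStats stats = new ColumnStats();
        for (StatType type : requested) {
            stats.values.put(type, 0L);
        }
        return stats;
    }

    static PartitionStats emptyPartitionStatistics(Map<String, Set<StatType>> columnStatisticTypes) {
        Map<String, ColumnStats> columns = new HashMap<>();
        columnStatisticTypes.forEach((column, types) -> columns.put(column, emptyColumnStatistics(types)));
        return new PartitionStats(0, columns);
    }
}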
use of com.facebook.presto.spi.statistics.ColumnStatisticType in project presto by prestodb.
the class HiveMetadata method createTemporaryTable.
@Override
public ConnectorTableHandle createTemporaryTable(ConnectorSession session, List<ColumnMetadata> columns, Optional<ConnectorPartitioningMetadata> partitioningMetadata) {
    String schemaName = getTemporaryTableSchema(session);
    HiveStorageFormat storageFormat = getTemporaryTableStorageFormat(session);
    Optional<HiveBucketProperty> bucketProperty = partitioningMetadata.map(partitioning -> {
        Set<String> allColumns = columns.stream().map(ColumnMetadata::getName).collect(toImmutableSet());
        if (!allColumns.containsAll(partitioning.getPartitionColumns())) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, format("Bucketing columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(partitioning.getPartitionColumns()), allColumns)));
        }
        HivePartitioningHandle partitioningHandle = (HivePartitioningHandle) partitioning.getPartitioningHandle();
        List<String> partitionColumns = partitioning.getPartitionColumns();
        BucketFunctionType bucketFunctionType = partitioningHandle.getBucketFunctionType();
        switch (bucketFunctionType) {
            case HIVE_COMPATIBLE:
                return new HiveBucketProperty(partitionColumns, partitioningHandle.getBucketCount(), ImmutableList.of(), HIVE_COMPATIBLE, Optional.empty());
            case PRESTO_NATIVE:
                Map<String, Type> columnNameToTypeMap = columns.stream().collect(toMap(ColumnMetadata::getName, ColumnMetadata::getType));
                return new HiveBucketProperty(partitionColumns, partitioningHandle.getBucketCount(), ImmutableList.of(), PRESTO_NATIVE, Optional.of(partitionColumns.stream().map(columnNameToTypeMap::get).collect(toImmutableList())));
            default:
                throw new IllegalArgumentException("Unsupported bucket function type " + bucketFunctionType);
        }
    });
    if (isUsePageFileForHiveUnsupportedType(session)) {
        if (!columns.stream().map(ColumnMetadata::getType).allMatch(HiveTypeTranslator::isSupportedHiveType)) {
            storageFormat = PAGEFILE;
        }
    }
    // PAGEFILE format doesn't require translation to hive type,
    // choose HIVE_BINARY as a default hive type to make it compatible with Hive connector
    Optional<HiveType> defaultHiveType = storageFormat == PAGEFILE ? Optional.of(HIVE_BINARY) : Optional.empty();
    List<HiveColumnHandle> columnHandles = getColumnHandles(
            // translate Hive-unsupported types (e.g. the unknown type to the boolean type that is binary compatible)
            translateHiveUnsupportedTypesForTemporaryTable(columns, typeManager),
            ImmutableSet.of(),
            typeTranslator,
            defaultHiveType);
    validateColumns(storageFormat, columnHandles);
    HiveStorageFormat finalStorageFormat = storageFormat;
    String tableName = PRESTO_TEMPORARY_TABLE_NAME_PREFIX + finalStorageFormat.name() + "_" + session.getQueryId().replaceAll("-", "_") + "_" + randomUUID().toString().replaceAll("-", "_");
    Table table = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(session.getUser())
            .setTableType(TEMPORARY_TABLE)
            .setDataColumns(columnHandles.stream().map(handle -> new Column(handle.getName(), handle.getHiveType(), handle.getComment(), Optional.empty())).collect(toImmutableList()))
            .withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(finalStorageFormat)).setBucketProperty(bucketProperty).setLocation(""))
            .build();
    List<String> partitionColumnNames = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(getSupportedColumnStatisticsForTemporaryTable(typeManager.getType(column.getTypeSignature())))));
    metastore.createTable(session, table, buildInitialPrivilegeSet(table.getOwner()), Optional.empty(), false, createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
    return new HiveTableHandle(schemaName, tableName);
}
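The temporary table name built above concatenates a fixed prefix, the storage format, the query id, and a random UUID, normalizing dashes to underscores. A standalone sketch of that construction is below; the prefix literal is an assumed placeholder, since the snippet only references the PRESTO_TEMPORARY_TABLE_NAME_PREFIX constant.
// Sketch of the temporary table naming scheme used above.
// The "__presto_temporary_table_" prefix value is an assumption for illustration.
import static java.util.UUID.randomUUID;

public class TemporaryTableNameSketch {
    private static final String PRESTO_TEMPORARY_TABLE_NAME_PREFIX = "__presto_temporary_table_";

    static String temporaryTableName(String storageFormat, String queryId) {
        return PRESTO_TEMPORARY_TABLE_NAME_PREFIX
                + storageFormat
                + "_" + queryId.replaceAll("-", "_")
                + "_" + randomUUID().toString().replaceAll("-", "_");
    }

    public static void main(String[] args) {
        System.out.println(temporaryTableName("ORC", "20240101_000000_00000_abcde"));
    }
}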
use of com.facebook.presto.spi.statistics.ColumnStatisticType in project presto by prestodb.
the class HiveMetadata method finishStatisticsCollection.
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = handle.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    List<Column> partitionColumns = table.getPartitionColumns();
    List<String> partitionColumnNames = partitionColumns.stream().map(Column::getName).collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> computedStatisticsMap = createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);
    if (partitionColumns.isEmpty()) {
        // commit analyze to unpartitioned table
        metastore.setTableStatistics(metastoreContext, table, createPartitionStatistics(session, columnTypes, computedStatisticsMap.get(ImmutableList.<String>of())));
    } else {
        List<List<String>> partitionValuesList;
        if (handle.getAnalyzePartitionValues().isPresent()) {
            partitionValuesList = handle.getAnalyzePartitionValues().get();
        } else {
            partitionValuesList = metastore.getPartitionNames(metastoreContext, handle.getSchemaName(), handle.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(((HiveTableHandle) tableHandle).getSchemaTableName()))
                    .stream()
                    .map(MetastoreUtil::toPartitionValues)
                    .collect(toImmutableList());
        }
        ImmutableMap.Builder<List<String>, PartitionStatistics> partitionStatistics = ImmutableMap.builder();
        Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
                .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
                .filter(column -> !column.isHidden())
                .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(metastoreContext, typeManager.getType(column.getTypeSignature())))));
        Supplier<PartitionStatistics> emptyPartitionStatistics = Suppliers.memoize(() -> createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
        int usedComputedStatistics = 0;
        for (List<String> partitionValues : partitionValuesList) {
            ComputedStatistics collectedStatistics = computedStatisticsMap.get(partitionValues);
            if (collectedStatistics == null) {
                partitionStatistics.put(partitionValues, emptyPartitionStatistics.get());
            } else {
                usedComputedStatistics++;
                partitionStatistics.put(partitionValues, createPartitionStatistics(session, columnTypes, collectedStatistics));
            }
        }
        verify(usedComputedStatistics == computedStatistics.size(), "All computed statistics must be used");
        metastore.setPartitionStatistics(metastoreContext, table, partitionStatistics.build());
    }
}
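The per-partition loop above backfills partitions that produced no computed statistics with memoized empty statistics and then verifies that every computed entry was consumed. A standalone sketch of that backfill-and-verify pattern follows; Stats is a placeholder type standing in for PartitionStatistics.
// Sketch of the backfill-and-verify pattern above. Stats is a placeholder type.
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

public class BackfillPartitionStatsSketch {
    static final class Stats {}

    static Map<List<String>, Stats> assignStatistics(
            List<List<String>> partitionValuesList,
            Map<List<String>, Stats> computed,
            Supplier<Stats> emptyStatistics) {
        Map<List<String>, Stats> result = new HashMap<>();
        int used = 0;
        for (List<String> partitionValues : partitionValuesList) {
            Stats collected = computed.get(partitionValues);
            if (collected == null) {
                // no statistics were computed for this partition: fall back to empty statistics
                result.put(partitionValues, emptyStatistics.get());
            } else {
                used++;
                result.put(partitionValues, collected);
            }
        }
        if (used != computed.size()) {
            throw new IllegalStateException("All computed statistics must be used");
        }
        return result;
    }

    public static void main(String[] args) {
        Map<List<String>, Stats> computed = Map.of(List.of("2024-01-01"), new Stats());
        assignStatistics(List.of(List.of("2024-01-01"), List.of("2024-01-02")), computed, Stats::new);
    }
}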
use of com.facebook.presto.spi.statistics.ColumnStatisticType in project presto by prestodb.
the class StatisticsAggregationPlanner method createStatisticsAggregation.
public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map<String, VariableReferenceExpression> columnToVariableMap, boolean useOriginalExpression) {
    StatisticAggregationsDescriptor.Builder<VariableReferenceExpression> descriptor = StatisticAggregationsDescriptor.builder();
    List<String> groupingColumns = statisticsMetadata.getGroupingColumns();
    List<VariableReferenceExpression> groupingVariables = groupingColumns.stream().map(columnToVariableMap::get).collect(toImmutableList());
    for (int i = 0; i < groupingVariables.size(); i++) {
        descriptor.addGrouping(groupingColumns.get(i), groupingVariables.get(i));
    }
    ImmutableMap.Builder<VariableReferenceExpression, AggregationNode.Aggregation> aggregations = ImmutableMap.builder();
    StandardFunctionResolution functionResolution = new FunctionResolution(metadata.getFunctionAndTypeManager());
    for (TableStatisticType type : statisticsMetadata.getTableStatistics()) {
        if (type != ROW_COUNT) {
            throw new PrestoException(NOT_SUPPORTED, "Table-wide statistic type not supported: " + type);
        }
        AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation(
                new CallExpression("count", functionResolution.countFunction(), BIGINT, ImmutableList.of()),
                Optional.empty(),
                Optional.empty(),
                false,
                Optional.empty());
        VariableReferenceExpression variable = variableAllocator.newVariable("rowCount", BIGINT);
        aggregations.put(variable, aggregation);
        descriptor.addTableStatistic(ROW_COUNT, variable);
    }
    for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) {
        String columnName = columnStatisticMetadata.getColumnName();
        ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
        VariableReferenceExpression inputVariable = columnToVariableMap.get(columnName);
        verify(inputVariable != null, "inputVariable is null");
        ColumnStatisticsAggregation aggregation = createColumnAggregation(statisticType, inputVariable, useOriginalExpression);
        VariableReferenceExpression variable = variableAllocator.newVariable(statisticType + ":" + columnName, aggregation.getOutputType());
        aggregations.put(variable, aggregation.getAggregation());
        descriptor.addColumnStatistic(columnStatisticMetadata, variable);
    }
    StatisticAggregations aggregation = new StatisticAggregations(aggregations.build(), groupingVariables);
    return new TableStatisticAggregation(aggregation, descriptor.build());
}
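The per-column branch delegates to createColumnAggregation, which is not shown here; it maps each ColumnStatisticType to an aggregation call over the input variable. A hedged sketch of that mapping is below; the function names are the usual choices for these statistics but are listed as assumptions, not verified against the Presto source.
// Hedged sketch of a statistic-type-to-aggregation-function mapping like the one
// performed by createColumnAggregation. The function names are assumptions.
public class ColumnAggregationMappingSketch {
    enum StatType {
        MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES,
        NUMBER_OF_TRUE_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES
    }

    static String aggregationFunctionFor(StatType type) {
        switch (type) {
            case MIN_VALUE:
                return "min";
            case MAX_VALUE:
                return "max";
            case NUMBER_OF_DISTINCT_VALUES:
                return "approx_distinct";
            case NUMBER_OF_NON_NULL_VALUES:
                return "count";
            case NUMBER_OF_TRUE_VALUES:
                return "count_if";
            case TOTAL_SIZE_IN_BYTES:
                return "sum_data_size_for_stats";
            case MAX_VALUE_SIZE_IN_BYTES:
                return "max_data_size_for_stats";
            default:
                throw new IllegalArgumentException("Unsupported statistic type: " + type);
        }
    }

    public static void main(String[] args) {
        for (StatType type : StatType.values()) {
            System.out.println(type + " -> " + aggregationFunctionFor(type));
        }
    }
}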