use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class HiveMetadata method finishStatisticsCollection.
/**
 * Persists the statistics computed for a table to the metastore.
 *
 * <p>For a table without partition columns the statistics keyed by the empty partition-value
 * list are written as table statistics. Otherwise one entry is written per partition: the
 * partitions are taken from the handle's analyze partition values when present, or from the
 * metastore's partition names when absent. Partitions with no computed statistics receive a
 * shared, lazily created empty statistics object.
 *
 * @param session the session used to build the metastore context and the partition statistics
 * @param tableHandle handle of the analyzed table; cast to {@link HiveTableHandle}
 * @param computedStatistics the statistics computed for the table
 * @throws TableNotFoundException if the table (or its partition-name listing) is not found in the metastore
 */
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = handle.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));

    List<Column> partitionColumns = table.getPartitionColumns();
    List<String> partitionColumnNames = partitionColumns.stream()
            .map(Column::getName)
            .collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> statisticsByPartition = createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);

    if (partitionColumns.isEmpty()) {
        // Unpartitioned table: the single statistics entry is keyed by the empty partition-value list.
        ComputedStatistics tableLevelStatistics = statisticsByPartition.get(ImmutableList.<String>of());
        metastore.setTableStatistics(metastoreContext, table, createPartitionStatistics(session, columnTypes, tableLevelStatistics));
        return;
    }

    // Partitioned table: use the explicitly requested partitions, or fall back to every partition the metastore lists.
    List<List<String>> partitionValuesList = handle.getAnalyzePartitionValues()
            .orElseGet(() -> metastore.getPartitionNames(metastoreContext, handle.getSchemaName(), handle.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()))
                    .stream()
                    .map(MetastoreUtil::toPartitionValues)
                    .collect(toImmutableList()));

    ImmutableMap.Builder<List<String>, PartitionStatistics> partitionStatistics = ImmutableMap.builder();
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
            .filter(column -> !partitionColumnNames.contains(column.getName()) && !column.isHidden())
            .collect(toImmutableMap(
                    HiveColumnHandle::getName,
                    column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(metastoreContext, typeManager.getType(column.getTypeSignature())))));
    // Memoized so the empty statistics object is built at most once, and only if some partition needs it.
    Supplier<PartitionStatistics> emptyPartitionStatistics = Suppliers.memoize(() -> createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));

    int usedComputedStatistics = 0;
    for (List<String> partitionValues : partitionValuesList) {
        ComputedStatistics collected = statisticsByPartition.get(partitionValues);
        if (collected != null) {
            usedComputedStatistics++;
            partitionStatistics.put(partitionValues, createPartitionStatistics(session, columnTypes, collected));
            continue;
        }
        partitionStatistics.put(partitionValues, emptyPartitionStatistics.get());
    }

    // Every computed entry must have matched one of the partitions being written.
    verify(usedComputedStatistics == computedStatistics.size(), "All computed statistics must be used");
    metastore.setPartitionStatistics(metastoreContext, table, partitionStatistics.build());
}
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class HiveMetadata method columnMetadataGetter.
/**
 * Builds a function that converts a {@link HiveColumnHandle} of the given table into its
 * {@link ColumnMetadata}.
 *
 * <p>Column comments and type metadata are looked up by column name from maps built once, up
 * front, from the table's data and partition columns. The comment text "from deserializer" is
 * treated as no comment. The hidden path, file-size and file-modified-time columns (and the
 * bucket column when the table storage has a bucket property) are registered with no comment.
 *
 * @param table the table whose columns are described
 * @param typeManager resolves type signatures to types
 * @param columnConverter produces the type signature from a Hive type and optional type metadata
 * @return a function mapping a column handle to its metadata
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} if the table declares the same column name more than once
 */
@VisibleForTesting
static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(Table table, TypeManager typeManager, ColumnConverter columnConverter) {
    List<Column> partitionColumns = table.getPartitionColumns();
    List<Column> dataColumns = table.getDataColumns();

    // Collect every declared column name and reject descriptors with duplicates.
    ImmutableList.Builder<String> declaredNames = ImmutableList.builder();
    for (Column column : partitionColumns) {
        declaredNames.add(column.getName());
    }
    for (Column column : dataColumns) {
        declaredNames.add(column.getName());
    }
    List<String> allColumnNames = declaredNames.build();
    if (Sets.newHashSet(allColumnNames).size() < allColumnNames.size()) {
        throw new PrestoException(HIVE_INVALID_METADATA, format("Hive metadata for table %s is invalid: Table descriptor contains duplicate columns", table.getTableName()));
    }

    ImmutableMap.Builder<String, Optional<String>> comments = ImmutableMap.builder();
    ImmutableMap.Builder<String, Optional<String>> typeMetadataByName = ImmutableMap.builder();
    for (Column column : concat(dataColumns, partitionColumns)) {
        String name = column.getName();
        // The "from deserializer" placeholder is not a real comment; surface it as absent.
        comments.put(name, column.getComment().filter(text -> !text.equals("from deserializer")));
        typeMetadataByName.put(name, column.getTypeMetadata());
    }

    // add hidden columns
    comments.put(PATH_COLUMN_NAME, Optional.empty());
    if (table.getStorage().getBucketProperty().isPresent()) {
        comments.put(BUCKET_COLUMN_NAME, Optional.empty());
    }
    comments.put(FILE_SIZE_COLUMN_NAME, Optional.empty());
    comments.put(FILE_MODIFIED_TIME_COLUMN_NAME, Optional.empty());

    Map<String, Optional<String>> columnComment = comments.build();
    Map<String, Optional<String>> typeMetadata = typeMetadataByName.build();

    return handle -> {
        String name = handle.getName();
        Type type = typeManager.getType(columnConverter.getTypeSignature(handle.getHiveType(), typeMetadata.getOrDefault(name, Optional.empty())));
        String comment = columnComment.get(name).orElse(null);
        return new ColumnMetadata(name, type, comment, columnExtraInfo(handle.isPartitionKey()), handle.isHidden());
    };
}
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class HiveMetadata method getMaterializedViewStatus.
/**
 * Computes how fresh a materialized view is relative to its base tables.
 *
 * <p>The view's own data predicates are compared against those of every base table; what is
 * left over per base table is the set of predicates not yet available in the view.
 *
 * <p>The result is:
 * <ul>
 * <li>{@code NOT_MATERIALIZED} when the view has no data predicates at all;</li>
 * <li>{@code FULLY_MATERIALIZED} when no base table has any leftover predicate;</li>
 * <li>{@code TOO_MANY_PARTITIONS_MISSING} when the number of missing partition domains, summed
 * over <em>all</em> base tables, exceeds the session's missing-partitions threshold;</li>
 * <li>{@code PARTIALLY_MATERIALIZED} otherwise.</li>
 * </ul>
 *
 * <p>The count is accumulated over every base table so the outcome does not depend on which
 * base table happens to be iterated first.
 *
 * @param session the session providing the metastore context and the threshold property
 * @param materializedViewName name of the materialized view
 * @return the materialization status, carrying the per-base-table missing predicates when the view is stale
 * @throws MaterializedViewNotFoundException if the view definition or its backing table is missing
 * @throws TableNotFoundException if a base table is missing
 * @throws IllegalStateException if a base table is not a managed table or the view table is not a materialized view
 */
@Override
public MaterializedViewStatus getMaterializedViewStatus(ConnectorSession session, SchemaTableName materializedViewName) {
    MetastoreContext metastoreContext = getMetastoreContext(session);
    ConnectorMaterializedViewDefinition viewDefinition = getMaterializedView(session, materializedViewName)
            .orElseThrow(() -> new MaterializedViewNotFoundException(materializedViewName));

    List<Table> baseTables = viewDefinition.getBaseTables().stream()
            .map(baseTableName -> metastore.getTable(metastoreContext, baseTableName.getSchemaName(), baseTableName.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(baseTableName)))
            .collect(toImmutableList());
    // Template overload: the message is only formatted when the check actually fails.
    baseTables.forEach(table -> checkState(table.getTableType().equals(MANAGED_TABLE), "base table %s is not a managed table", table.getTableName()));

    Table materializedViewTable = metastore.getTable(metastoreContext, materializedViewName.getSchemaName(), materializedViewName.getTableName())
            .orElseThrow(() -> new MaterializedViewNotFoundException(materializedViewName));
    checkState(materializedViewTable.getTableType().equals(MATERIALIZED_VIEW), "materialized view table %s is not a materialized view", materializedViewTable.getTableName());

    validateMaterializedViewPartitionColumns(metastore, metastoreContext, materializedViewTable, viewDefinition);

    Map<String, Map<SchemaTableName, String>> directColumnMappings = viewDefinition.getDirectColumnMappingsAsMap();
    Map<SchemaTableName, Map<String, String>> viewToBasePartitionMap = getViewToBasePartitionMap(materializedViewTable, baseTables, directColumnMappings);

    MaterializedDataPredicates materializedDataPredicates = getMaterializedDataPredicates(metastore, metastoreContext, typeManager, materializedViewTable, timeZone);
    if (materializedDataPredicates.getPredicateDisjuncts().isEmpty()) {
        return new MaterializedViewStatus(NOT_MATERIALIZED);
    }

    // Partitions to keep track of for materialized view freshness are the partitions of every base table
    // that are not available/updated to the materialized view yet.
    Map<SchemaTableName, MaterializedDataPredicates> partitionsFromBaseTables = baseTables.stream().collect(toImmutableMap(
            baseTable -> new SchemaTableName(baseTable.getDatabaseName(), baseTable.getTableName()),
            baseTable -> {
                MaterializedDataPredicates baseTableMaterializedPredicates = getMaterializedDataPredicates(metastore, metastoreContext, typeManager, baseTable, timeZone);
                SchemaTableName schemaTableName = new SchemaTableName(baseTable.getDatabaseName(), baseTable.getTableName());
                Map<String, String> viewToBaseIndirectMappedColumns = viewToBaseTableOnOuterJoinSideIndirectMappedPartitions(viewDefinition, baseTable).orElse(ImmutableMap.of());
                return differenceDataPredicates(
                        baseTableMaterializedPredicates,
                        materializedDataPredicates,
                        viewToBasePartitionMap.getOrDefault(schemaTableName, ImmutableMap.of()),
                        viewToBaseIndirectMappedColumns);
            }));

    // Inspect every base table before deciding: returning on the first stale table would make the
    // threshold comparison depend on iteration order and ignore the remaining tables.
    boolean anyBaseTableStale = false;
    long missingPartitionCount = 0;
    for (MaterializedDataPredicates dataPredicates : partitionsFromBaseTables.values()) {
        if (dataPredicates.getPredicateDisjuncts().isEmpty()) {
            continue;
        }
        anyBaseTableStale = true;
        missingPartitionCount += dataPredicates.getPredicateDisjuncts().stream()
                .mapToInt(tupleDomain -> tupleDomain.getDomains().isPresent() ? tupleDomain.getDomains().get().size() : 0)
                .sum();
    }

    if (!anyBaseTableStale) {
        return new MaterializedViewStatus(FULLY_MATERIALIZED);
    }
    if (missingPartitionCount > HiveSessionProperties.getMaterializedViewMissingPartitionsThreshold(session)) {
        return new MaterializedViewStatus(TOO_MANY_PARTITIONS_MISSING, partitionsFromBaseTables);
    }
    return new MaterializedViewStatus(PARTIALLY_MATERIALIZED, partitionsFromBaseTables);
}
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class HivePageSourceProvider method getPageSourceFromCursorProvider.
/**
 * Creates a page source backed by the first record-cursor provider that can read the file.
 *
 * <p>Each provider is asked in turn for a cursor. The first cursor obtained is optionally
 * wrapped for bucket adaptation and for type coercion, then exposed as a record page source;
 * when filter pushdown is enabled that page source is further wrapped in a filtering page source.
 *
 * @return the page source, or an empty optional when no provider produced a cursor
 * @throws UnsupportedOperationException if every requested column is an aggregated column
 *         (the cursor path does not support partial aggregation pushdown)
 */
private static Optional<ConnectorPageSource> getPageSourceFromCursorProvider(Set<HiveRecordCursorProvider> cursorProviders, Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> hiveColumns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, SchemaTableName tableName, List<HiveColumnHandle> partitionKeyColumnHandles, List<Column> tableDataColumns, Map<String, String> tableParameters, int partitionDataColumnCount, TableToPartitionMapping tableToPartitionMapping, boolean s3SelectPushdownEnabled, RowExpression remainingPredicate, boolean isPushdownFilterEnabled, RowExpressionService rowExpressionService, Map<String, String> customSplitInfo, List<HiveColumnHandle> allColumns, List<ColumnMapping> columnMappings, Set<Integer> outputIndices, List<ColumnMapping> regularAndInterimColumnMappings, Optional<BucketAdaptation> bucketAdaptation) {
    boolean onlyAggregatedColumns = !hiveColumns.isEmpty()
            && hiveColumns.stream().allMatch(column -> column.getColumnType() == AGGREGATED);
    if (onlyAggregatedColumns) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. Table " + tableName.toString()
                + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat()
                + ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }

    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);

        List<Column> partitionDataColumns = reconstructPartitionSchema(
                tableDataColumns,
                partitionDataColumnCount,
                tableToPartitionMapping.getPartitionSchemaDifference(),
                tableToPartitionMapping.getTableToPartitionColumns());
        Properties schema = getHiveSchema(
                storage,
                partitionDataColumns,
                tableDataColumns,
                tableParameters,
                tableName.getSchemaName(),
                tableName.getTableName(),
                partitionKeyColumnHandles.stream().map(HiveColumnHandle::getName).collect(toImmutableList()),
                partitionKeyColumnHandles.stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()));

        Optional<RecordCursor> cursor = provider.createRecordCursor(
                configuration,
                session,
                path,
                start,
                length,
                fileSize,
                schema,
                toColumnHandles(regularAndInterimColumnMappings, doCoercion),
                effectivePredicate,
                hiveStorageTimeZone,
                typeManager,
                s3SelectPushdownEnabled,
                customSplitInfo);
        if (!cursor.isPresent()) {
            // This provider cannot read the file; try the next one.
            continue;
        }

        RecordCursor delegate = cursor.get();
        if (bucketAdaptation.isPresent()) {
            BucketAdaptation adaptation = bucketAdaptation.get();
            delegate = new HiveBucketAdapterRecordCursor(
                    adaptation.getBucketColumnIndices(),
                    adaptation.getBucketColumnHiveTypes(),
                    adaptation.getTableBucketCount(),
                    adaptation.getPartitionBucketCount(),
                    adaptation.getBucketToKeep(),
                    typeManager,
                    delegate);
        }
        // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatch columns
        if (doCoercion) {
            delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
        }

        HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, hiveStorageTimeZone, typeManager, delegate);
        List<Type> columnTypes = allColumns.stream()
                .map(column -> typeManager.getType(column.getTypeSignature()))
                .collect(toList());
        RecordPageSource recordPageSource = new RecordPageSource(columnTypes, hiveRecordCursor);

        ConnectorPageSource pageSource = isPushdownFilterEnabled
                ? new FilteringPageSource(columnMappings, effectivePredicate, remainingPredicate, typeManager, rowExpressionService, session, outputIndices, recordPageSource)
                : recordPageSource;
        return Optional.of(pageSource);
    }

    return Optional.empty();
}
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class HivePageSourceProvider method shouldSkipPartition.
/**
 * Decides whether a split can be skipped because its partition values cannot satisfy the
 * dynamic filter carried by the split context.
 *
 * <p>Returns {@code false} (do not skip) when there is no dynamic filter, when the split or the
 * layout has no partition columns, or when the number of partition keys does not match the
 * number of partition columns. Otherwise each partition key is paired positionally with the
 * layout's partition column, and the split is skipped as soon as one partition value falls
 * outside the domain the dynamic filter allows for that column.
 *
 * <p>A dynamic filter that carries no domain map at all admits no rows, so the split is skipped
 * in that case instead of failing on an empty {@code Optional}.
 *
 * @param typeManager resolves partition column type signatures
 * @param hiveLayout the table layout supplying the partition columns
 * @param hiveStorageTimeZone the time zone passed to partition value parsing
 * @param hiveSplit the split supplying the partition keys
 * @param splitContext the context supplying the optional dynamic filter predicate
 * @return {@code true} if the split can be skipped, {@code false} otherwise
 */
private static boolean shouldSkipPartition(TypeManager typeManager, HiveTableLayoutHandle hiveLayout, DateTimeZone hiveStorageTimeZone, HiveSplit hiveSplit, SplitContext splitContext) {
    List<HiveColumnHandle> partitionColumns = hiveLayout.getPartitionColumns();
    List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
    if (!splitContext.getDynamicFilterPredicate().isPresent()
            || partitionKeys.isEmpty()
            || partitionColumns.isEmpty()
            || partitionColumns.size() != partitionKeys.size()) {
        return false;
    }

    TupleDomain<ColumnHandle> dynamicFilter = splitContext.getDynamicFilterPredicate().get();
    if (!dynamicFilter.getDomains().isPresent()) {
        // No domain map means the filter matches nothing; calling get() here would throw.
        return true;
    }
    Map<ColumnHandle, Domain> domains = dynamicFilter.getDomains().get();

    for (int i = 0; i < partitionKeys.size(); i++) {
        HiveColumnHandle partitionColumn = partitionColumns.get(i);
        Domain allowedDomain = domains.get(partitionColumn);
        if (allowedDomain == null) {
            // The dynamic filter does not constrain this column; no need to resolve its type or parse its value.
            continue;
        }
        Type type = typeManager.getType(partitionColumn.getTypeSignature());
        NullableValue value = parsePartitionValue(partitionKeys.get(i), type, hiveStorageTimeZone);
        if (!allowedDomain.includesNullableValue(value.getValue())) {
            return true;
        }
    }
    return false;
}
Aggregations