Use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb:
class HiveMaterializedViewUtils, method validateMaterializedViewPartitionColumns.
/**
 * Validate the partition columns of a materialized view to ensure that 1) the materialized view is partitioned; 2) it has at least one partition
 * column directly mapped to each base table; and 3) outer join conditions have common partition columns that are partition columns of the view as well.
* <p>
* A column is directly mapped to a base table column if it is derived directly or transitively from the base table column,
* by only selecting a column or an aliased column without any function or operator applied.
* For example, with SELECT column_b AS column_a, column_a is directly mapped to column_b.
* With SELECT column_b + column_c AS column_a, column_a is not directly mapped to any column.
* <p>
* {@code viewToBaseColumnMap} only contains direct column mappings.
*/
public static void validateMaterializedViewPartitionColumns(SemiTransactionalHiveMetastore metastore, MetastoreContext metastoreContext, Table viewTable, ConnectorMaterializedViewDefinition viewDefinition) {
    SchemaTableName viewName = new SchemaTableName(viewTable.getDatabaseName(), viewTable.getTableName());
    Map<String, Map<SchemaTableName, String>> viewToBaseDirectColumnMap = viewDefinition.getDirectColumnMappingsAsMap();
    if (viewToBaseDirectColumnMap.isEmpty()) {
        throw new PrestoException(NOT_SUPPORTED, format("Materialized view %s must have at least one column directly defined by a base table column.", viewName));
    }
    List<Column> viewPartitions = viewTable.getPartitionColumns();
    if (viewPartitions.isEmpty()) {
        throw new PrestoException(NOT_SUPPORTED, "Unpartitioned materialized view is not supported.");
    }
    List<Table> baseTables = viewDefinition.getBaseTables().stream()
            .map(baseTableName -> metastore.getTable(metastoreContext, baseTableName.getSchemaName(), baseTableName.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(baseTableName)))
            .collect(toImmutableList());
    Map<Table, List<Column>> baseTablePartitions = baseTables.stream().collect(toImmutableMap(table -> table, Table::getPartitionColumns));
    for (Table baseTable : baseTablePartitions.keySet()) {
        SchemaTableName schemaBaseTable = new SchemaTableName(baseTable.getDatabaseName(), baseTable.getTableName());
        if (!isCommonPartitionFound(schemaBaseTable, baseTablePartitions.get(baseTable), viewPartitions, viewToBaseDirectColumnMap)) {
            throw new PrestoException(NOT_SUPPORTED, format("Materialized view %s must have at least one partition column that exists in %s as well", viewName, baseTable.getTableName()));
        }
        if (viewDefinition.getBaseTablesOnOuterJoinSide().contains(schemaBaseTable) && viewToBaseTableOnOuterJoinSideIndirectMappedPartitions(viewDefinition, baseTable).get().isEmpty()) {
            throw new PrestoException(NOT_SUPPORTED, format("Outer join conditions in Materialized view %s must have at least one common partition equality constraint", viewName));
        }
    }
}
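
The helper isCommonPartitionFound is referenced above but not shown. A minimal standalone sketch of the check it implies, assuming the same shape of viewToBaseDirectColumnMap (view column to base table to base column) and using plain strings instead of Presto's SchemaTableName and Column types; this is an illustration, not the Presto implementation:

import java.util.List;
import java.util.Map;

public final class CommonPartitionCheckSketch
{
    private CommonPartitionCheckSketch() {}

    // A view partition column counts as "common" with a base table if it is directly
    // mapped to one of that base table's partition columns.
    public static boolean hasCommonPartition(
            String baseTable,
            List<String> baseTablePartitionColumns,
            List<String> viewPartitionColumns,
            Map<String, Map<String, String>> viewToBaseDirectColumnMap)
    {
        for (String viewPartitionColumn : viewPartitionColumns) {
            Map<String, String> mappingsForColumn = viewToBaseDirectColumnMap.get(viewPartitionColumn);
            if (mappingsForColumn == null) {
                continue; // this view partition column is not directly mapped to any base column
            }
            String mappedBaseColumn = mappingsForColumn.get(baseTable);
            if (mappedBaseColumn != null && baseTablePartitionColumns.contains(mappedBaseColumn)) {
                return true;
            }
        }
        return false;
    }
}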
Use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb:
class HiveMaterializedViewUtils, method getMaterializedDataPredicates.
public static MaterializedDataPredicates getMaterializedDataPredicates(SemiTransactionalHiveMetastore metastore, MetastoreContext metastoreContext, TypeManager typeManager, Table table, DateTimeZone timeZone) {
    List<Column> partitionColumns = table.getPartitionColumns();
    for (Column partitionColumn : partitionColumns) {
        HiveType hiveType = partitionColumn.getType();
        if (!hiveType.isSupportedType()) {
            throw new PrestoException(NOT_SUPPORTED, String.format("Unsupported Hive type %s found in partition keys of table %s.%s", hiveType, table.getDatabaseName(), table.getTableName()));
        }
    }
    List<HiveColumnHandle> partitionKeyColumnHandles = getPartitionKeyColumnHandles(table);
    Map<String, Type> partitionTypes = partitionKeyColumnHandles.stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> typeManager.getType(column.getTypeSignature())));
    List<String> partitionNames = metastore.getPartitionNames(metastoreContext, table.getDatabaseName(), table.getTableName())
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName())));
    ImmutableList.Builder<TupleDomain<String>> partitionNamesAndValues = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        ImmutableMap.Builder<String, NullableValue> partitionNameAndValuesMap = ImmutableMap.builder();
        Map<String, String> partitions = toPartitionNamesAndValues(partitionName);
        if (partitionColumns.size() != partitions.size()) {
            throw new PrestoException(HIVE_INVALID_METADATA, String.format("Expected %d partition key values, but got %d", partitionColumns.size(), partitions.size()));
        }
        partitionTypes.forEach((name, type) -> {
            String value = partitions.get(name);
            if (value == null) {
                throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, String.format("partition key value cannot be null for field: %s", name));
            }
            partitionNameAndValuesMap.put(name, parsePartitionValue(name, value, type, timeZone));
        });
        TupleDomain<String> tupleDomain = TupleDomain.fromFixedValues(partitionNameAndValuesMap.build());
        partitionNamesAndValues.add(tupleDomain);
    }
    return new MaterializedDataPredicates(partitionNamesAndValues.build(), partitionColumns.stream().map(Column::getName).collect(toImmutableList()));
}
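
toPartitionNamesAndValues is not shown above; its job is to turn a Hive partition name such as ds=2023-10-01/region=us into a column-to-value map, which the loop above then converts into typed NullableValues. A minimal sketch of that kind of parsing, assuming '/'-separated key=value pairs and ignoring Hive's percent-style escaping of special characters:

import java.util.LinkedHashMap;
import java.util.Map;

public final class PartitionNameParsingSketch
{
    private PartitionNameParsingSketch() {}

    // Simplified illustration only: real Hive partition names escape special characters
    // (for example '/' becomes '%2F'), which this sketch does not handle.
    public static Map<String, String> toNamesAndValues(String partitionName)
    {
        Map<String, String> result = new LinkedHashMap<>();
        for (String component : partitionName.split("/")) {
            int separator = component.indexOf('=');
            if (separator < 0) {
                throw new IllegalArgumentException("Malformed partition name component: " + component);
            }
            result.put(component.substring(0, separator), component.substring(separator + 1));
        }
        return result;
    }

    public static void main(String[] args)
    {
        // Prints {ds=2023-10-01, region=us}
        System.out.println(toNamesAndValues("ds=2023-10-01/region=us"));
    }
}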
Use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb:
class HiveSplitManager, method getPartitionSplitInfo.
private Map<String, PartitionSplitInfo> getPartitionSplitInfo(ConnectorSession session, SemiTransactionalHiveMetastore metastore, SchemaTableName tableName, List<HivePartition> partitionBatch, Map<String, HiveColumnHandle> predicateColumns, Optional<Map<Subfield, Domain>> domains) {
    MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider());
    Map<String, Optional<Partition>> partitions = metastore.getPartitionsByNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), Lists.transform(partitionBatch, HivePartition::getPartitionId));
    Map<String, PartitionStatistics> partitionStatistics = ImmutableMap.of();
    if (domains.isPresent() && isPartitionStatisticsBasedOptimizationEnabled(session)) {
        partitionStatistics = metastore.getPartitionStatistics(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), partitionBatch.stream().map(HivePartition::getPartitionId).collect(toImmutableSet()));
    }
    Map<String, String> partitionNameToLocation = new HashMap<>();
    ImmutableMap.Builder<String, PartitionSplitInfo> partitionSplitInfoBuilder = ImmutableMap.builder();
    for (Map.Entry<String, Optional<Partition>> entry : partitions.entrySet()) {
        ImmutableSet.Builder<ColumnHandle> redundantColumnDomainsBuilder = ImmutableSet.builder();
        if (!entry.getValue().isPresent()) {
            throw new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Partition no longer exists: " + entry.getKey());
        }
        boolean pruned = false;
        if (partitionStatistics.containsKey(entry.getKey())) {
            Map<String, HiveColumnStatistics> columnStatistics = partitionStatistics.get(entry.getKey()).getColumnStatistics();
            for (Map.Entry<String, HiveColumnHandle> predicateColumnEntry : predicateColumns.entrySet()) {
                if (columnStatistics.containsKey(predicateColumnEntry.getKey())) {
                    Optional<ValueSet> columnsStatisticsValueSet = getColumnStatisticsValueSet(columnStatistics.get(predicateColumnEntry.getKey()), predicateColumnEntry.getValue().getHiveType());
                    Subfield subfield = new Subfield(predicateColumnEntry.getKey());
                    if (columnsStatisticsValueSet.isPresent() && domains.get().containsKey(subfield)) {
                        ValueSet columnPredicateValueSet = domains.get().get(subfield).getValues();
                        if (!columnPredicateValueSet.overlaps(columnsStatisticsValueSet.get())) {
                            pruned = true;
                            break;
                        }
                        if (columnPredicateValueSet.contains(columnsStatisticsValueSet.get())) {
                            redundantColumnDomainsBuilder.add(predicateColumnEntry.getValue());
                        }
                    }
                }
            }
        }
        if (!pruned) {
            partitionNameToLocation.put(entry.getKey(), entry.getValue().get().getStorage().getLocation());
        }
        partitionSplitInfoBuilder.put(entry.getKey(), new PartitionSplitInfo(entry.getValue().get(), pruned, redundantColumnDomainsBuilder.build()));
    }
    metastore.setPartitionLeases(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), partitionNameToLocation, getLeaseDuration(session));
    return partitionSplitInfoBuilder.build();
}
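
The pruning decision above comes down to two comparisons between a column's predicate domain and the value set implied by that column's partition statistics: if the two do not overlap, the whole partition can be skipped; if the statistics are fully contained in the predicate, the predicate is redundant for that partition and need not be re-evaluated per row. A simplified numeric sketch of those two tests, using plain min/max ranges in place of Presto's ValueSet and Domain types:

public final class StatsPruningSketch
{
    private StatsPruningSketch() {}

    public enum Decision { PRUNE_PARTITION, PREDICATE_REDUNDANT, KEEP_AND_FILTER }

    // Closed numeric range [min, max]; stands in for a Presto ValueSet in this illustration.
    public static final class Range
    {
        private final long min;
        private final long max;

        public Range(long min, long max)
        {
            this.min = min;
            this.max = max;
        }

        public boolean overlaps(Range other)
        {
            return min <= other.max && other.min <= max;
        }

        public boolean contains(Range other)
        {
            return min <= other.min && other.max <= max;
        }
    }

    public static Decision decide(Range predicateValues, Range partitionColumnStats)
    {
        if (!predicateValues.overlaps(partitionColumnStats)) {
            return Decision.PRUNE_PARTITION;      // no row in this partition can satisfy the predicate
        }
        if (predicateValues.contains(partitionColumnStats)) {
            return Decision.PREDICATE_REDUNDANT;  // every row in this partition satisfies the predicate
        }
        return Decision.KEEP_AND_FILTER;
    }

    public static void main(String[] args)
    {
        System.out.println(decide(new Range(0, 10), new Range(20, 30)));  // PRUNE_PARTITION
        System.out.println(decide(new Range(0, 100), new Range(20, 30))); // PREDICATE_REDUNDANT
        System.out.println(decide(new Range(0, 25), new Range(20, 30)));  // KEEP_AND_FILTER
    }
}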
Use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb:
class HiveSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle, SplitSchedulingContext splitSchedulingContext) {
    HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
    SchemaTableName tableName = layout.getSchemaTableName();
    // get table metadata
    TransactionalMetadata metadata = hiveTransactionManager.get(transaction);
    if (metadata == null) {
        throw new PrestoException(HIVE_TRANSACTION_NOT_FOUND, format("Transaction not found: %s", transaction));
    }
    SemiTransactionalHiveMetastore metastore = metadata.getMetastore();
    Table table = metastore.getTable(
            new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider()),
            tableName.getSchemaName(),
            tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (!isOfflineDataDebugModeEnabled(session)) {
        // verify table is not marked as non-readable
        String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
        if (!isNullOrEmpty(tableNotReadable)) {
            throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
        }
    }
    // get partitions
    List<HivePartition> partitions = layout.getPartitions().orElseThrow(() -> new PrestoException(GENERIC_INTERNAL_ERROR, "Layout does not contain partitions"));
    // short circuit if we don't have any partitions
    HivePartition partition = Iterables.getFirst(partitions, null);
    if (partition == null) {
        return new FixedSplitSource(ImmutableList.of());
    }
    Optional<HiveBucketFilter> bucketFilter = layout.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = layout.getBucketHandle();
    if ((splitSchedulingContext.getSplitSchedulingStrategy() == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    if (bucketHandle.isPresent()) {
        if (bucketHandle.get().getReadBucketCount() > bucketHandle.get().getTableBucketCount()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("readBucketCount (%s) is greater than the tableBucketCount (%s) which generally points to an issue in plan generation", bucketHandle.get().getReadBucketCount(), bucketHandle.get().getTableBucketCount()));
        }
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(metastore, table, tableName, partitions, bucketHandle, session, splitSchedulingContext.getWarningCollector(), layout.getRequestedColumns(), layout.getPredicateColumns(), layout.getDomainPredicate().getDomains());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hivePartitions,
            getPathDomain(layout.getDomainPredicate(), layout.getPredicateColumns()),
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            // Avoid over-committing split loader concurrency
            min(splitLoaderConcurrency, partitions.size()),
            recursiveDfsWalkerEnabled,
            splitSchedulingContext.schedulerUsesHostAddresses(),
            layout.isPartialAggregationsPushedDown());
    HiveSplitSource splitSource;
    CacheQuotaRequirement cacheQuotaRequirement = cacheQuotaRequirementProvider.getCacheQuotaRequirement(table.getDatabaseName(), table.getTableName());
    switch (splitSchedulingContext.getSplitSchedulingStrategy()) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplits, maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplits, maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        case REWINDABLE_GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketedRewindable(session, table.getDatabaseName(), table.getTableName(), cacheQuotaRequirement, getHiveMaxInitialSplitSize(session), maxOutstandingSplitsSize, hiveSplitLoader, executor, new CounterStat());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingContext.getSplitSchedulingStrategy());
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
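
Both getPartitionSplitInfo and getSplits build the MetastoreContext from the session in the same way. A small sketch of how that construction could be factored into a helper; the helper class and the HiveSessionProperties static imports are assumptions made for illustration, while the constructor argument list is taken directly from the code above:

import com.facebook.presto.hive.metastore.MetastoreContext;
import com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore;
import com.facebook.presto.spi.ConnectorSession;

// Assumed locations of the session-property accessors used above.
import static com.facebook.presto.hive.HiveSessionProperties.getMetastoreHeaders;
import static com.facebook.presto.hive.HiveSessionProperties.isUserDefinedTypeEncodingEnabled;

final class MetastoreContextFactory
{
    private MetastoreContextFactory() {}

    // Hypothetical helper, not part of HiveSplitManager: mirrors the MetastoreContext
    // construction repeated in getPartitionSplitInfo and getSplits.
    static MetastoreContext fromSession(ConnectorSession session, SemiTransactionalHiveMetastore metastore)
    {
        return new MetastoreContext(
                session.getIdentity(),
                session.getQueryId(),
                session.getClientInfo(),
                session.getSource(),
                getMetastoreHeaders(session),
                isUserDefinedTypeEncodingEnabled(session),
                metastore.getColumnConverterProvider());
    }
}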
Use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb:
class SqlStandardAccessControl, method checkCanSetRole.
@Override
public void checkCanSetRole(ConnectorTransactionHandle transaction, ConnectorIdentity identity, AccessControlContext context, String role, String catalogName) {
    SemiTransactionalHiveMetastore metastore = getMetastore(transaction);
    MetastoreContext metastoreContext = new MetastoreContext(identity, context.getQueryId().getId(), context.getClientInfo(), context.getSource(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    if (!isRoleApplicable(metastore, identity, new PrestoPrincipal(USER, identity.getUser()), metastoreContext, role)) {
        denySetRole(role);
    }
}
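
isRoleApplicable is not shown above; conceptually it asks whether the requested role is reachable from the user's principal through the metastore's role grants, including roles granted transitively through other roles. A simplified standalone sketch of that reachability check over an in-memory grant map (the map and names here are illustrative, not the Hive metastore API, and special cases such as the admin role are omitted):

import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Deque;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public final class RoleApplicabilitySketch
{
    private RoleApplicabilitySketch() {}

    /**
     * Returns true if the given role is granted to the principal directly or transitively
     * (a granted role may itself have further roles granted to it).
     * The grants map associates a principal or role name with the roles granted to it.
     */
    public static boolean isRoleApplicable(String principal, String role, Map<String, Set<String>> grants)
    {
        Set<String> visited = new HashSet<>();
        Deque<String> pending = new ArrayDeque<>();
        pending.push(principal);
        while (!pending.isEmpty()) {
            String current = pending.pop();
            if (!visited.add(current)) {
                continue; // already expanded; protects against cycles in role grants
            }
            for (String granted : grants.getOrDefault(current, Collections.emptySet())) {
                if (granted.equals(role)) {
                    return true;
                }
                pending.push(granted);
            }
        }
        return false;
    }
}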