Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class HiveMetadata, method getTableLayout.
@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle)
{
    HiveTableLayoutHandle hiveLayoutHandle = (HiveTableLayoutHandle) layoutHandle;
    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveLayoutHandle.getPartitionColumns());
    List<HivePartition> partitions = hiveLayoutHandle.getPartitions().get();

    Optional<DiscretePredicates> discretePredicates = Optional.empty();
    if (!partitionColumns.isEmpty()) {
        // Do not create tuple domains for every partition at the same time!
        // There can be a huge number of partitions, so use an iterable so that
        // all domains do not need to be in memory at the same time.
        Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(partitions, (hivePartition) -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
        discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
    }

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    SchemaTableName tableName = hiveLayoutHandle.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    // Never ignore table bucketing for temporary tables, as bucketing for those is explicitly requested by the engine
    boolean bucketExecutionEnabled = table.getTableType().equals(TEMPORARY_TABLE) || isBucketExecutionEnabled(session);
    if (bucketExecutionEnabled && hiveLayoutHandle.getBucketHandle().isPresent()) {
        HiveBucketHandle hiveBucketHandle = hiveLayoutHandle.getBucketHandle().get();
        HivePartitioningHandle partitioningHandle;
        int bucketCount = hiveBucketHandle.getReadBucketCount();
        OptionalInt maxCompatibleBucketCount = OptionalInt.empty();

        // A virtually bucketed table does not have a table bucket property
        if (hiveBucketHandle.isVirtuallyBucketed()) {
            partitioningHandle = createHiveCompatiblePartitioningHandle(
                    bucketCount,
                    hiveBucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                    maxCompatibleBucketCount);
        }
        else {
            HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty()
                    .orElseThrow(() -> new IllegalArgumentException("bucketProperty is expected to be present"));
            switch (bucketProperty.getBucketFunctionType()) {
                case HIVE_COMPATIBLE:
                    partitioningHandle = createHiveCompatiblePartitioningHandle(
                            bucketCount,
                            hiveBucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                            maxCompatibleBucketCount);
                    break;
                case PRESTO_NATIVE:
                    partitioningHandle = createPrestoNativePartitioningHandle(bucketCount, bucketProperty.getTypes().get(), maxCompatibleBucketCount);
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported bucket function type " + bucketProperty.getBucketFunctionType());
            }
        }
        tablePartitioning = Optional.of(new ConnectorTablePartitioning(
                partitioningHandle,
                hiveBucketHandle.getColumns().stream().map(ColumnHandle.class::cast).collect(toImmutableList())));
    }

    TupleDomain<ColumnHandle> predicate;
    if (hiveLayoutHandle.isPushdownFilterEnabled()) {
        predicate = hiveLayoutHandle.getDomainPredicate()
                .transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null)
                .transform(hiveLayoutHandle.getPredicateColumns()::get)
                .transform(ColumnHandle.class::cast)
                .intersect(createPredicate(partitionColumns, partitions));
    }
    else {
        predicate = createPredicate(partitionColumns, partitions);
    }

    // Expose the ordering property of the table.
    ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder();
    Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty();
    if (table.getStorage().getBucketProperty().isPresent() && !table.getStorage().getBucketProperty().get().getSortedBy().isEmpty()) {
        ImmutableSet.Builder<ColumnHandle> streamPartitionColumnsBuilder = ImmutableSet.builder();

        // streamPartitionColumns is how we partition the data across splits.
        // localProperties is how we partition the data within a split.
        // 1. add partition columns to streamPartitionColumns
        partitionColumns.forEach(streamPartitionColumnsBuilder::add);

        // 2. add sorted columns to streamPartitionColumns and localProperties
        HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty().get();
        Map<String, ColumnHandle> columnHandles = hiveColumnHandles(table).stream().collect(toImmutableMap(HiveColumnHandle::getName, identity()));
        bucketProperty.getSortedBy().forEach(sortingColumn -> {
            ColumnHandle columnHandle = columnHandles.get(sortingColumn.getColumnName());
            localProperties.add(new SortingProperty<>(columnHandle, sortingColumn.getOrder().getSortOrder()));
            streamPartitionColumnsBuilder.add(columnHandle);
        });

        // We only set streamPartitionColumns when streaming aggregation is enabled and the table is eligible for it:
        // 1. the bucket columns must match the prefix of the sort columns, and
        // 2. all rows of the same value group must be guaranteed to be in the same split; splitting a file is
        //    disabled when isStreamingAggregationEnabled is true to guarantee this property.
        List<String> sortColumns = bucketProperty.getSortedBy().stream().map(SortingColumn::getColumnName).collect(toImmutableList());
        if (bucketProperty.getBucketedBy().size() <= sortColumns.size()
                && bucketProperty.getBucketedBy().containsAll(sortColumns.subList(0, bucketProperty.getBucketedBy().size()))
                && isStreamingAggregationEnabled(session)) {
            streamPartitionColumns = Optional.of(streamPartitionColumnsBuilder.build());
        }
    }

    return new ConnectorTableLayout(
            hiveLayoutHandle,
            Optional.empty(),
            predicate,
            tablePartitioning,
            streamPartitionColumns,
            discretePredicates,
            localProperties.build(),
            Optional.of(hiveLayoutHandle.getRemainingPredicate()));
}
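For reference, a minimal self-contained sketch of what TupleDomain.fromFixedValues produces for one partition, as used to build the DiscretePredicates above. The column names and values are hypothetical, not taken from any real table:

import static com.facebook.presto.common.type.VarcharType.VARCHAR;

import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;

public class FromFixedValuesExample
{
    public static void main(String[] args)
    {
        // Hypothetical partition ds=2021-01-01/region=us: every key is pinned to a
        // single value, so the resulting TupleDomain matches exactly that partition.
        TupleDomain<String> partitionDomain = TupleDomain.fromFixedValues(ImmutableMap.of(
                "ds", NullableValue.of(VARCHAR, Slices.utf8Slice("2021-01-01")),
                "region", NullableValue.of(VARCHAR, Slices.utf8Slice("us"))));
        System.out.println(partitionDomain.getDomains().get().keySet()); // the two constrained columns
    }
}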
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class HivePartitionManager, method getPartitionsIterator.
public Iterable<HivePartition> getPartitionsIterator(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, ConnectorSession session)
{
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicateColumnHandles = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, tableName, isOfflineDataDebugModeEnabled(session));

    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionTypes = partitionColumns.stream()
            .map(column -> typeManager.getType(column.getTypeSignature()))
            .collect(toList());

    Map<Column, Domain> effectivePredicate = createPartitionPredicates(metastore, session, effectivePredicateColumnHandles, partitionColumns, assumeCanonicalPartitionKeys);

    if (partitionColumns.isEmpty()) {
        return ImmutableList.of(new HivePartition(tableName));
    }
    else {
        return () -> {
            List<String> filteredPartitionNames = getFilteredPartitionNames(session, metastore, tableName, effectivePredicate);
            return filteredPartitionNames.stream()
                    .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, constraint))
                    .filter(Optional::isPresent)
                    .map(Optional::get)
                    .iterator();
        };
    }
}
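The filtering itself is delegated to the metastore and to parseValuesAndFilterPartition, but the core TupleDomain check behind partition pruning can be sketched in isolation. This is a simplified illustration with a hypothetical String-keyed domain, not the connector's actual code path:

import static com.facebook.presto.common.type.VarcharType.VARCHAR;

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.Range;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.predicate.ValueSet;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;

public class PartitionPruningExample
{
    public static void main(String[] args)
    {
        // Constraint summary: ds >= '2021-01-01', nulls not allowed
        TupleDomain<String> summary = TupleDomain.withColumnDomains(ImmutableMap.of(
                "ds", Domain.create(ValueSet.ofRanges(Range.greaterThanOrEqual(VARCHAR, Slices.utf8Slice("2021-01-01"))), false)));

        // A partition survives pruning when every constrained column includes its value
        Domain dsDomain = summary.getDomains().get().get("ds");
        System.out.println(dsDomain.includesNullableValue(Slices.utf8Slice("2021-06-01"))); // true
        System.out.println(dsDomain.includesNullableValue(Slices.utf8Slice("2020-12-31"))); // false
    }
}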
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class HiveMaterializedViewUtils, method getMaterializedDataPredicates.
public static MaterializedDataPredicates getMaterializedDataPredicates(SemiTransactionalHiveMetastore metastore, MetastoreContext metastoreContext, TypeManager typeManager, Table table, DateTimeZone timeZone)
{
    List<Column> partitionColumns = table.getPartitionColumns();
    for (Column partitionColumn : partitionColumns) {
        HiveType hiveType = partitionColumn.getType();
        if (!hiveType.isSupportedType()) {
            throw new PrestoException(NOT_SUPPORTED, String.format("Unsupported Hive type %s found in partition keys of table %s.%s", hiveType, table.getDatabaseName(), table.getTableName()));
        }
    }
    List<HiveColumnHandle> partitionKeyColumnHandles = getPartitionKeyColumnHandles(table);
    Map<String, Type> partitionTypes = partitionKeyColumnHandles.stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> typeManager.getType(column.getTypeSignature())));
    List<String> partitionNames = metastore.getPartitionNames(metastoreContext, table.getDatabaseName(), table.getTableName())
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName())));

    ImmutableList.Builder<TupleDomain<String>> partitionNamesAndValues = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        ImmutableMap.Builder<String, NullableValue> partitionNameAndValuesMap = ImmutableMap.builder();
        Map<String, String> partitions = toPartitionNamesAndValues(partitionName);
        if (partitionColumns.size() != partitions.size()) {
            throw new PrestoException(HIVE_INVALID_METADATA, String.format("Expected %d partition key values, but got %d", partitionColumns.size(), partitions.size()));
        }
        partitionTypes.forEach((name, type) -> {
            String value = partitions.get(name);
            if (value == null) {
                throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, String.format("partition key value cannot be null for field: %s", name));
            }
            partitionNameAndValuesMap.put(name, parsePartitionValue(name, value, type, timeZone));
        });
        TupleDomain<String> tupleDomain = TupleDomain.fromFixedValues(partitionNameAndValuesMap.build());
        partitionNamesAndValues.add(tupleDomain);
    }
    return new MaterializedDataPredicates(partitionNamesAndValues.build(), partitionColumns.stream().map(Column::getName).collect(toImmutableList()));
}
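Each partition name is thus turned into a fixed-value TupleDomain<String>. A minimal sketch of that conversion, using a hypothetical splitPartitionName helper in place of toPartitionNamesAndValues and assuming all partition keys are VARCHAR:

import static com.facebook.presto.common.type.VarcharType.VARCHAR;

import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;

public class PartitionNameToDomainExample
{
    // Hypothetical stand-in for toPartitionNamesAndValues: splits "ds=2021-01-01/region=us"
    private static ImmutableMap<String, String> splitPartitionName(String partitionName)
    {
        ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
        for (String part : partitionName.split("/")) {
            String[] keyValue = part.split("=", 2);
            builder.put(keyValue[0], keyValue[1]);
        }
        return builder.build();
    }

    public static void main(String[] args)
    {
        ImmutableMap.Builder<String, NullableValue> values = ImmutableMap.builder();
        splitPartitionName("ds=2021-01-01/region=us")
                .forEach((name, value) -> values.put(name, NullableValue.of(VARCHAR, Slices.utf8Slice(value))));
        TupleDomain<String> predicate = TupleDomain.fromFixedValues(values.build());
        System.out.println(predicate.isAll()); // false: both keys are constrained
    }
}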
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class RcFilePageSourceFactory, method createPageSource.
@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation)
{
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " +
                "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() +
                ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }

    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }

    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats),
                rcFileEncoding,
                readColumns.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    }
    catch (Throwable e) {
        try {
            inputStream.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
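Note that, unlike the ORC and Parquet page sources, this method accepts effectivePredicate but never consults it, so no predicate-based pruning happens at this layer. A caller with nothing to push down passes the unconstrained domain; a small sketch of its behavior, with String keys used purely for illustration:

import static com.facebook.presto.common.type.VarcharType.VARCHAR;

import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;

public class UnconstrainedDomainExample
{
    public static void main(String[] args)
    {
        TupleDomain<String> all = TupleDomain.all();
        System.out.println(all.isAll()); // true: matches every row

        // Intersecting any predicate with the unconstrained domain leaves it unchanged
        TupleDomain<String> predicate = TupleDomain.fromFixedValues(ImmutableMap.of(
                "ds", NullableValue.of(VARCHAR, Slices.utf8Slice("2021-01-01"))));
        System.out.println(predicate.intersect(all).equals(predicate)); // true
    }
}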
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class ParquetPageSourceFactory, method getParquetTupleDomain.
public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<HiveColumnHandle> effectivePredicate)
{
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }

    ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
    for (Entry<HiveColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        HiveColumnHandle columnHandle = entry.getKey();
        // skip looking up predicates for complex types as Parquet only stores stats for primitives
        if (!columnHandle.getHiveType().getCategory().equals(PRIMITIVE)) {
            continue;
        }
        RichColumnDescriptor descriptor;
        if (isPushedDownSubfield(columnHandle)) {
            Subfield pushedDownSubfield = getPushedDownSubfield(columnHandle);
            List<String> subfieldPath = columnPathFromSubfield(pushedDownSubfield);
            descriptor = descriptorsByPath.get(subfieldPath);
        }
        else {
            descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
        }
        if (descriptor != null) {
            predicate.put(descriptor, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(predicate.build());
}
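The same kind of key translation can also be expressed with TupleDomain.transform, which drops entries whose mapping function returns null, mirroring the descriptor != null check above (and the transform chain in getTableLayout). A sketch with hypothetical String column names mapped to column indexes:

import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;
import java.util.Map;

public class TransformExample
{
    public static void main(String[] args)
    {
        Map<String, Integer> columnIndexes = ImmutableMap.of("ds", 0, "user_id", 1);
        TupleDomain<String> byName = TupleDomain.withColumnDomains(ImmutableMap.of(
                "ds", Domain.singleValue(VARCHAR, Slices.utf8Slice("2021-01-01")),
                "unknown_column", Domain.singleValue(BIGINT, 7L)));

        // "unknown_column" maps to null and is silently dropped from the result
        TupleDomain<Integer> byIndex = byName.transform(columnIndexes::get);
        System.out.println(byIndex.getDomains().get().keySet()); // [0]
    }
}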