Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.
The class PushJoinIntoTableScan, method apply.
@Override
public Result apply(JoinNode joinNode, Captures captures, Context context) {
if (joinNode.isCrossJoin()) {
return Result.empty();
}
TableScanNode left = captures.get(LEFT_TABLE_SCAN);
TableScanNode right = captures.get(RIGHT_TABLE_SCAN);
verify(!left.isUpdateTarget() && !right.isUpdateTarget(), "Unexpected Join over for-update table scan");
Expression effectiveFilter = getEffectiveFilter(joinNode);
FilterSplitResult filterSplitResult = splitFilter(effectiveFilter, left.getOutputSymbols(), right.getOutputSymbols(), context);
if (!filterSplitResult.getRemainingFilter().equals(BooleanLiteral.TRUE_LITERAL)) {
// TODO add extra filter node above join
return Result.empty();
}
if (left.getEnforcedConstraint().isNone() || right.getEnforcedConstraint().isNone()) {
// A "none" enforced constraint on either side would complicate deriving the joined enforced constraint below, so bail out.
return Result.empty();
}
Map<String, ColumnHandle> leftAssignments = left.getAssignments().entrySet().stream().collect(toImmutableMap(entry -> entry.getKey().getName(), Map.Entry::getValue));
Map<String, ColumnHandle> rightAssignments = right.getAssignments().entrySet().stream().collect(toImmutableMap(entry -> entry.getKey().getName(), Map.Entry::getValue));
/*
* We are (lazily) computing estimated statistics for join node and left and right table
* and passing those to connector via applyJoin.
*
* There are a couple reasons for this approach:
* - the engine knows how to estimate join and connector may not
* - the engine may have cached stats for the table scans (within context.getStatsProvider()), so it can provide that information more cheaply
* - in the future, the engine may be able to provide stats for table scan even in case when connector no longer can (see https://github.com/trinodb/trino/issues/6998)
* - the pushdown feasibility assessment logic may be different (or configured differently) for different connectors/catalogs.
*/
JoinStatistics joinStatistics = getJoinStatistics(joinNode, left, right, context);
Optional<JoinApplicationResult<TableHandle>> joinApplicationResult = metadata.applyJoin(
        context.getSession(),
        getJoinType(joinNode),
        left.getTable(),
        right.getTable(),
        filterSplitResult.getPushableConditions(),
        // TODO we could pass only subset of assignments here, those which are needed to resolve filterSplitResult.getPushableConditions
        leftAssignments,
        rightAssignments,
        joinStatistics);
if (joinApplicationResult.isEmpty()) {
return Result.empty();
}
TableHandle handle = joinApplicationResult.get().getTableHandle();
Map<ColumnHandle, ColumnHandle> leftColumnHandlesMapping = joinApplicationResult.get().getLeftColumnHandles();
Map<ColumnHandle, ColumnHandle> rightColumnHandlesMapping = joinApplicationResult.get().getRightColumnHandles();
ImmutableMap.Builder<Symbol, ColumnHandle> assignmentsBuilder = ImmutableMap.builder();
assignmentsBuilder.putAll(left.getAssignments().entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> leftColumnHandlesMapping.get(entry.getValue()))));
assignmentsBuilder.putAll(right.getAssignments().entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> rightColumnHandlesMapping.get(entry.getValue()))));
Map<Symbol, ColumnHandle> assignments = assignmentsBuilder.buildOrThrow();
// convert enforced constraint
JoinNode.Type joinType = joinNode.getType();
TupleDomain<ColumnHandle> leftConstraint = deriveConstraint(left.getEnforcedConstraint(), leftColumnHandlesMapping, joinType == RIGHT || joinType == FULL);
TupleDomain<ColumnHandle> rightConstraint = deriveConstraint(right.getEnforcedConstraint(), rightColumnHandlesMapping, joinType == LEFT || joinType == FULL);
TupleDomain<ColumnHandle> newEnforcedConstraint = TupleDomain.withColumnDomains(ImmutableMap.<ColumnHandle, Domain>builder().putAll(leftConstraint.getDomains().orElseThrow()).putAll(rightConstraint.getDomains().orElseThrow()).buildOrThrow());
return Result.ofPlanNode(
        new ProjectNode(
                context.getIdAllocator().getNextId(),
                new TableScanNode(
                        joinNode.getId(),
                        handle,
                        ImmutableList.copyOf(assignments.keySet()),
                        assignments,
                        newEnforcedConstraint,
                        deriveTableStatisticsForPushdown(context.getStatsProvider(), context.getSession(), joinApplicationResult.get().isPrecalculateStatistics(), joinNode),
                        false,
                        Optional.empty()),
                Assignments.identity(joinNode.getOutputSymbols())));
}
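The rule ends by merging the two sides' enforced constraints into a single TupleDomain for the new table scan. Below is a minimal, self-contained sketch of that merge step using the Trino SPI predicate types; the ExampleColumnHandle record and the concrete domains are made up for illustration and are not part of the Trino codebase.

import com.google.common.collect.ImmutableMap;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.BigintType;

public class MergeConstraintsExample
{
    // Hypothetical column handle, used only for this example
    record ExampleColumnHandle(String name) implements ColumnHandle {}

    public static TupleDomain<ColumnHandle> mergedConstraint()
    {
        ColumnHandle leftColumn = new ExampleColumnHandle("l_orderkey");
        ColumnHandle rightColumn = new ExampleColumnHandle("r_custkey");
        TupleDomain<ColumnHandle> leftConstraint = TupleDomain.withColumnDomains(
                ImmutableMap.of(leftColumn, Domain.singleValue(BigintType.BIGINT, 42L)));
        TupleDomain<ColumnHandle> rightConstraint = TupleDomain.withColumnDomains(
                ImmutableMap.of(rightColumn, Domain.onlyNull(BigintType.BIGINT)));

        // Both sides are known to be non-"none" at this point (the rule bails out earlier otherwise),
        // and their key sets are disjoint because they refer to columns of different source tables,
        // so the two domain maps can simply be concatenated.
        return TupleDomain.withColumnDomains(
                ImmutableMap.<ColumnHandle, Domain>builder()
                        .putAll(leftConstraint.getDomains().orElseThrow())
                        .putAll(rightConstraint.getDomains().orElseThrow())
                        .buildOrThrow());
    }
}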
Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.
The class HiveMetadata, method getTableProperties.
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
HiveTableHandle hiveTable = (HiveTableHandle) table;
List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveTable.getPartitionColumns());
TupleDomain<ColumnHandle> predicate = TupleDomain.all();
Optional<DiscretePredicates> discretePredicates = Optional.empty();
// When partition names are already carried in the table handle, the computation of predicate and discretePredicates below would not be valid, so it is only done when they are absent.
if (hiveTable.getPartitionNames().isEmpty()) {
Optional<List<HivePartition>> partitions = hiveTable.getPartitions().or(() -> {
// We load the partitions to compute the predicates enforced by the table.
// Note that the computation is not persisted in the table handle, so can be redone many times
// TODO: https://github.com/trinodb/trino/issues/10980.
HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, table, new Constraint(hiveTable.getEnforcedConstraint()));
if (partitionManager.canPartitionsBeLoaded(partitionResult)) {
return Optional.of(partitionManager.getPartitionsAsList(partitionResult));
}
return Optional.empty();
});
if (partitions.isPresent()) {
List<HivePartition> hivePartitions = partitions.orElseThrow();
// Since the partitions are fully loaded now, compute the effective predicate from them.
predicate = createPredicate(partitionColumns, hivePartitions);
// This check ensures that the table is partitioned before building discrete predicates.
if (!partitionColumns.isEmpty()) {
// Do not create tuple domains for every partition at the same time!
// There can be a huge number of partitions so use an iterable so
// all domains do not need to be in memory at the same time.
Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(hivePartitions, hivePartition -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
}
}
}
Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
List<LocalProperty<ColumnHandle>> sortingProperties = ImmutableList.of();
if (hiveTable.getBucketHandle().isPresent()) {
if (isPropagateTableScanSortingProperties(session) && !hiveTable.getBucketHandle().get().getSortedBy().isEmpty()) {
// Populating SortingProperty guarantees to the engine that it is reading pre-sorted input.
// We detect compatibility between table and partition level sorted_by properties
// and fail the query if there is a mismatch in HiveSplitManager#getPartitionMetadata.
// This can lead to incorrect results if a sorted_by property is defined over unsorted files.
Map<String, ColumnHandle> columnHandles = getColumnHandles(session, table);
sortingProperties = hiveTable.getBucketHandle().get().getSortedBy().stream().map(sortingColumn -> new SortingProperty<>(columnHandles.get(sortingColumn.getColumnName()), sortingColumn.getOrder().getSortOrder())).collect(toImmutableList());
}
if (isBucketExecutionEnabled(session)) {
tablePartitioning = hiveTable.getBucketHandle().map(bucketing -> new ConnectorTablePartitioning(new HivePartitioningHandle(bucketing.getBucketingVersion(), bucketing.getReadBucketCount(), bucketing.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()), OptionalInt.empty(), false), bucketing.getColumns().stream().map(ColumnHandle.class::cast).collect(toImmutableList())));
}
}
return new ConnectorTableProperties(predicate, tablePartitioning, Optional.empty(), discretePredicates, sortingProperties);
}
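Each entry of discretePredicates above is a TupleDomain that pins every partition key to one partition's values, produced lazily by Iterables.transform. The following sketch, with a made-up ExampleColumnHandle and an illustrative partition value, shows what one such per-partition domain looks like when built with TupleDomain.fromFixedValues.

import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slices;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.VarcharType;

public class PartitionDomainExample
{
    // Hypothetical column handle, used only for this example
    record ExampleColumnHandle(String name) implements ColumnHandle {}

    public static TupleDomain<ColumnHandle> domainForPartition()
    {
        // Corresponds to a partition such as ds=2024-01-01 of a table partitioned on "ds"
        ColumnHandle dsColumn = new ExampleColumnHandle("ds");
        return TupleDomain.fromFixedValues(ImmutableMap.of(
                dsColumn,
                NullableValue.of(VarcharType.VARCHAR, Slices.utf8Slice("2024-01-01"))));
    }
}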
Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.
The class HiveMetadata, method beginInsert.
@Override
public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns, RetryMode retryMode) {
SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
checkTableIsWritable(table, writesToNonManagedTablesEnabled);
for (Column column : table.getDataColumns()) {
if (!isWritableType(column.getType())) {
throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table %s with column type %s not supported", tableName, column.getType()));
}
}
boolean isTransactional = isTransactionalTable(table.getParameters());
if (isTransactional && retryMode != NO_RETRIES) {
throw new TrinoException(NOT_SUPPORTED, "Inserting into Hive transactional tables is not supported with query retries enabled");
}
if (isTransactional && !autoCommit) {
throw new TrinoException(NOT_SUPPORTED, "Inserting into Hive transactional tables is not supported in explicit transactions (use autocommit mode)");
}
if (isSparkBucketedTable(table)) {
throw new TrinoException(NOT_SUPPORTED, "Inserting into Spark bucketed tables is not supported");
}
List<HiveColumnHandle> handles = hiveColumnHandles(table, typeManager, getTimestampPrecision(session)).stream().filter(columnHandle -> !columnHandle.isHidden()).collect(toImmutableList());
HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
Optional.ofNullable(table.getParameters().get(SKIP_HEADER_COUNT_KEY)).map(Integer::parseInt).ifPresent(headerSkipCount -> {
if (headerSkipCount > 1) {
throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table with value of %s property greater than 1 is not supported", SKIP_HEADER_COUNT_KEY));
}
});
if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table with %s property not supported", SKIP_FOOTER_COUNT_KEY));
}
LocationHandle locationHandle = locationService.forExistingTable(metastore, session, table);
AcidTransaction transaction = isTransactional ? metastore.beginInsert(session, table) : NO_ACID_TRANSACTION;
HiveInsertTableHandle result = new HiveInsertTableHandle(tableName.getSchemaName(), tableName.getTableName(), handles, metastore.generatePageSinkMetadata(tableName), locationHandle, table.getStorage().getBucketProperty(), tableStorageFormat, isRespectTableFormat(session) ? tableStorageFormat : getHiveStorageFormat(session), transaction, retryMode != NO_RETRIES);
WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
if (getInsertExistingPartitionsBehavior(session) == InsertExistingPartitionsBehavior.OVERWRITE && writeInfo.getWriteMode() == DIRECT_TO_TARGET_EXISTING_DIRECTORY) {
if (isTransactional) {
throw new TrinoException(NOT_SUPPORTED, "Overwriting existing partition in transactional tables doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
}
// Partition overwrite operation is nonatomic thus can't and shouldn't be used in non autocommit context.
if (!autoCommit) {
throw new TrinoException(NOT_SUPPORTED, "Overwriting existing partition in non auto commit context doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
}
}
metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), tableName);
return result;
}
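The header-skip validation above parses an optional table parameter and rejects values greater than one. A toy, standalone version of the same Optional-based check is sketched below; the literal property key "skip.header.line.count" is the conventional Hive name and is an assumption here, standing in for the snippet's SKIP_HEADER_COUNT_KEY constant.

import java.util.Map;
import java.util.Optional;

public class HeaderSkipValidationExample
{
    public static void validate(Map<String, String> tableParameters)
    {
        // Mirrors the NOT_SUPPORTED check in beginInsert above, applied to a plain parameters map
        Optional.ofNullable(tableParameters.get("skip.header.line.count"))
                .map(Integer::parseInt)
                .ifPresent(headerSkipCount -> {
                    if (headerSkipCount > 1) {
                        throw new IllegalArgumentException(
                                "Inserting into a table that skips more than one header line is not supported");
                    }
                });
    }
}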
Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.
The class HivePartitionManager, method getPartitions.
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint constraint) {
HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary().intersect(hiveTableHandle.getEnforcedConstraint());
SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
Optional<HiveBucketHandle> hiveBucketHandle = hiveTableHandle.getBucketHandle();
List<HiveColumnHandle> partitionColumns = hiveTableHandle.getPartitionColumns();
if (effectivePredicate.isNone()) {
return new HivePartitionResult(partitionColumns, Optional.empty(), ImmutableList.of(), TupleDomain.none(), TupleDomain.none(), hiveBucketHandle, Optional.empty());
}
Optional<HiveBucketFilter> bucketFilter = getHiveBucketFilter(hiveTableHandle, effectivePredicate);
TupleDomain<HiveColumnHandle> compactEffectivePredicate = effectivePredicate.transformKeys(HiveColumnHandle.class::cast).simplify(domainCompactionThreshold);
if (partitionColumns.isEmpty()) {
return new HivePartitionResult(partitionColumns, Optional.empty(), ImmutableList.of(new HivePartition(tableName)), effectivePredicate, compactEffectivePredicate, hiveBucketHandle, bucketFilter);
}
List<Type> partitionTypes = partitionColumns.stream().map(HiveColumnHandle::getType).collect(toList());
Optional<List<String>> partitionNames = Optional.empty();
Iterable<HivePartition> partitionsIterable;
Predicate<Map<ColumnHandle, NullableValue>> predicate = constraint.predicate().orElse(value -> true);
if (hiveTableHandle.getPartitions().isPresent()) {
partitionsIterable = hiveTableHandle.getPartitions().get().stream().filter(partition -> partitionMatches(partitionColumns, effectivePredicate, predicate, partition)).collect(toImmutableList());
} else {
List<String> partitionNamesList = hiveTableHandle.getPartitionNames().orElseGet(() -> getFilteredPartitionNames(metastore, tableName, partitionColumns, compactEffectivePredicate));
partitionsIterable = () -> partitionNamesList.stream().map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, effectivePredicate, predicate)).filter(Optional::isPresent).map(Optional::get).iterator();
partitionNames = Optional.of(partitionNamesList);
}
return new HivePartitionResult(partitionColumns, partitionNames, partitionsIterable, effectivePredicate, compactEffectivePredicate, hiveBucketHandle, bucketFilter);
}
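When the handle does not already carry partitions, partitionsIterable above is a lambda-backed Iterable over a stream pipeline, so partition names are parsed and filtered lazily each time the result is iterated, rather than every HivePartition being materialized up front. A minimal generic sketch of that idiom follows; the names and data are illustrative only, not Hive types.

import java.util.List;
import java.util.Optional;

public class LazyIterableExample
{
    public static Iterable<Integer> lazilyParsed(List<String> partitionNames)
    {
        // A fresh stream is created on every call to iterator(), so the mapping and filtering
        // below run only when the Iterable is actually consumed (analogous to
        // parseValuesAndFilterPartition being applied lazily above).
        return () -> partitionNames.stream()
                .map(LazyIterableExample::tryParse)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
    }

    private static Optional<Integer> tryParse(String value)
    {
        try {
            return Optional.of(Integer.parseInt(value));
        }
        catch (NumberFormatException e) {
            return Optional.empty();
        }
    }
}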
Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.
The class HivePartitionManager, method parsePartition.
public static HivePartition parsePartition(SchemaTableName tableName, String partitionName, List<HiveColumnHandle> partitionColumns, List<Type> partitionColumnTypes) {
List<String> partitionValues = extractPartitionValues(partitionName);
ImmutableMap.Builder<ColumnHandle, NullableValue> builder = ImmutableMap.builder();
for (int i = 0; i < partitionColumns.size(); i++) {
HiveColumnHandle column = partitionColumns.get(i);
NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i), partitionColumnTypes.get(i));
builder.put(column, parsedValue);
}
Map<ColumnHandle, NullableValue> values = builder.buildOrThrow();
return new HivePartition(tableName, partitionName, values);
}
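extractPartitionValues turns a Hive partition name such as ds=2024-01-01/country=US into its values in partition-column order, which parsePartition then pairs with the column handles and parses into NullableValues. The sketch below is a simplified stand-in for that extraction step; the real implementation also decodes characters that Hive percent-escapes in partition paths.

import java.util.ArrayList;
import java.util.List;

public class PartitionNameExample
{
    public static List<String> extractValues(String partitionName)
    {
        List<String> values = new ArrayList<>();
        for (String keyValue : partitionName.split("/")) {
            // Keep everything after the first '=' as the partition value
            int separator = keyValue.indexOf('=');
            values.add(keyValue.substring(separator + 1));
        }
        return values;
    }

    public static void main(String[] args)
    {
        // Prints [2024-01-01, US]
        System.out.println(extractValues("ds=2024-01-01/country=US"));
    }
}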