Use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.
The class HiveMetadata, method getTableProperties:
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table)
{
    HiveTableHandle hiveTable = (HiveTableHandle) table;

    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveTable.getPartitionColumns());

    TupleDomain<ColumnHandle> predicate = TupleDomain.all();
    Optional<DiscretePredicates> discretePredicates = Optional.empty();

    // When the handle lists explicit partition names, the computation of predicate and
    // discretePredicates below is not valid, so only do it when no such filter is present.
    if (hiveTable.getPartitionNames().isEmpty()) {
        Optional<List<HivePartition>> partitions = hiveTable.getPartitions()
                .or(() -> {
                    // We load the partitions to compute the predicates enforced by the table.
                    // Note that the computation is not persisted in the table handle, so it can be redone many times.
                    // TODO: https://github.com/trinodb/trino/issues/10980
                    HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, table, new Constraint(hiveTable.getEnforcedConstraint()));
                    if (partitionManager.canPartitionsBeLoaded(partitionResult)) {
                        return Optional.of(partitionManager.getPartitionsAsList(partitionResult));
                    }
                    return Optional.empty();
                });
        if (partitions.isPresent()) {
            List<HivePartition> hivePartitions = partitions.orElseThrow();
            // Since the partitions are fully loaded now, we can compute the predicate they enforce.
            predicate = createPredicate(partitionColumns, hivePartitions);

            // This check ensures that the table is actually partitioned before exposing discrete predicates.
            if (!partitionColumns.isEmpty()) {
                // Do not create tuple domains for every partition at the same time!
                // There can be a huge number of partitions, so use an iterable so that
                // all domains do not need to be in memory at the same time.
                Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(hivePartitions, hivePartition -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
                discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
            }
        }
    }

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    List<LocalProperty<ColumnHandle>> sortingProperties = ImmutableList.of();
    if (hiveTable.getBucketHandle().isPresent()) {
        if (isPropagateTableScanSortingProperties(session) && !hiveTable.getBucketHandle().get().getSortedBy().isEmpty()) {
            // Populating SortingProperty guarantees to the engine that it is reading pre-sorted input.
            // We detect compatibility between table and partition level sorted_by properties
            // and fail the query if there is a mismatch in HiveSplitManager#getPartitionMetadata.
            // This can lead to incorrect results if a sorted_by property is defined over unsorted files.
            Map<String, ColumnHandle> columnHandles = getColumnHandles(session, table);
            sortingProperties = hiveTable.getBucketHandle().get().getSortedBy().stream()
                    .map(sortingColumn -> new SortingProperty<>(
                            columnHandles.get(sortingColumn.getColumnName()),
                            sortingColumn.getOrder().getSortOrder()))
                    .collect(toImmutableList());
        }
        if (isBucketExecutionEnabled(session)) {
            tablePartitioning = hiveTable.getBucketHandle().map(bucketing -> new ConnectorTablePartitioning(
                    new HivePartitioningHandle(
                            bucketing.getBucketingVersion(),
                            bucketing.getReadBucketCount(),
                            bucketing.getColumns().stream()
                                    .map(HiveColumnHandle::getHiveType)
                                    .collect(toImmutableList()),
                            OptionalInt.empty(),
                            false),
                    bucketing.getColumns().stream()
                            .map(ColumnHandle.class::cast)
                            .collect(toImmutableList())));
        }
    }

    return new ConnectorTableProperties(predicate, tablePartitioning, Optional.empty(), discretePredicates, sortingProperties);
}
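The Iterables.transform call above matters because Guava's transform is lazy: each partition's TupleDomain is built only while the engine iterates, so a huge partition list is never materialized as domains all at once. A minimal, self-contained sketch of that behavior (the key strings are hypothetical stand-ins for HivePartition keys):

import com.google.common.collect.Iterables;
import java.util.List;

public class LazyTransformDemo
{
    public static void main(String[] args)
    {
        List<String> partitionKeys = List.of("ds=2024-01-01", "ds=2024-01-02");

        // Nothing is computed here: transform only wraps the source iterable.
        Iterable<String> domains = Iterables.transform(partitionKeys, key -> {
            System.out.println("materializing " + key);
            return "domain(" + key + ")";
        });

        // The mapping function runs one element at a time, during iteration.
        for (String domain : domains) {
            System.out.println(domain);
        }
    }
}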
Use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.
The class HiveMetadata, method getTableHandle:
@Override
public HiveTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName)
{
    requireNonNull(tableName, "tableName is null");
    if (isHiveSystemSchema(tableName.getSchemaName())) {
        return null;
    }
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElse(null);
    if (table == null) {
        return null;
    }

    if (isDeltaLakeTable(table)) {
        throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Cannot query Delta Lake table '%s'", tableName));
    }
    if (isIcebergTable(table)) {
        throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Cannot query Iceberg table '%s'", tableName));
    }

    // We must not allow system tables due to how permissions are checked in SystemTableAwareAccessControl.
    if (getSourceTableNameFromSystemTable(systemTableProviders, tableName).isPresent()) {
        throw new TrinoException(HIVE_INVALID_METADATA, "Unexpected table present in Hive metastore: " + tableName);
    }

    verifyOnline(tableName, Optional.empty(), getProtectMode(table), table.getParameters());

    return new HiveTableHandle(
            tableName.getSchemaName(),
            tableName.getTableName(),
            table.getParameters(),
            getPartitionKeyColumnHandles(table, typeManager),
            getRegularColumnHandles(table, typeManager, getTimestampPrecision(session)),
            getHiveBucketHandle(session, table, typeManager));
}
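The isDeltaLakeTable and isIcebergTable guards above key off markers that Spark/Delta and Iceberg write into the metastore table parameters. A hedged sketch of that detection (the real helpers live in Trino's HiveUtil and take the Table itself; the parameter keys below are assumptions about what those systems store):

import java.util.Map;

// Sketch only: the constants mirror the markers written by Spark/Delta and Iceberg,
// but treat the exact keys as assumptions rather than the authoritative source.
final class LakehouseFormatDetection
{
    private static final String SPARK_TABLE_PROVIDER_KEY = "spark.sql.sources.provider";
    private static final String ICEBERG_TABLE_TYPE_KEY = "table_type";

    static boolean isDeltaLakeTable(Map<String, String> parameters)
    {
        return "delta".equalsIgnoreCase(parameters.get(SPARK_TABLE_PROVIDER_KEY));
    }

    static boolean isIcebergTable(Map<String, String> parameters)
    {
        return "iceberg".equalsIgnoreCase(parameters.get(ICEBERG_TABLE_TYPE_KEY));
    }
}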
Use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.
The class HiveMetadata, method beginInsert:
@Override
public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns, RetryMode retryMode)
{
    SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    checkTableIsWritable(table, writesToNonManagedTablesEnabled);

    for (Column column : table.getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table %s with column type %s not supported", tableName, column.getType()));
        }
    }

    boolean isTransactional = isTransactionalTable(table.getParameters());
    if (isTransactional && retryMode != NO_RETRIES) {
        throw new TrinoException(NOT_SUPPORTED, "Inserting into Hive transactional tables is not supported with query retries enabled");
    }
    if (isTransactional && !autoCommit) {
        throw new TrinoException(NOT_SUPPORTED, "Inserting into Hive transactional tables is not supported in explicit transactions (use autocommit mode)");
    }
    if (isSparkBucketedTable(table)) {
        throw new TrinoException(NOT_SUPPORTED, "Inserting into Spark bucketed tables is not supported");
    }

    List<HiveColumnHandle> handles = hiveColumnHandles(table, typeManager, getTimestampPrecision(session)).stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableList());

    HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);

    Optional.ofNullable(table.getParameters().get(SKIP_HEADER_COUNT_KEY)).map(Integer::parseInt).ifPresent(headerSkipCount -> {
        if (headerSkipCount > 1) {
            throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table with value of %s property greater than 1 is not supported", SKIP_HEADER_COUNT_KEY));
        }
    });
    if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table with %s property not supported", SKIP_FOOTER_COUNT_KEY));
    }

    LocationHandle locationHandle = locationService.forExistingTable(metastore, session, table);

    AcidTransaction transaction = isTransactional ? metastore.beginInsert(session, table) : NO_ACID_TRANSACTION;

    HiveInsertTableHandle result = new HiveInsertTableHandle(
            tableName.getSchemaName(),
            tableName.getTableName(),
            handles,
            metastore.generatePageSinkMetadata(tableName),
            locationHandle,
            table.getStorage().getBucketProperty(),
            tableStorageFormat,
            isRespectTableFormat(session) ? tableStorageFormat : getHiveStorageFormat(session),
            transaction,
            retryMode != NO_RETRIES);

    WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    if (getInsertExistingPartitionsBehavior(session) == InsertExistingPartitionsBehavior.OVERWRITE
            && writeInfo.getWriteMode() == DIRECT_TO_TARGET_EXISTING_DIRECTORY) {
        if (isTransactional) {
            throw new TrinoException(NOT_SUPPORTED, "Overwriting existing partition in transactional tables doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
        }
        // The partition overwrite operation is non-atomic, so it can't and shouldn't be used in a non-autocommit context.
        if (!autoCommit) {
            throw new TrinoException(NOT_SUPPORTED, "Overwriting existing partition in non auto commit context doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
        }
    }
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), tableName);
    return result;
}
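The transactional gating above hinges on isTransactionalTable, which reads the Hive ACID marker from the table parameters. A minimal sketch, assuming the standard "transactional" parameter name (the real helper lives in Trino's Hive ACID utilities):

import java.util.Map;

// Minimal sketch, assuming Hive's standard ACID table parameter "transactional".
final class TransactionalCheck
{
    static boolean isTransactionalTable(Map<String, String> parameters)
    {
        return parameters != null && "true".equalsIgnoreCase(parameters.get("transactional"));
    }
}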
Use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.
The class HiveMetadata, method doGetTableMetadata:
private ConnectorTableMetadata doGetTableMetadata(ConnectorSession session, SchemaTableName tableName)
{
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    if (isIcebergTable(table) || isDeltaLakeTable(table)) {
        throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Not a Hive table '%s'", tableName));
    }

    if (!translateHiveViews && isHiveOrPrestoView(table)) {
        throw new TableNotFoundException(tableName);
    }

    Function<HiveColumnHandle, ColumnMetadata> metadataGetter = columnMetadataGetter(table);
    ImmutableList.Builder<ColumnMetadata> columns = ImmutableList.builder();
    for (HiveColumnHandle columnHandle : hiveColumnHandles(table, typeManager, getTimestampPrecision(session))) {
        columns.add(metadataGetter.apply(columnHandle));
    }

    ImmutableMap.Builder<String, Object> properties = ImmutableMap.builder();

    // External location property
    if (table.getTableType().equals(EXTERNAL_TABLE.name())) {
        properties.put(EXTERNAL_LOCATION_PROPERTY, table.getStorage().getLocation());
    }

    // Storage format property
    try {
        HiveStorageFormat format = extractHiveStorageFormat(table);
        properties.put(STORAGE_FORMAT_PROPERTY, format);
    }
    catch (TrinoException ignored) {
        // TODO: fail if the format is not known
    }

    // Partitioning property
    List<String> partitionedBy = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    if (!partitionedBy.isEmpty()) {
        properties.put(PARTITIONED_BY_PROPERTY, partitionedBy);
    }

    // Bucket properties
    table.getStorage().getBucketProperty().ifPresent(property -> {
        properties.put(BUCKETING_VERSION, property.getBucketingVersion().getVersion());
        properties.put(BUCKET_COUNT_PROPERTY, property.getBucketCount());
        properties.put(BUCKETED_BY_PROPERTY, property.getBucketedBy());
        properties.put(SORTED_BY_PROPERTY, property.getSortedBy());
    });

    // Transactional properties
    String transactionalProperty = table.getParameters().get(HiveMetadata.TRANSACTIONAL);
    if (parseBoolean(transactionalProperty)) {
        properties.put(HiveTableProperties.TRANSACTIONAL, true);
    }

    // ORC format specific properties
    String orcBloomFilterColumns = table.getParameters().get(ORC_BLOOM_FILTER_COLUMNS_KEY);
    if (orcBloomFilterColumns != null) {
        properties.put(ORC_BLOOM_FILTER_COLUMNS, Splitter.on(',').trimResults().omitEmptyStrings().splitToList(orcBloomFilterColumns));
    }
    String orcBloomFilterFpp = table.getParameters().get(ORC_BLOOM_FILTER_FPP_KEY);
    if (orcBloomFilterFpp != null) {
        properties.put(ORC_BLOOM_FILTER_FPP, Double.parseDouble(orcBloomFilterFpp));
    }

    // Avro specific property
    String avroSchemaUrl = table.getParameters().get(AVRO_SCHEMA_URL_KEY);
    if (avroSchemaUrl != null) {
        properties.put(AVRO_SCHEMA_URL, avroSchemaUrl);
    }

    // Textfile and CSV specific properties
    getSerdeProperty(table, SKIP_HEADER_COUNT_KEY)
            .ifPresent(skipHeaderCount -> properties.put(SKIP_HEADER_LINE_COUNT, Integer.valueOf(skipHeaderCount)));
    getSerdeProperty(table, SKIP_FOOTER_COUNT_KEY)
            .ifPresent(skipFooterCount -> properties.put(SKIP_FOOTER_LINE_COUNT, Integer.valueOf(skipFooterCount)));

    // Multi-format property
    getSerdeProperty(table, NULL_FORMAT_KEY)
            .ifPresent(nullFormat -> properties.put(NULL_FORMAT_PROPERTY, nullFormat));

    // Textfile specific properties
    getSerdeProperty(table, TEXT_FIELD_SEPARATOR_KEY)
            .ifPresent(fieldSeparator -> properties.put(TEXTFILE_FIELD_SEPARATOR, fieldSeparator));
    getSerdeProperty(table, TEXT_FIELD_SEPARATOR_ESCAPE_KEY)
            .ifPresent(fieldEscape -> properties.put(TEXTFILE_FIELD_SEPARATOR_ESCAPE, fieldEscape));

    // CSV specific properties
    getCsvSerdeProperty(table, CSV_SEPARATOR_KEY)
            .ifPresent(csvSeparator -> properties.put(CSV_SEPARATOR, csvSeparator));
    getCsvSerdeProperty(table, CSV_QUOTE_KEY)
            .ifPresent(csvQuote -> properties.put(CSV_QUOTE, csvQuote));
    getCsvSerdeProperty(table, CSV_ESCAPE_KEY)
            .ifPresent(csvEscape -> properties.put(CSV_ESCAPE, csvEscape));

    Optional<String> comment = Optional.ofNullable(table.getParameters().get(TABLE_COMMENT));

    String autoPurgeProperty = table.getParameters().get(AUTO_PURGE_KEY);
    if (parseBoolean(autoPurgeProperty)) {
        properties.put(AUTO_PURGE, true);
    }

    return new ConnectorTableMetadata(tableName, columns.build(), properties.buildOrThrow(), comment);
}
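getSerdeProperty, used for the textfile and CSV properties above, is not shown in this listing. It has to reconcile the same key appearing both in the storage descriptor's serde parameters and in the table parameters. A best-effort reconstruction under that assumption (prefer the table parameter, fall back to the serde parameter, and reject conflicting values):

// Best-effort reconstruction of the private helper used above; treat the exact
// precedence and error message as assumptions, not the authoritative source.
private static Optional<String> getSerdeProperty(Table table, String key)
{
    String serdeValue = table.getStorage().getSerdeParameters().get(key);
    String tableValue = table.getParameters().get(key);
    if (serdeValue != null && tableValue != null && !tableValue.equals(serdeValue)) {
        throw new TrinoException(HIVE_INVALID_METADATA,
                format("Different values for '%s' set in serde properties and table properties: '%s' and '%s'", key, serdeValue, tableValue));
    }
    return Optional.ofNullable(tableValue != null ? tableValue : serdeValue);
}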
Use of io.trino.plugin.hive.metastore.Table in project trino by trinodb.
The class HiveMetadata, method redirectTable:
@Override
public Optional<CatalogSchemaTableName> redirectTable(ConnectorSession session, SchemaTableName tableName)
{
    requireNonNull(session, "session is null");
    requireNonNull(tableName, "tableName is null");
    if (isHiveSystemSchema(tableName.getSchemaName())) {
        return Optional.empty();
    }
    // We need to chop off any "$partitions" and similar suffixes from the table name while querying the metastore for the Table object.
    TableNameSplitResult tableNameSplit = splitTableName(tableName.getTableName());
    Optional<Table> table = metastore.getTable(tableName.getSchemaName(), tableNameSplit.getBaseTableName());
    if (table.isEmpty() || VIRTUAL_VIEW.name().equals(table.get().getTableType())) {
        return Optional.empty();
    }

    Optional<CatalogSchemaTableName> catalogSchemaTableName = tableRedirectionsProvider.redirectTable(session, table.get());

    // Stitch back the suffix we cut off.
    return catalogSchemaTableName.map(name -> new CatalogSchemaTableName(
            name.getCatalogName(),
            new SchemaTableName(
                    name.getSchemaTableName().getSchemaName(),
                    name.getSchemaTableName().getTableName() + tableNameSplit.getSuffix().orElse(""))));
}
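splitTableName, referenced above, peels off everything from the last '$', so 'orders$partitions' yields base table 'orders' plus suffix '$partitions', which is re-appended after redirection. A sketch under that assumption (TableNameSplitResult is the private holder used above):

// Sketch: the marker is the last '$'; a '$' at position 0 (or no '$' at all) means no suffix.
private static TableNameSplitResult splitTableName(String tableName)
{
    int markerIndex = tableName.lastIndexOf('$');
    if (markerIndex <= 0) {
        return new TableNameSplitResult(tableName, Optional.empty());
    }
    return new TableNameSplitResult(
            tableName.substring(0, markerIndex),
            Optional.of(tableName.substring(markerIndex)));
}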