Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
From the class HiveMetadata, method beginCreateTable:
@Override
public HiveOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorTableLayout> layout, RetryMode retryMode) {
    Optional<Path> externalLocation = Optional.ofNullable(getExternalLocation(tableMetadata.getProperties())).map(HiveMetadata::getExternalLocationAsPath);
    if (!createsOfNonManagedTablesEnabled && externalLocation.isPresent()) {
        throw new TrinoException(NOT_SUPPORTED, "Creating non-managed Hive tables is disabled");
    }
    if (!writesToNonManagedTablesEnabled && externalLocation.isPresent()) {
        throw new TrinoException(NOT_SUPPORTED, "Writes to non-managed Hive tables is disabled");
    }
    boolean isTransactional = isTransactional(tableMetadata.getProperties()).orElse(false);
    if (isTransactional && externalLocation.isEmpty() && isDelegateTransactionalManagedTableLocationToMetastore(session)) {
        throw new TrinoException(NOT_SUPPORTED, "CREATE TABLE AS is not supported for transactional tables without explicit location if location determining is delegated to metastore");
    }
    if (isTransactional && retryMode != NO_RETRIES) {
        throw new TrinoException(NOT_SUPPORTED, "CREATE TABLE AS is not supported for transactional tables with query retries enabled");
    }
    if (getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
        throw new TrinoException(NOT_SUPPORTED, "CREATE TABLE AS not supported when Avro schema url is set");
    }
    getHeaderSkipCount(tableMetadata.getProperties()).ifPresent(headerSkipCount -> {
        if (headerSkipCount > 1) {
            throw new TrinoException(NOT_SUPPORTED, format("Creating Hive table with data with value of %s property greater than 1 is not supported", SKIP_HEADER_COUNT_KEY));
        }
    });
    getFooterSkipCount(tableMetadata.getProperties()).ifPresent(footerSkipCount -> {
        if (footerSkipCount > 0) {
            throw new TrinoException(NOT_SUPPORTED, format("Creating Hive table with data with value of %s property greater than 0 is not supported", SKIP_FOOTER_COUNT_KEY));
        }
    });
    HiveStorageFormat tableStorageFormat = getHiveStorageFormat(tableMetadata.getProperties());
    List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
    Optional<HiveBucketProperty> bucketProperty = getBucketProperty(tableMetadata.getProperties());
    // get the root directory for the database
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, bucketProperty, new HdfsContext(session));
    List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy));
    HiveStorageFormat partitionStorageFormat = isRespectTableFormat(session) ? tableStorageFormat : getHiveStorageFormat(session);
    // unpartitioned tables ignore the partition storage format
    HiveStorageFormat actualStorageFormat = partitionedBy.isEmpty() ? tableStorageFormat : partitionStorageFormat;
    actualStorageFormat.validateColumns(columnHandles);
    Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
    List<Column> partitionColumns = partitionedBy.stream().map(columnHandlesByName::get).map(HiveColumnHandle::toMetastoreColumn).collect(toImmutableList());
    checkPartitionTypesSupported(partitionColumns);
    LocationHandle locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, externalLocation);
    AcidTransaction transaction = isTransactional ? forCreateTable() : NO_ACID_TRANSACTION;
    HiveOutputTableHandle result = new HiveOutputTableHandle(
            schemaName,
            tableName,
            columnHandles,
            metastore.generatePageSinkMetadata(schemaTableName),
            locationHandle,
            tableStorageFormat,
            partitionStorageFormat,
            partitionedBy,
            bucketProperty,
            session.getUser(),
            tableProperties,
            transaction,
            externalLocation.isPresent(),
            retryMode != NO_RETRIES);
    WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), schemaTableName);
    return result;
}
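
The AcidTransaction choice above is the core of this example: a transactional table gets a CREATE TABLE transaction, anything else writes with no ACID transaction, and the result is carried inside the HiveOutputTableHandle so that finishCreateTable (last example below) can read it back. A minimal standalone sketch of that decision follows, assuming forCreateTable() and NO_ACID_TRANSACTION are static members of io.trino.plugin.hive.acid.AcidTransaction (their unqualified use above suggests static imports); the helper class and method names are hypothetical.

import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION;
import static io.trino.plugin.hive.acid.AcidTransaction.forCreateTable;

import io.trino.plugin.hive.acid.AcidTransaction;

// Hypothetical helper, not part of HiveMetadata.
final class CtasAcidTransactionSketch
{
    private CtasAcidTransactionSketch() {}

    // Mirrors the selection made in beginCreateTable: transactional (ACID) tables write
    // under a CREATE TABLE transaction, all other tables write with no ACID transaction.
    static AcidTransaction forNewTable(boolean transactional)
    {
        return transactional ? forCreateTable() : NO_ACID_TRANSACTION;
    }
}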
Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
From the class HiveMetadata, method beginUpdate:
@Override
public ConnectorTableHandle beginUpdate(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> updatedColumns, RetryMode retryMode) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    if (!isFullAcidTable(table.getParameters())) {
        throw new TrinoException(NOT_SUPPORTED, "Hive update is only supported for ACID transactional tables");
    }
    if (!autoCommit) {
        throw new TrinoException(NOT_SUPPORTED, "Updating transactional tables is not supported in explicit transactions (use autocommit mode)");
    }
    if (isSparkBucketedTable(table)) {
        throw new TrinoException(NOT_SUPPORTED, "Updating Spark bucketed tables is not supported");
    }
    // Verify that none of the updated columns are partition columns or bucket columns
    Set<String> updatedColumnNames = updatedColumns.stream().map(handle -> ((HiveColumnHandle) handle).getName()).collect(toImmutableSet());
    Set<String> partitionColumnNames = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableSet());
    if (!intersection(updatedColumnNames, partitionColumnNames).isEmpty()) {
        throw new TrinoException(NOT_SUPPORTED, "Updating Hive table partition columns is not supported");
    }
    hiveTableHandle.getBucketHandle().ifPresent(handle -> {
        Set<String> bucketColumnNames = handle.getColumns().stream().map(HiveColumnHandle::getName).collect(toImmutableSet());
        if (!intersection(updatedColumnNames, bucketColumnNames).isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Updating Hive table bucket columns is not supported");
        }
    });
    checkTableIsWritable(table, writesToNonManagedTablesEnabled);
    for (Column column : table.getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new TrinoException(NOT_SUPPORTED, format("Updating a Hive table with column type %s not supported", column.getType()));
        }
    }
    List<HiveColumnHandle> allDataColumns = getRegularColumnHandles(table, typeManager, getTimestampPrecision(session)).stream().filter(columnHandle -> !columnHandle.isHidden()).collect(toImmutableList());
    List<HiveColumnHandle> hiveUpdatedColumns = updatedColumns.stream().map(HiveColumnHandle.class::cast).collect(toImmutableList());
    if (table.getParameters().containsKey(SKIP_HEADER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Updating a Hive table with %s property not supported", SKIP_HEADER_COUNT_KEY));
    }
    if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Updating a Hive table with %s property not supported", SKIP_FOOTER_COUNT_KEY));
    }
    if (retryMode != NO_RETRIES) {
        throw new TrinoException(NOT_SUPPORTED, "Updating a Hive tables is not supported with query retries enabled");
    }
    LocationHandle locationHandle = locationService.forExistingTable(metastore, session, table);
    HiveUpdateProcessor updateProcessor = new HiveUpdateProcessor(allDataColumns, hiveUpdatedColumns);
    AcidTransaction transaction = metastore.beginUpdate(session, table, updateProcessor);
    HiveTableHandle updateHandle = hiveTableHandle.withTransaction(transaction);
    WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), tableName);
    return updateHandle;
}
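
Here the AcidTransaction is not built locally at all: metastore.beginUpdate(session, table, updateProcessor) opens the Hive ACID transaction, and the handle returned to the engine carries it via withTransaction. A minimal sketch of that hand-off follows, using only calls visible above; the wrapper class is hypothetical, and the parameter types (in particular SemiTransactionalHiveMetastore as the type of the metastore field) and package paths are assumptions.

import io.trino.plugin.hive.HiveTableHandle;
import io.trino.plugin.hive.HiveUpdateProcessor;
import io.trino.plugin.hive.acid.AcidTransaction;
import io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore;
import io.trino.plugin.hive.metastore.Table;
import io.trino.spi.connector.ConnectorSession;

// Hypothetical illustration, not part of HiveMetadata; package paths assumed.
final class UpdateTransactionSketch
{
    private UpdateTransactionSketch() {}

    // Opens the ACID transaction via the metastore and attaches it to the table handle,
    // as beginUpdate does above; later phases read the transaction back from the handle.
    static HiveTableHandle attachUpdateTransaction(
            SemiTransactionalHiveMetastore metastore, // assumed type of the metastore field
            ConnectorSession session,
            Table table,
            HiveUpdateProcessor updateProcessor,
            HiveTableHandle handle)
    {
        AcidTransaction transaction = metastore.beginUpdate(session, table, updateProcessor);
        return handle.withTransaction(transaction);
    }
}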
Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
From the class HiveMetadata, method finishCreateTable:
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toImmutableList());
    WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(
            session.getQueryId(),
            handle.getSchemaName(),
            handle.getTableName(),
            handle.getTableOwner(),
            handle.getInputColumns(),
            handle.getTableStorageFormat(),
            handle.getPartitionedBy(),
            handle.getBucketProperty(),
            handle.getAdditionalTableParameters(),
            Optional.of(writeInfo.getTargetPath()),
            handle.isExternal(),
            prestoVersion,
            accessControlMetadata.isUsingSystemSecurity());
    PrincipalPrivileges principalPrivileges = accessControlMetadata.isUsingSystemSecurity() ? NO_PRIVILEGES : buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, true, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
        if (handle.isTransactional()) {
            AcidTransaction transaction = handle.getTransaction();
            List<String> partitionNames = partitionUpdates.stream().map(PartitionUpdate::getName).collect(toImmutableList());
            metastore.addDynamicPartitions(handle.getSchemaName(), handle.getTableName(), partitionNames, transaction.getAcidTransactionId(), transaction.getWriteId(), AcidOperation.CREATE_TABLE);
        }
    }
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        HiveBasicStatistics basicStatistics = partitionUpdates.stream().map(PartitionUpdate::getStatistics).reduce((first, second) -> reduce(first, second, ADD)).orElse(createZeroStatistics());
        tableStatistics = createPartitionStatistics(basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    } else {
        tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
    }
    if (handle.getPartitionedBy().isEmpty()) {
        List<String> fileNames;
        if (partitionUpdates.isEmpty()) {
            // creating empty table via CTAS ... WITH NO DATA
            fileNames = ImmutableList.of();
        } else {
            fileNames = getOnlyElement(partitionUpdates).getFileNames();
        }
        metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), Optional.of(fileNames), false, tableStatistics, handle.isRetriesEnabled());
    } else {
        metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), Optional.empty(), false, tableStatistics, false);
    }
    if (!handle.getPartitionedBy().isEmpty()) {
        if (isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        for (PartitionUpdate update : partitionUpdates) {
            Partition partition = buildPartitionObject(session, table, update);
            PartitionStatistics partitionStatistics = createPartitionStatistics(update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), buildPartitionObject(session, table, update), update.getWritePath(), Optional.of(update.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
        }
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toImmutableList())));
}
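
The transaction created in beginCreateTable resurfaces here: for a transactional CTAS, handle.getTransaction() yields the AcidTransaction whose transaction id and write id are handed to addDynamicPartitions together with AcidOperation.CREATE_TABLE. A minimal sketch of that call shape follows, using only members visible in the example; the wrapper class is hypothetical, and the metastore type and package paths are assumptions.

import java.util.List;

import io.trino.plugin.hive.acid.AcidOperation;
import io.trino.plugin.hive.acid.AcidTransaction;
import io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore;

// Hypothetical illustration of the dynamic-partition registration in finishCreateTable.
final class DynamicPartitionSketch
{
    private DynamicPartitionSketch() {}

    static void registerCtasPartitions(
            SemiTransactionalHiveMetastore metastore, // assumed type of the metastore field
            String schemaName,
            String tableName,
            List<String> partitionNames,
            AcidTransaction transaction)
    {
        // The transaction id and write id identify the ACID write the partitions belong to;
        // CREATE_TABLE marks the kind of operation being registered.
        metastore.addDynamicPartitions(
                schemaName,
                tableName,
                partitionNames,
                transaction.getAcidTransactionId(),
                transaction.getWriteId(),
                AcidOperation.CREATE_TABLE);
    }
}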