Use of io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR in project trino by trinodb.
Class CreateTableTask, method internalExecute.
@VisibleForTesting
ListenableFuture<Void> internalExecute(CreateTable statement, Session session, List<Expression> parameters, Consumer<Output> outputConsumer)
{
    checkArgument(!statement.getElements().isEmpty(), "no columns for table");
    Map<NodeRef<Parameter>, Expression> parameterLookup = parameterExtractor(statement, parameters);
    QualifiedObjectName tableName = createQualifiedObjectName(session, statement, statement.getName());
    Optional<TableHandle> tableHandle = plannerContext.getMetadata().getTableHandle(session, tableName);
    if (tableHandle.isPresent()) {
        if (!statement.isNotExists()) {
            throw semanticException(TABLE_ALREADY_EXISTS, statement, "Table '%s' already exists", tableName);
        }
        return immediateVoidFuture();
    }
    CatalogName catalogName = getRequiredCatalogHandle(plannerContext.getMetadata(), session, statement, tableName.getCatalogName());
    LinkedHashMap<String, ColumnMetadata> columns = new LinkedHashMap<>();
    Map<String, Object> inheritedProperties = ImmutableMap.of();
    boolean includingProperties = false;
    for (TableElement element : statement.getElements()) {
        if (element instanceof ColumnDefinition) {
            ColumnDefinition column = (ColumnDefinition) element;
            String name = column.getName().getValue().toLowerCase(Locale.ENGLISH);
            Type type;
            try {
                type = plannerContext.getTypeManager().getType(toTypeSignature(column.getType()));
            }
            catch (TypeNotFoundException e) {
                throw semanticException(TYPE_NOT_FOUND, element, "Unknown type '%s' for column '%s'", column.getType(), column.getName());
            }
            if (type.equals(UNKNOWN)) {
                throw semanticException(COLUMN_TYPE_UNKNOWN, element, "Unknown type '%s' for column '%s'", column.getType(), column.getName());
            }
            if (columns.containsKey(name)) {
                throw semanticException(DUPLICATE_COLUMN_NAME, column, "Column name '%s' specified more than once", column.getName());
            }
            if (!column.isNullable() && !plannerContext.getMetadata().getConnectorCapabilities(session, catalogName).contains(NOT_NULL_COLUMN_CONSTRAINT)) {
                throw semanticException(NOT_SUPPORTED, column, "Catalog '%s' does not support non-null column for column name '%s'", catalogName.getCatalogName(), column.getName());
            }
            Map<String, Object> columnProperties = columnPropertyManager.getProperties(
                    catalogName,
                    column.getProperties(),
                    session,
                    plannerContext,
                    accessControl,
                    parameterLookup,
                    true);
            columns.put(name, ColumnMetadata.builder()
                    .setName(name)
                    .setType(type)
                    .setNullable(column.isNullable())
                    .setComment(column.getComment())
                    .setProperties(columnProperties)
                    .build());
        }
        else if (element instanceof LikeClause) {
            LikeClause likeClause = (LikeClause) element;
            QualifiedObjectName originalLikeTableName = createQualifiedObjectName(session, statement, likeClause.getTableName());
            if (plannerContext.getMetadata().getCatalogHandle(session, originalLikeTableName.getCatalogName()).isEmpty()) {
                throw semanticException(CATALOG_NOT_FOUND, statement, "LIKE table catalog '%s' does not exist", originalLikeTableName.getCatalogName());
            }
            RedirectionAwareTableHandle redirection = plannerContext.getMetadata().getRedirectionAwareTableHandle(session, originalLikeTableName);
            TableHandle likeTable = redirection.getTableHandle()
                    .orElseThrow(() -> semanticException(TABLE_NOT_FOUND, statement, "LIKE table '%s' does not exist", originalLikeTableName));
            QualifiedObjectName likeTableName = redirection.getRedirectedTableName().orElse(originalLikeTableName);
            if (!tableName.getCatalogName().equals(likeTableName.getCatalogName())) {
                String message = "CREATE TABLE LIKE across catalogs is not supported";
                if (!originalLikeTableName.equals(likeTableName)) {
                    message += format(". LIKE table '%s' redirected to '%s'.", originalLikeTableName, likeTableName);
                }
                throw semanticException(NOT_SUPPORTED, statement, message);
            }
            TableMetadata likeTableMetadata = plannerContext.getMetadata().getTableMetadata(session, likeTable);
            Optional<LikeClause.PropertiesOption> propertiesOption = likeClause.getPropertiesOption();
            if (propertiesOption.isPresent() && propertiesOption.get() == LikeClause.PropertiesOption.INCLUDING) {
                if (includingProperties) {
                    throw semanticException(NOT_SUPPORTED, statement, "Only one LIKE clause can specify INCLUDING PROPERTIES");
                }
                includingProperties = true;
                inheritedProperties = likeTableMetadata.getMetadata().getProperties();
            }
            try {
                accessControl.checkCanSelectFromColumns(
                        session.toSecurityContext(),
                        likeTableName,
                        likeTableMetadata.getColumns().stream()
                                .map(ColumnMetadata::getName)
                                .collect(toImmutableSet()));
            }
            catch (AccessDeniedException e) {
                throw new AccessDeniedException("Cannot reference columns of table " + likeTableName);
            }
            if (propertiesOption.orElse(EXCLUDING) == INCLUDING) {
                try {
                    accessControl.checkCanShowCreateTable(session.toSecurityContext(), likeTableName);
                }
                catch (AccessDeniedException e) {
                    throw new AccessDeniedException("Cannot reference properties of table " + likeTableName);
                }
            }
            likeTableMetadata.getColumns().stream()
                    .filter(column -> !column.isHidden())
                    .forEach(column -> {
                        if (columns.containsKey(column.getName().toLowerCase(Locale.ENGLISH))) {
                            throw semanticException(DUPLICATE_COLUMN_NAME, element, "Column name '%s' specified more than once", column.getName());
                        }
                        columns.put(column.getName().toLowerCase(Locale.ENGLISH), column);
                    });
        }
        else {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, "Invalid TableElement: " + element.getClass().getName());
        }
    }
    Map<String, Object> properties = tablePropertyManager.getProperties(
            catalogName,
            statement.getProperties(),
            session,
            plannerContext,
            accessControl,
            parameterLookup,
            true);
    accessControl.checkCanCreateTable(session.toSecurityContext(), tableName, properties);
    Set<String> specifiedPropertyKeys = statement.getProperties().stream()
            .map(property -> property.getName().getValue())
            .collect(toImmutableSet());
    Map<String, Object> finalProperties = combineProperties(specifiedPropertyKeys, properties, inheritedProperties);
    ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
            tableName.asSchemaTableName(),
            ImmutableList.copyOf(columns.values()),
            finalProperties,
            statement.getComment());
    try {
        plannerContext.getMetadata().createTable(session, tableName.getCatalogName(), tableMetadata, statement.isNotExists());
    }
    catch (TrinoException e) {
        // connectors are not required to handle the ignoreExisting flag
        if (!e.getErrorCode().equals(ALREADY_EXISTS.toErrorCode()) || !statement.isNotExists()) {
            throw e;
        }
    }
    outputConsumer.accept(new Output(
            tableName.getCatalogName(),
            tableName.getSchemaName(),
            tableName.getObjectName(),
            Optional.of(tableMetadata.getColumns().stream()
                    .map(column -> new OutputColumn(new Column(column.getName(), column.getType().toString()), ImmutableSet.of()))
                    .collect(toImmutableList()))));
    return immediateVoidFuture();
}
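In this method, user-facing problems are reported through semanticException with specific error codes, while GENERIC_INTERNAL_ERROR is reserved for the defensive final else branch: an element type the parser should never produce. A minimal sketch of that dispatch pattern, using simplified stand-in types and a hypothetical handleElement method rather than the actual Trino classes:

import io.trino.spi.TrinoException;

import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;

// Hypothetical stand-ins for the parser's TableElement hierarchy (illustration only).
interface TableElement {}
class ColumnDefinition implements TableElement {}
class LikeClause implements TableElement {}

final class ElementDispatchSketch
{
    private ElementDispatchSketch() {}

    static void handleElement(TableElement element)
    {
        if (element instanceof ColumnDefinition) {
            // handle a column definition
        }
        else if (element instanceof LikeClause) {
            // handle a LIKE clause
        }
        else {
            // Reaching this branch means the AST contains a node type this code does not know about,
            // which is a bug in the engine rather than a user error, hence GENERIC_INTERNAL_ERROR.
            throw new TrinoException(GENERIC_INTERNAL_ERROR, "Invalid TableElement: " + element.getClass().getName());
        }
    }
}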
Use of io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR in project trino by trinodb.
Class VacuumProcedure, method doVacuum.
private void doVacuum(ConnectorSession session, String schema, String table, String retention)
        throws IOException
{
    checkProcedureArgument(schema != null, "schema_name cannot be null");
    checkProcedureArgument(!schema.isEmpty(), "schema_name cannot be empty");
    checkProcedureArgument(table != null, "table_name cannot be null");
    checkProcedureArgument(!table.isEmpty(), "table_name cannot be empty");
    checkProcedureArgument(retention != null, "retention cannot be null");
    Duration retentionDuration = Duration.valueOf(retention);
    Duration minRetention = getVacuumMinRetention(session);
    checkProcedureArgument(
            retentionDuration.compareTo(minRetention) >= 0,
            "Retention specified (%s) is shorter than the minimum retention configured in the system (%s). " +
                    "Minimum retention can be changed with %s configuration property or %s.%s session property",
            retentionDuration,
            minRetention,
            DeltaLakeConfig.VACUUM_MIN_RETENTION,
            catalogName,
            DeltaLakeSessionProperties.VACUUM_MIN_RETENTION);
    Instant threshold = Instant.now().minusMillis(retentionDuration.toMillis());
    DeltaLakeMetadata metadata = metadataFactory.create(session.getIdentity());
    SchemaTableName tableName = new SchemaTableName(schema, table);
    DeltaLakeTableHandle handle = metadata.getTableHandle(session, tableName);
    checkProcedureArgument(handle != null, "Table '%s' does not exist", tableName);
    TableSnapshot tableSnapshot = transactionLogAccess.loadSnapshot(tableName, new Path(handle.getLocation()), session);
    Path tableLocation = tableSnapshot.getTableLocation();
    Path transactionLogDir = getTransactionLogDir(tableLocation);
    FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session), tableLocation);
    String commonPathPrefix = tableLocation + "/";
    String queryId = session.getQueryId();
    // Retain all active files and every file removed by a "recent" transaction (except for the oldest "recent").
    // Any remaining files are not live and are not needed to read any "recent" snapshot.
    List<Long> recentVersions = transactionLogAccess.getPastTableVersions(fileSystem, transactionLogDir, threshold, tableSnapshot.getVersion());
    Set<String> retainedPaths = Stream.concat(
            transactionLogAccess.getActiveFiles(tableSnapshot, session).stream()
                    .map(AddFileEntry::getPath),
            transactionLogAccess.getJsonEntries(
                    fileSystem,
                    transactionLogDir,
                    // these are no longer active files, but still needed to read a "recent" snapshot
                    recentVersions.stream().sorted(naturalOrder()).skip(1).collect(toImmutableList()))
                    .map(DeltaLakeTransactionLogEntry::getRemove)
                    .filter(Objects::nonNull)
                    .map(RemoveFileEntry::getPath))
            .peek(path -> checkState(!path.startsWith(tableLocation.toString()), "Unexpected absolute path in transaction log: %s", path))
            .collect(toImmutableSet());
    log.debug(
            "[%s] attempting to vacuum table %s [%s] with %s retention (expiry threshold %s). %s data file paths marked for retention",
            queryId, tableName, tableLocation, retention, threshold, retainedPaths.size());
    long nonFiles = 0;
    long allPathsChecked = 0;
    long transactionLogFiles = 0;
    long retainedKnownFiles = 0;
    long retainedUnknownFiles = 0;
    long removedFiles = 0;
    RemoteIterator<LocatedFileStatus> listing = fileSystem.listFiles(tableLocation, true);
    while (listing.hasNext()) {
        LocatedFileStatus fileStatus = listing.next();
        Path path = fileStatus.getPath();
        checkState(path.toString().startsWith(commonPathPrefix), "Unexpected path [%s] returned when listing files under [%s]", path, tableLocation);
        String relativePath = path.toString().substring(commonPathPrefix.length());
        if (relativePath.isEmpty()) {
            // A file returned for "tableLocation/"; this might be possible on S3.
            continue;
        }
        allPathsChecked++;
        // TODO Note: Databricks can delete directories during vacuum on S3. This might need to be revisited.
        if (!fileStatus.isFile()) {
            nonFiles++;
            continue;
        }
        // ignore tableLocation/_delta_log/**
        if (relativePath.equals(TRANSACTION_LOG_DIRECTORY) || relativePath.startsWith(TRANSACTION_LOG_DIRECTORY + "/")) {
            log.debug("[%s] skipping a file inside transaction log dir: %s", queryId, path);
            transactionLogFiles++;
            continue;
        }
        // skip retained files
        if (retainedPaths.contains(relativePath)) {
            log.debug("[%s] retaining a known file: %s", queryId, path);
            retainedKnownFiles++;
            continue;
        }
        // ignore recently created files
        long modificationTime = fileStatus.getModificationTime();
        Instant modificationInstant = Instant.ofEpochMilli(modificationTime);
        if (!modificationInstant.isBefore(threshold)) {
            log.debug("[%s] retaining an unknown file %s with modification time %s (%s)", queryId, path, modificationTime, modificationInstant);
            retainedUnknownFiles++;
            continue;
        }
        log.debug("[%s] deleting file [%s] with modification time %s (%s)", queryId, path, modificationTime, modificationInstant);
        if (!fileSystem.delete(path, false)) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to delete file: " + path);
        }
        removedFiles++;
    }
    log.info(
            "[%s] finished vacuuming table %s [%s]: files checked: %s; non-files: %s; metadata files: %s; retained known files: %s; retained unknown files: %s; removed files: %s",
            queryId, tableName, tableLocation, allPathsChecked, nonFiles, transactionLogFiles, retainedKnownFiles, retainedUnknownFiles, removedFiles);
}
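Here GENERIC_INTERNAL_ERROR is used because Hadoop's FileSystem.delete reports some failures by returning false instead of throwing, and a failed delete during vacuum is an unexpected engine-side condition rather than bad user input. A minimal sketch of that check in isolation; the class and method name are illustrative, not from the Trino codebase:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import io.trino.spi.TrinoException;

import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;

final class DeleteOrFailSketch
{
    private DeleteOrFailSketch() {}

    // Converts a "false" return from FileSystem.delete into an explicit engine error,
    // mirroring the check inside the vacuum loop above.
    static void deleteOrFail(FileSystem fileSystem, Path path)
            throws IOException
    {
        if (!fileSystem.delete(path, false)) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to delete file: " + path);
        }
    }
}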
Use of io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR in project trino by trinodb.
Class DeltaLakeMetadata, method beginInsert.
@Override
public ConnectorInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns)
{
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) tableHandle;
    if (!allowWrite(session, table)) {
        String fileSystem = new Path(table.getLocation()).toUri().getScheme();
        throw new TrinoException(NOT_SUPPORTED, format("Inserts are not supported on the %s filesystem", fileSystem));
    }
    checkSupportedWriterVersion(session, table.getSchemaTableName());
    List<DeltaLakeColumnHandle> inputColumns = columns.stream()
            .map(handle -> (DeltaLakeColumnHandle) handle)
            .collect(toImmutableList());
    ConnectorTableMetadata tableMetadata = getTableMetadata(session, table);
    // This check acts as a safeguard in cases where the input columns may differ from the table metadata case-sensitively
    checkAllColumnsPassedOnInsert(tableMetadata, inputColumns);
    String tableLocation = getLocation(tableMetadata.getProperties());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(tableLocation));
        return new DeltaLakeInsertTableHandle(
                table.getSchemaName(),
                table.getTableName(),
                tableLocation,
                table.getMetadataEntry(),
                inputColumns,
                getMandatoryCurrentVersion(fileSystem, new Path(tableLocation)));
    }
    catch (IOException e) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
    }
}
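In beginInsert the code wraps a checked IOException from filesystem access into an unchecked TrinoException carrying GENERIC_INTERNAL_ERROR. A minimal sketch of that conversion as a reusable helper; IoAction and runIo are illustrative names, not Trino APIs:

import java.io.IOException;

import io.trino.spi.TrinoException;

import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;

final class IoWrappingSketch
{
    private IoWrappingSketch() {}

    interface IoAction<T>
    {
        T run() throws IOException;
    }

    // Runs an I/O action and rethrows any IOException as a TrinoException with
    // GENERIC_INTERNAL_ERROR, the same conversion beginInsert performs inline.
    static <T> T runIo(IoAction<T> action)
    {
        try {
            return action.run();
        }
        catch (IOException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
    }
}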
Use of io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR in project trino by trinodb.
Class HiveSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();
    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    // verify table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }
    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);
    // short circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }
    // get buckets from first partition (arbitrary)
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());
    HiveSplitSource splitSource;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(
                    session, table.getDatabaseName(), table.getTableName(),
                    maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond,
                    hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(
                    session, table.getDatabaseName(), table.getTableName(),
                    maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond,
                    hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
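The GROUPED_SCHEDULING check above guards an engine invariant (grouped scheduling implies a bucket handle) rather than validating user input, so it raises GENERIC_INTERNAL_ERROR instead of a user-facing error code. A minimal sketch of such an invariant guard; checkInternalState is a hypothetical helper, not part of Trino:

import io.trino.spi.TrinoException;

import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static java.lang.String.format;

final class InternalStateSketch
{
    private InternalStateSketch() {}

    // Guava checkState-style guard that reports a violated engine invariant as GENERIC_INTERNAL_ERROR,
    // matching how getSplits rejects GROUPED_SCHEDULING without a bucket handle.
    static void checkInternalState(boolean condition, String message, Object... args)
    {
        if (!condition) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, format(message, args));
        }
    }
}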