Use of io.trino.spi.type.TypeManager in project trino by trinodb.
Example: the resolve method of the class MetadataManager.
@VisibleForTesting
public ResolvedFunction resolve(Session session, FunctionBinding functionBinding, FunctionMetadata functionMetadata, FunctionDependencyDeclaration declaration)
{
    Map<TypeSignature, Type> dependentTypes = declaration.getTypeDependencies().stream()
            .map(typeSignature -> applyBoundVariables(typeSignature, functionBinding))
            .collect(toImmutableMap(Function.identity(), typeManager::getType, (left, right) -> left));

    ImmutableSet.Builder<ResolvedFunction> functions = ImmutableSet.builder();
    declaration.getFunctionDependencies().stream()
            .map(functionDependency -> {
                try {
                    List<TypeSignature> argumentTypes = applyBoundVariables(functionDependency.getArgumentTypes(), functionBinding);
                    return resolvedFunctionInternal(session, functionDependency.getName(), fromTypeSignatures(argumentTypes));
                }
                catch (TrinoException e) {
                    if (functionDependency.isOptional()) {
                        return null;
                    }
                    throw e;
                }
            })
            .filter(Objects::nonNull)
            .forEach(functions::add);
    declaration.getOperatorDependencies().stream()
            .map(operatorDependency -> {
                try {
                    List<TypeSignature> argumentTypes = applyBoundVariables(operatorDependency.getArgumentTypes(), functionBinding);
                    return resolvedFunctionInternal(session, QualifiedName.of(mangleOperatorName(operatorDependency.getOperatorType())), fromTypeSignatures(argumentTypes));
                }
                catch (TrinoException e) {
                    if (operatorDependency.isOptional()) {
                        return null;
                    }
                    throw e;
                }
            })
            .filter(Objects::nonNull)
            .forEach(functions::add);
    declaration.getCastDependencies().stream()
            .map(castDependency -> {
                try {
                    Type fromType = typeManager.getType(applyBoundVariables(castDependency.getFromType(), functionBinding));
                    Type toType = typeManager.getType(applyBoundVariables(castDependency.getToType(), functionBinding));
                    return getCoercion(session, fromType, toType);
                }
                catch (TrinoException e) {
                    if (castDependency.isOptional()) {
                        return null;
                    }
                    throw e;
                }
            })
            .filter(Objects::nonNull)
            .forEach(functions::add);
    return new ResolvedFunction(
            functionBinding.getBoundSignature(),
            functionBinding.getFunctionId(),
            functionMetadata.getKind(),
            functionMetadata.isDeterministic(),
            functionMetadata.getFunctionNullability(),
            dependentTypes,
            functions.build());
}
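For context, the TypeManager here is what turns each dependent TypeSignature, after bound type variables such as T are substituted, into a concrete Type. Below is a minimal sketch of that single lookup; the helper and class names are illustrative, and it uses only the TypeManager.getType(TypeSignature) call that already appears above.

import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;
import io.trino.spi.type.TypeSignature;

final class TypeDependencyExample
{
    private TypeDependencyExample() {}

    // Illustrative helper: resolve one dependent signature, e.g. "array(bigint)",
    // to a concrete Type. resolve() above does this for every declared dependency,
    // catching TrinoException for dependencies that are marked optional.
    static Type resolveDependentType(TypeManager typeManager, TypeSignature signature)
    {
        return typeManager.getType(signature);
    }
}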
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
Example: the beginInsert method of the class HiveMetadata.
@Override
public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> columns, RetryMode retryMode)
{
    SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    checkTableIsWritable(table, writesToNonManagedTablesEnabled);
    for (Column column : table.getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table %s with column type %s not supported", tableName, column.getType()));
        }
    }
    boolean isTransactional = isTransactionalTable(table.getParameters());
    if (isTransactional && retryMode != NO_RETRIES) {
        throw new TrinoException(NOT_SUPPORTED, "Inserting into Hive transactional tables is not supported with query retries enabled");
    }
    if (isTransactional && !autoCommit) {
        throw new TrinoException(NOT_SUPPORTED, "Inserting into Hive transactional tables is not supported in explicit transactions (use autocommit mode)");
    }
    if (isSparkBucketedTable(table)) {
        throw new TrinoException(NOT_SUPPORTED, "Inserting into Spark bucketed tables is not supported");
    }
    List<HiveColumnHandle> handles = hiveColumnHandles(table, typeManager, getTimestampPrecision(session)).stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableList());
    HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
    Optional.ofNullable(table.getParameters().get(SKIP_HEADER_COUNT_KEY))
            .map(Integer::parseInt)
            .ifPresent(headerSkipCount -> {
                if (headerSkipCount > 1) {
                    throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table with value of %s property greater than 1 is not supported", SKIP_HEADER_COUNT_KEY));
                }
            });
    if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Inserting into Hive table with %s property not supported", SKIP_FOOTER_COUNT_KEY));
    }
    LocationHandle locationHandle = locationService.forExistingTable(metastore, session, table);
    AcidTransaction transaction = isTransactional ? metastore.beginInsert(session, table) : NO_ACID_TRANSACTION;
    HiveInsertTableHandle result = new HiveInsertTableHandle(
            tableName.getSchemaName(),
            tableName.getTableName(),
            handles,
            metastore.generatePageSinkMetadata(tableName),
            locationHandle,
            table.getStorage().getBucketProperty(),
            tableStorageFormat,
            isRespectTableFormat(session) ? tableStorageFormat : getHiveStorageFormat(session),
            transaction,
            retryMode != NO_RETRIES);
    WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    if (getInsertExistingPartitionsBehavior(session) == InsertExistingPartitionsBehavior.OVERWRITE
            && writeInfo.getWriteMode() == DIRECT_TO_TARGET_EXISTING_DIRECTORY) {
        if (isTransactional) {
            throw new TrinoException(NOT_SUPPORTED, "Overwriting existing partition in transactional tables doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
        }
        // Partition overwrite is not atomic, so it can't and shouldn't be used outside of autocommit context.
        if (!autoCommit) {
            throw new TrinoException(NOT_SUPPORTED, "Overwriting existing partition in non auto commit context doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
        }
    }
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), tableName);
    return result;
}
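The TypeManager parameter here feeds hiveColumnHandles, which materializes each Hive column's type at the session's timestamp precision. A hedged sketch of that per-column conversion follows; it relies only on the Column.getType() and HiveType.getType(TypeManager) calls that appear in these examples, and the Hive plugin package and class names are assumptions for this era of the code.

import io.trino.plugin.hive.HiveType;
import io.trino.plugin.hive.metastore.Column;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;

final class HiveColumnTypeExample
{
    private HiveColumnTypeExample() {}

    // Illustrative only: map one metastore column to the engine type the writer
    // will see; beginInsert relies on hiveColumnHandles to do this for all columns.
    static Type engineTypeOf(Column column, TypeManager typeManager)
    {
        HiveType hiveType = column.getType();
        return hiveType.getType(typeManager);
    }
}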
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
Example: the finishInsert method of the class HiveMetadata.
@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .collect(toImmutableList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    Table table = metastore.getTable(handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
        throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, false, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            if (handle.isTransactional() && partition.isPresent()) {
                PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(partitionUpdate.getStatistics()).build();
                metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition.get(), partitionUpdate.getWritePath(), Optional.of(partitionUpdate.getFileNames()), statistics, handle.isRetriesEnabled());
            }
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                // get privileges from existing table
                PrincipalPrivileges principalPrivileges = fromHivePrivilegeInfos(metastore.listTablePrivileges(handle.getSchemaName(), handle.getTableName(), Optional.empty()));
                // first drop it
                metastore.dropTable(session, handle.getSchemaName(), handle.getTableName());
                // create the table with the new location
                metastore.createTable(session, table, principalPrivileges, Optional.of(partitionUpdate.getWritePath()), Optional.of(partitionUpdate.getFileNames()), false, partitionStatistics, handle.isRetriesEnabled());
            }
            else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == APPEND) {
                // insert into unpartitioned table
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, handle.isRetriesEnabled());
            }
            else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        }
        else if (partitionUpdate.getUpdateMode() == APPEND) {
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, handle.isRetriesEnabled());
        }
        else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
            // insert into new partition or overwrite existing partition
            Partition partition = buildPartitionObject(session, table, partitionUpdate);
            if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                if (handle.getLocationHandle().getWriteMode() == DIRECT_TO_TARGET_EXISTING_DIRECTORY) {
                    removeNonCurrentQueryFiles(session, partitionUpdate.getTargetPath());
                    if (handle.isRetriesEnabled()) {
                        HdfsContext hdfsContext = new HdfsContext(session);
                        cleanExtraOutputFiles(hdfsEnvironment, hdfsContext, session.getQueryId(), partitionUpdate.getTargetPath(), ImmutableSet.copyOf(partitionUpdate.getFileNames()));
                    }
                }
                else {
                    metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), partition.getValues(), true);
                    metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), Optional.of(partitionUpdate.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
                }
            }
            else {
                metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), Optional.of(partitionUpdate.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
            }
        }
        else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    if (isFullAcidTable(table.getParameters())) {
        HdfsContext context = new HdfsContext(session);
        for (PartitionUpdate update : partitionUpdates) {
            long writeId = handle.getTransaction().getWriteId();
            Path deltaDirectory = new Path(format("%s/%s/%s", table.getStorage().getLocation(), update.getName(), deltaSubdir(writeId, writeId, 0)));
            createOrcAcidVersionFile(context, deltaDirectory);
        }
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toImmutableList())));
}
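The only TypeManager use in finishInsert is the columnTypes map: every input column's HiveType is resolved to an engine Type so partition statistics can be keyed per column. A small sketch of just that step, assuming the same HiveColumnHandle accessors shown above (the wrapper class and helper name are illustrative):

import com.google.common.collect.ImmutableMap;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;
import java.util.List;
import java.util.Map;

final class ColumnTypesExample
{
    private ColumnTypesExample() {}

    // Illustrative only: build the name -> engine type map used when creating
    // PartitionStatistics in finishInsert.
    static Map<String, Type> columnTypes(List<HiveColumnHandle> inputColumns, TypeManager typeManager)
    {
        return inputColumns.stream()
                .collect(ImmutableMap.toImmutableMap(
                        HiveColumnHandle::getName,
                        column -> column.getHiveType().getType(typeManager)));
    }
}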
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
Example: the getInsertLayout method of the class HiveMetadata.
@Override
public Optional<ConnectorTableLayout> getInsertLayout(ConnectorSession session, ConnectorTableHandle tableHandle)
{
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (table.getStorage().getBucketProperty().isPresent()) {
        if (!isSupportedBucketing(table)) {
            throw new TrinoException(NOT_SUPPORTED, "Cannot write to a table bucketed on an unsupported type");
        }
    }
    // Note: we cannot use hiveTableHandle.isInAcidTransaction() here as transaction is not yet set in HiveTableHandle when getInsertLayout is called
    else if (isFullAcidTable(table.getParameters())) {
        table = Table.builder(table)
                .withStorage(storage -> storage.setBucketProperty(Optional.of(new HiveBucketProperty(
                        ImmutableList.of(),
                        HiveBucketing.BucketingVersion.BUCKETING_V2,
                        1,
                        ImmutableList.of()))))
                .build();
    }
    Optional<HiveBucketHandle> hiveBucketHandle = getHiveBucketHandle(session, table, typeManager);
    List<Column> partitionColumns = table.getPartitionColumns();
    if (hiveBucketHandle.isEmpty()) {
        // return preferred layout which is partitioned by partition columns
        if (partitionColumns.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(new ConnectorTableLayout(partitionColumns.stream()
                .map(Column::getName)
                .collect(toImmutableList())));
    }
    HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty()
            .orElseThrow(() -> new NoSuchElementException("Bucket property should be set"));
    if (!bucketProperty.getSortedBy().isEmpty() && !isSortedWritingEnabled(session)) {
        throw new TrinoException(NOT_SUPPORTED, "Writing to bucketed sorted Hive tables is disabled");
    }
    ImmutableList.Builder<String> partitioningColumns = ImmutableList.builder();
    hiveBucketHandle.get().getColumns().stream().map(HiveColumnHandle::getName).forEach(partitioningColumns::add);
    partitionColumns.stream().map(Column::getName).forEach(partitioningColumns::add);
    HivePartitioningHandle partitioningHandle = new HivePartitioningHandle(
            hiveBucketHandle.get().getBucketingVersion(),
            hiveBucketHandle.get().getTableBucketCount(),
            hiveBucketHandle.get().getColumns().stream()
                    .map(HiveColumnHandle::getHiveType)
                    .collect(toImmutableList()),
            OptionalInt.of(hiveBucketHandle.get().getTableBucketCount()),
            !partitionColumns.isEmpty() && isParallelPartitionedBucketedWrites(session));
    return Optional.of(new ConnectorTableLayout(partitioningHandle, partitioningColumns.build()));
}
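The branching above reduces to three cases: no bucketing and no partition columns means no preferred layout, partition columns alone yield a layout keyed by their names, and bucketing yields a HivePartitioningHandle. The sketch below covers only the unbucketed case, reusing the ConnectorTableLayout constructor seen above; the wrapper class and helper name are illustrative, and package names are assumptions.

import com.google.common.collect.ImmutableList;
import io.trino.plugin.hive.metastore.Column;
import io.trino.spi.connector.ConnectorTableLayout;
import java.util.List;
import java.util.Optional;

final class InsertLayoutExample
{
    private InsertLayoutExample() {}

    // Illustrative only: the preferred layout for an unbucketed table is either
    // absent (no partition columns) or partitioned by the partition column names.
    static Optional<ConnectorTableLayout> partitionOnlyLayout(List<Column> partitionColumns)
    {
        if (partitionColumns.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(new ConnectorTableLayout(partitionColumns.stream()
                .map(Column::getName)
                .collect(ImmutableList.toImmutableList())));
    }
}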
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
Example: the createFileWriter method of the class RcFileFileWriterFactory.
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf configuration,
        ConnectorSession session,
        OptionalInt bucketNumber,
        AcidTransaction transaction,
        boolean useAcidSchema,
        WriterKind writerKind)
{
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
        rcFileEncoding = new BinaryRcFileEncoding(timeZone);
    }
    else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
        rcFileEncoding = createTextVectorEncoding(schema);
    }
    else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session)))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
        OutputStream outputStream = fileSystem.create(path, false);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
                }
                catch (IOException e) {
                    throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new RcFileFileWriter(
                outputStream,
                rollbackAction,
                rcFileEncoding,
                fileColumnTypes,
                codecName,
                fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .buildOrThrow(),
                validationInputFactory));
    }
    catch (Exception e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
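Two details above are easy to miss: the file's column types come from the table schema via HiveType.getType(typeManager, timestampPrecision), and the writer remaps its input columns into the file's column order. The sketch below shows the reordering step alone; it needs nothing beyond the JDK, and the class and helper names are illustrative.

import java.util.List;

final class ColumnOrderExample
{
    private ColumnOrderExample() {}

    // Illustrative only: for each column in the file schema, find its index in the
    // writer's input columns; -1 means the input does not provide that column.
    static int[] fileInputColumnIndexes(List<String> fileColumnNames, List<String> inputColumnNames)
    {
        return fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();
    }
}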