Use of io.trino.spi.type.TypeManager in project trino by trinodb.
The class TestTransactionLogAccess, method setupTransactionLogAccess.
private void setupTransactionLogAccess(String tableName, Path tableLocation) throws IOException {
    TestingConnectorContext context = new TestingConnectorContext();
    TypeManager typeManager = context.getTypeManager();
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileFormatDataSourceStats fileFormatDataSourceStats = new FileFormatDataSourceStats();
    transactionLogAccess = new TrackingTransactionLogAccess(
            tableName,
            tableLocation,
            SESSION,
            typeManager,
            new CheckpointSchemaManager(typeManager),
            new DeltaLakeConfig(),
            fileFormatDataSourceStats,
            hdfsEnvironment,
            new ParquetReaderConfig());
    DeltaLakeTableHandle tableHandle = new DeltaLakeTableHandle(
            "schema",
            tableName,
            "location", // ignored
            Optional.empty(),
            TupleDomain.none(),
            TupleDomain.none(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            Optional.empty(),
            0);
    tableSnapshot = transactionLogAccess.loadSnapshot(tableHandle.getSchemaTableName(), tableLocation, SESSION);
}
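For context, the TypeManager used above is the engine-provided implementation exposed by TestingConnectorContext. A minimal, hedged sketch of resolving a Trino type through it (the bigint example is illustrative and not part of the original test):

// Illustrative sketch, not from TestTransactionLogAccess: resolve a Trino type
// by signature through the TypeManager obtained from a TestingConnectorContext.
TypeManager typeManager = new TestingConnectorContext().getTypeManager();
Type bigintType = typeManager.getType(new TypeSignature("bigint")); // io.trino.spi.type.TypeSignature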
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
The class HiveMetadata, method finishStatisticsCollection.
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = handle.getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    List<Column> partitionColumns = table.getPartitionColumns();
    List<String> partitionColumnNames = partitionColumns.stream()
            .map(Column::getName)
            .collect(toImmutableList());
    HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table, typeManager, timestampPrecision);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager, timestampPrecision)));
    Map<List<String>, ComputedStatistics> computedStatisticsMap = createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);
    if (partitionColumns.isEmpty()) {
        // commit analyze to unpartitioned table
        metastore.setTableStatistics(table, createPartitionStatistics(columnTypes, computedStatisticsMap.get(ImmutableList.<String>of())));
    }
    else {
        List<List<String>> partitionValuesList;
        if (handle.getAnalyzePartitionValues().isPresent()) {
            partitionValuesList = handle.getAnalyzePartitionValues().get();
        }
        else {
            partitionValuesList = metastore.getPartitionNames(handle.getSchemaName(), handle.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(((HiveTableHandle) tableHandle).getSchemaTableName()))
                    .stream()
                    .map(HiveUtil::toPartitionValues)
                    .collect(toImmutableList());
        }
        ImmutableMap.Builder<List<String>, PartitionStatistics> partitionStatistics = ImmutableMap.builder();
        Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
                .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
                .filter(column -> !column.isHidden())
                .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(column.getType()))));
        Supplier<PartitionStatistics> emptyPartitionStatistics = Suppliers.memoize(() -> createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
        int usedComputedStatistics = 0;
        for (List<String> partitionValues : partitionValuesList) {
            ComputedStatistics collectedStatistics = computedStatisticsMap.get(partitionValues);
            if (collectedStatistics == null) {
                partitionStatistics.put(partitionValues, emptyPartitionStatistics.get());
            }
            else {
                usedComputedStatistics++;
                partitionStatistics.put(partitionValues, createPartitionStatistics(columnTypes, collectedStatistics));
            }
        }
        verify(usedComputedStatistics == computedStatistics.size(), "All computed statistics must be used");
        metastore.setPartitionStatistics(table, partitionStatistics.buildOrThrow());
    }
}
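The columnTypes map above is where the TypeManager does its work: each column's HiveType is translated into a Trino Type at the session's timestamp precision. A hedged, single-column illustration of that translation (the HIVE_LONG constant is just an example column type, not taken from this method):

// Illustrative only: how one Hive column type is resolved to a Trino type,
// mirroring the collector used to build columnTypes above.
Type trinoType = HiveType.HIVE_LONG.getType(typeManager, timestampPrecision);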
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
The class GenericHiveRecordCursorProvider, method createRecordCursor.
@Override
public Optional<ReaderRecordCursorWithProjections> createRecordCursor(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        long fileSize,
        Properties schema,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        TypeManager typeManager,
        boolean s3SelectPushdownEnabled) {
    configuration.setInt(LineRecordReader.MAX_LINE_LENGTH, textMaxLineLengthBytes);
    // make sure the FileSystem is created with the proper Configuration object
    try {
        this.hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
    }
    catch (IOException e) {
        throw new TrinoException(HIVE_FILESYSTEM_ERROR, "Failed getting FileSystem: " + path, e);
    }
    Optional<ReaderColumns> projections = projectBaseColumns(columns);
    List<HiveColumnHandle> readerColumns = projections
            .map(ReaderColumns::get)
            .map(columnHandles -> columnHandles.stream()
                    .map(HiveColumnHandle.class::cast)
                    .collect(toUnmodifiableList()))
            .orElse(columns);
    RecordCursor cursor = hdfsEnvironment.doAs(session.getIdentity(), () -> {
        RecordReader<?, ?> recordReader = HiveUtil.createRecordReader(configuration, path, start, length, schema, readerColumns);
        try {
            return new GenericHiveRecordCursor<>(configuration, path, genericRecordReader(recordReader), length, schema, readerColumns);
        }
        catch (Exception e) {
            try {
                recordReader.close();
            }
            catch (IOException closeException) {
                if (e != closeException) {
                    e.addSuppressed(closeException);
                }
            }
            throw e;
        }
    });
    return Optional.of(new ReaderRecordCursorWithProjections(cursor, projections));
}
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
The class HiveMetadata, method getTableHandleForOptimize.
private Optional<ConnectorTableExecuteHandle> getTableHandleForOptimize(ConnectorSession session, ConnectorTableHandle tableHandle, Map<String, Object> executeProperties, RetryMode retryMode) {
    // TODO lots of this is copied from beginInsert; refactoring opportunity
    if (!isNonTransactionalOptimizeEnabled(session)) {
        // post-optimize, duplicate rows from the original data files will be left in the table and manual cleanup by the user will be required
        throw new TrinoException(NOT_SUPPORTED, "OPTIMIZE procedure must be explicitly enabled via " + NON_TRANSACTIONAL_OPTIMIZE_ENABLED + " session property");
    }
    if (retryMode != NO_RETRIES) {
        throw new TrinoException(NOT_SUPPORTED, "OPTIMIZE procedure is not supported with query retries enabled");
    }
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    checkTableIsWritable(table, writesToNonManagedTablesEnabled);
    for (Column column : table.getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new TrinoException(NOT_SUPPORTED, format("Optimizing Hive table %s with column type %s not supported", tableName, column.getType()));
        }
    }
    if (isTransactionalTable(table.getParameters())) {
        throw new TrinoException(NOT_SUPPORTED, format("Optimizing transactional Hive table %s is not supported", tableName));
    }
    if (table.getStorage().getBucketProperty().isPresent()) {
        throw new TrinoException(NOT_SUPPORTED, format("Optimizing bucketed Hive table %s is not supported", tableName));
    }
    // TODO forcing NANOSECONDS precision here so we do not lose data. In the future we may be smarter; options:
    // - respect timestamp_precision but recognize situations where rounding occurs, and fail the query
    // - detect the data's precision and maintain it
    List<HiveColumnHandle> columns = hiveColumnHandles(table, typeManager, NANOSECONDS).stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableList());
    HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table);
    Optional.ofNullable(table.getParameters().get(SKIP_HEADER_COUNT_KEY)).map(Integer::parseInt).ifPresent(headerSkipCount -> {
        if (headerSkipCount > 1) {
            throw new TrinoException(NOT_SUPPORTED, format("Optimizing Hive table %s with value of %s property greater than 1 is not supported", tableName, SKIP_HEADER_COUNT_KEY));
        }
    });
    if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Optimizing Hive table %s with %s property not supported", tableName, SKIP_FOOTER_COUNT_KEY));
    }
    LocationHandle locationHandle = locationService.forOptimize(metastore, session, table);
    DataSize fileSizeThreshold = (DataSize) executeProperties.get("file_size_threshold");
    return Optional.of(new HiveTableExecuteHandle(
            OptimizeTableProcedure.NAME,
            Optional.empty(),
            Optional.of(fileSizeThreshold.toBytes()),
            tableName.getSchemaName(),
            tableName.getTableName(),
            columns,
            metastore.generatePageSinkMetadata(tableName),
            locationHandle,
            table.getStorage().getBucketProperty(),
            tableStorageFormat,
            // TODO: test with multiple partitions using different storage format
            tableStorageFormat,
            NO_ACID_TRANSACTION,
            retryMode != NO_RETRIES));
}
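The file_size_threshold entry read from executeProperties is expected to already be a DataSize, as the cast above implies. A hedged sketch of the property map this method would receive when the OPTIMIZE procedure is invoked with a threshold (the 128MB value is illustrative, not from the original):

// Illustrative only: shape of executeProperties as consumed above;
// DataSize is io.airlift.units.DataSize.
Map<String, Object> executeProperties = ImmutableMap.of(
        "file_size_threshold", DataSize.of(128, DataSize.Unit.MEGABYTE));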
Use of io.trino.spi.type.TypeManager in project trino by trinodb.
The class HiveMetadata, method beginUpdate.
@Override
public ConnectorTableHandle beginUpdate(ConnectorSession session, ConnectorTableHandle tableHandle, List<ColumnHandle> updatedColumns, RetryMode retryMode) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (!isFullAcidTable(table.getParameters())) {
        throw new TrinoException(NOT_SUPPORTED, "Hive update is only supported for ACID transactional tables");
    }
    if (!autoCommit) {
        throw new TrinoException(NOT_SUPPORTED, "Updating transactional tables is not supported in explicit transactions (use autocommit mode)");
    }
    if (isSparkBucketedTable(table)) {
        throw new TrinoException(NOT_SUPPORTED, "Updating Spark bucketed tables is not supported");
    }
    // Verify that none of the updated columns are partition columns or bucket columns
    Set<String> updatedColumnNames = updatedColumns.stream()
            .map(handle -> ((HiveColumnHandle) handle).getName())
            .collect(toImmutableSet());
    Set<String> partitionColumnNames = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableSet());
    if (!intersection(updatedColumnNames, partitionColumnNames).isEmpty()) {
        throw new TrinoException(NOT_SUPPORTED, "Updating Hive table partition columns is not supported");
    }
    hiveTableHandle.getBucketHandle().ifPresent(handle -> {
        Set<String> bucketColumnNames = handle.getColumns().stream()
                .map(HiveColumnHandle::getName)
                .collect(toImmutableSet());
        if (!intersection(updatedColumnNames, bucketColumnNames).isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Updating Hive table bucket columns is not supported");
        }
    });
    checkTableIsWritable(table, writesToNonManagedTablesEnabled);
    for (Column column : table.getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new TrinoException(NOT_SUPPORTED, format("Updating a Hive table with column type %s not supported", column.getType()));
        }
    }
    List<HiveColumnHandle> allDataColumns = getRegularColumnHandles(table, typeManager, getTimestampPrecision(session)).stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableList());
    List<HiveColumnHandle> hiveUpdatedColumns = updatedColumns.stream()
            .map(HiveColumnHandle.class::cast)
            .collect(toImmutableList());
    if (table.getParameters().containsKey(SKIP_HEADER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Updating a Hive table with %s property not supported", SKIP_HEADER_COUNT_KEY));
    }
    if (table.getParameters().containsKey(SKIP_FOOTER_COUNT_KEY)) {
        throw new TrinoException(NOT_SUPPORTED, format("Updating a Hive table with %s property not supported", SKIP_FOOTER_COUNT_KEY));
    }
    if (retryMode != NO_RETRIES) {
        throw new TrinoException(NOT_SUPPORTED, "Updating Hive tables is not supported with query retries enabled");
    }
    LocationHandle locationHandle = locationService.forExistingTable(metastore, session, table);
    HiveUpdateProcessor updateProcessor = new HiveUpdateProcessor(allDataColumns, hiveUpdatedColumns);
    AcidTransaction transaction = metastore.beginUpdate(session, table, updateProcessor);
    HiveTableHandle updateHandle = hiveTableHandle.withTransaction(transaction);
    WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), tableName);
    return updateHandle;
}
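Note the contrast with getTableHandleForOptimize: beginUpdate resolves column types at the session's timestamp precision rather than forcing NANOSECONDS. A hedged one-line illustration of that resolution for a timestamp column (HIVE_TIMESTAMP is just an example constant, not taken from this method):

// Illustrative only: a timestamp column resolved at the session precision,
// as done for allDataColumns above via getRegularColumnHandles.
Type timestampType = HiveType.HIVE_TIMESTAMP.getType(typeManager, getTimestampPrecision(session));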