use of io.trino.spi.type.TypeManager in project trino by trinodb.
the class HiveMetadata method finishCreateTable.
/**
 * Finishes a table creation that wrote data: registers the new table and, for partitioned
 * tables, every written partition with the metastore.
 *
 * <p>The steps are, in order: decode the partition updates carried by {@code fragments},
 * build the table object, optionally create empty files for missing buckets, derive
 * statistics, create the table, and finally add the partitions.
 *
 * @param session the session; its query id is used when building the table object
 * @param tableHandle the output handle, which must be a {@code HiveOutputTableHandle}
 * @param fragments JSON-encoded {@code PartitionUpdate}s, decoded with {@code partitionUpdateCodec}
 * @param computedStatistics statistics collected during the write, grouped per partition below
 * @return the names of all partition updates, wrapped in {@code HiveWrittenPartitions}
 */
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toImmutableList());
    WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(session.getQueryId(), handle.getSchemaName(), handle.getTableName(), handle.getTableOwner(), handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionedBy(), handle.getBucketProperty(), handle.getAdditionalTableParameters(), Optional.of(writeInfo.getTargetPath()), handle.isExternal(), prestoVersion, accessControlMetadata.isUsingSystemSecurity());
    PrincipalPrivileges principalPrivileges = accessControlMetadata.isUsingSystemSecurity() ? NO_PRIVILEGES : buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);

    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, true, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
        if (handle.isTransactional()) {
            AcidTransaction transaction = handle.getTransaction();
            List<String> partitionNames = partitionUpdates.stream().map(PartitionUpdate::getName).collect(toImmutableList());
            metastore.addDynamicPartitions(handle.getSchemaName(), handle.getTableName(), partitionNames, transaction.getAcidTransactionId(), transaction.getWriteId(), AcidOperation.CREATE_TABLE);
        }
    }

    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);

    // Table-level statistics are only derived for unpartitioned tables; partitioned tables
    // get empty table statistics and carry their statistics on each partition instead.
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        HiveBasicStatistics basicStatistics = partitionUpdates.stream().map(PartitionUpdate::getStatistics).reduce((first, second) -> reduce(first, second, ADD)).orElse(createZeroStatistics());
        tableStatistics = createPartitionStatistics(basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    } else {
        tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
    }

    if (handle.getPartitionedBy().isEmpty()) {
        List<String> fileNames;
        if (partitionUpdates.isEmpty()) {
            // creating empty table via CTAS ... WITH NO DATA
            fileNames = ImmutableList.of();
        } else {
            fileNames = getOnlyElement(partitionUpdates).getFileNames();
        }
        metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), Optional.of(fileNames), false, tableStatistics, handle.isRetriesEnabled());
    } else {
        metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), Optional.empty(), false, tableStatistics, false);
    }

    if (!handle.getPartitionedBy().isEmpty()) {
        if (isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        for (PartitionUpdate update : partitionUpdates) {
            // Build the partition once and reuse it for both the statistics lookup and the
            // metastore call, instead of constructing an identical object a second time.
            // NOTE(review): assumes buildPartitionObject is deterministic for the same arguments - confirm.
            Partition partition = buildPartitionObject(session, table, update);
            PartitionStatistics partitionStatistics = createPartitionStatistics(update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, update.getWritePath(), Optional.of(update.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
        }
    }

    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toImmutableList())));
}
use of io.trino.spi.type.TypeManager in project trino by trinodb.
the class HiveSplitManager method getSplits.
/**
 * Produces the split source for a Hive table scan.
 *
 * <p>Looks the table up in the transaction's metastore, rejects tables flagged as not readable,
 * loads the partitions to scan and starts a background split loader that feeds the returned
 * split source.
 *
 * @return an empty {@code FixedSplitSource} when there are no partitions, otherwise a
 *         {@code HiveSplitSource} matching {@code splitSchedulingStrategy}
 * @throws TableNotFoundException if the metastore has no such table
 * @throws HiveNotReadableException if the table carries a non-empty {@code OBJECT_NOT_READABLE} parameter
 * @throws TrinoException if grouped scheduling is requested but the table handle has no bucket handle
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTable.getSchemaTableName();

    // Resolve the table through the metastore bound to this transaction
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));

    // A non-empty OBJECT_NOT_READABLE parameter marks the table as unreadable
    String notReadableReason = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(notReadableReason)) {
        throw new HiveNotReadableException(schemaTableName, Optional.empty(), notReadableReason);
    }

    // Nothing to scan: answer with an empty split source right away
    List<HivePartition> loadedPartitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);
    if (loadedPartitions.isEmpty()) {
        return hiveTable.isRecordScannedFiles()
                ? new FixedSplitSource(ImmutableList.of(), ImmutableList.of())
                : new FixedSplitSource(ImmutableList.of());
    }

    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    // Grouped (bucketed) scheduling is only valid when the handle carries bucketing information
    if (bucketHandle.isEmpty() && (splitSchedulingStrategy == GROUPED_SCHEDULING)) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }

    // Order partitions by partition id, descending
    List<HivePartition> sortedPartitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(loadedPartitions);
    Iterable<HivePartitionMetadata> partitionMetadata = getPartitionMetadata(session, metastore, table, schemaTableName, sortedPartitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));

    // Only one thread per partition is usable when a table is not transactional
    int loaderConcurrency;
    if (isTransactionalTable(table.getParameters())) {
        loaderConcurrency = splitLoaderConcurrency;
    } else {
        loaderConcurrency = min(splitLoaderConcurrency, sortedPartitions.size());
    }

    HiveSplitLoader splitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            partitionMetadata,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            loaderConcurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable).map(writeIds -> writeIds.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());

    HiveSplitSource result;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            result = HiveSplitSource.allAtOnce(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    splitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            result = HiveSplitSource.bucketed(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    splitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }

    // Start loading only after the source exists, since the loader is handed the source
    splitLoader.start(result);
    return result;
}
use of io.trino.spi.type.TypeManager in project trino by trinodb.
the class HiveWriterFactory method createWriter.
/**
 * Creates a {@link HiveWriter} for the partition described by row {@code position} of
 * {@code partitionColumns} and, for bucketed tables, the given bucket.
 *
 * <p>The method decides the update mode, schema, write location and storage format from
 * whether the table and the partition already exist and from
 * {@code insertExistingPartitionsBehavior}, picks a file writer (falling back to
 * {@code RecordFileWriter}), and wraps it in a {@code SortingFileWriter} when
 * {@code sortedBy} is not empty.
 *
 * @param partitionColumns page holding the partition column values
 * @param position row in {@code partitionColumns} to take the partition values from
 * @param bucketNumber bucket to write; must be present exactly when {@code bucketCount} is present
 * @return a writer targeting the computed file path
 * @throws IllegalArgumentException if {@code insertExistingPartitionsBehavior} is not handled by the
 *         switches below (bucket argument checks go through {@code checkArgument})
 * @throws TrinoException if the target directory of a new partition already exists, the existing
 *         table/partition must not be written to, an existing partition's column types differ
 *         from the table's, the temporary sort file system cannot be opened, or a sorting
 *         column is not one of the data columns
 */
public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) {
    if (bucketCount.isPresent()) {
        checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
        checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(), "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
    } else {
        checkArgument(bucketNumber.isEmpty(), "Bucket number provided by for table that is not bucketed");
    }

    List<String> partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position);
    Optional<String> partitionName;
    if (!partitionColumnNames.isEmpty()) {
        partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));
    } else {
        partitionName = Optional.empty();
    }

    // attempt to get the existing partition (if this is an existing partitioned table)
    Optional<Partition> partition = Optional.empty();
    if (!partitionValues.isEmpty() && table != null) {
        partition = pageSinkMetadataProvider.getPartition(partitionValues);
    }

    UpdateMode updateMode;
    Properties schema;
    WriteInfo writeInfo;
    StorageFormat outputStorageFormat;
    if (partition.isEmpty()) {
        if (table == null) {
            // Write to: a new partition in a new partitioned table,
            // or a new unpartitioned table.
            updateMode = UpdateMode.NEW;
            schema = new Properties();
            schema.setProperty(IOConstants.COLUMNS, dataColumns.stream().map(DataColumn::getName).collect(joining(",")));
            schema.setProperty(IOConstants.COLUMNS_TYPES, dataColumns.stream().map(DataColumn::getHiveType).map(HiveType::getHiveTypeName).map(HiveTypeName::toString).collect(joining(":")));
            if (partitionName.isEmpty()) {
                // new unpartitioned table
                writeInfo = locationService.getTableWriteInfo(locationHandle, false);
            } else {
                // a new partition in a new partitioned table
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
                if (!writeInfo.getWriteMode().isWritePathSameAsTargetPath()) {
                    // verify that the target directory for the partition does not already exist
                    if (HiveWriteUtils.pathExists(new HdfsContext(session), hdfsEnvironment, writeInfo.getTargetPath())) {
                        throw new TrinoException(HIVE_PATH_ALREADY_EXISTS, format("Target directory for new partition '%s' of table '%s.%s' already exists: %s", partitionName, schemaName, tableName, writeInfo.getTargetPath()));
                    }
                }
            }
        } else {
            // Write to: a new partition in an existing partitioned table,
            // or an existing unpartitioned table
            if (partitionName.isPresent()) {
                // a new partition in an existing partitioned table
                updateMode = UpdateMode.NEW;
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
            } else {
                switch (insertExistingPartitionsBehavior) {
                    case APPEND:
                        updateMode = UpdateMode.APPEND;
                        writeInfo = locationService.getTableWriteInfo(locationHandle, false);
                        break;
                    case OVERWRITE:
                        updateMode = UpdateMode.OVERWRITE;
                        writeInfo = locationService.getTableWriteInfo(locationHandle, true);
                        break;
                    case ERROR:
                        throw new TrinoException(HIVE_TABLE_READ_ONLY, "Unpartitioned Hive tables are immutable");
                    default:
                        throw new IllegalArgumentException("Unsupported insert existing table behavior: " + insertExistingPartitionsBehavior);
                }
            }
            schema = getHiveSchema(table);
        }
        if (partitionName.isPresent()) {
            // Write to a new partition
            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
        } else {
            // Write to a new/existing unpartitioned table
            outputStorageFormat = fromHiveStorageFormat(tableStorageFormat);
        }
    } else {
        switch (insertExistingPartitionsBehavior) {
            // Write to: an existing partition in an existing partitioned table
            case APPEND:
                // Append to an existing partition
                updateMode = UpdateMode.APPEND;
                // Check the column types in partition schema match the column types in table schema
                List<Column> tableColumns = table.getDataColumns();
                List<Column> existingPartitionColumns = partition.get().getColumns();
                for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) {
                    HiveType tableType = tableColumns.get(i).getType();
                    HiveType partitionType = existingPartitionColumns.get(i).getType();
                    if (!tableType.equals(partitionType)) {
                        throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" + "You are trying to write into an existing partition in a table. " + "The table schema has changed since the creation of the partition. " + "Inserting rows into such partition is not supported. " + "The column '%s' in table '%s' is declared as type '%s', " + "but partition '%s' declared column '%s' as type '%s'.", tableColumns.get(i).getName(), tableName, tableType, partitionName, existingPartitionColumns.get(i).getName(), partitionType));
                    }
                }
                HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get());
                outputStorageFormat = partition.get().getStorage().getStorageFormat();
                schema = getHiveSchema(partition.get(), table);
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
                break;
            case OVERWRITE:
                // Overwrite an existing partition
                //
                // The behavior of overwrite considered as if first dropping the partition and inserting a new partition, thus:
                // * No partition writable check is required.
                // * Table schema and storage format is used for the new partition (instead of existing partition schema and storage format).
                updateMode = UpdateMode.OVERWRITE;
                outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
                schema = getHiveSchema(table);
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(), partitionName.get());
                break;
            case ERROR:
                throw new TrinoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionName.get());
            default:
                throw new IllegalArgumentException(format("Unsupported insert existing partitions behavior: %s", insertExistingPartitionsBehavior));
        }
    }

    additionalTableParameters.forEach(schema::setProperty);
    validateSchema(partitionName, schema);

    // Non-bucketed writes use bucket 0 when computing the ACID bucket path
    int bucketToUse = bucketNumber.isEmpty() ? 0 : bucketNumber.getAsInt();
    Path path;
    String fileNameWithExtension;
    if (transaction.isAcidTransactionRunning()) {
        String subdir = computeAcidSubdir(transaction);
        Path subdirPath = new Path(writeInfo.getWritePath(), subdir);
        path = createHiveBucketPath(subdirPath, bucketToUse, table.getParameters());
        fileNameWithExtension = path.getName();
    } else {
        String fileName = computeFileName(bucketNumber);
        fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat);
        path = new Path(writeInfo.getWritePath(), fileNameWithExtension);
    }

    boolean useAcidSchema = isCreateTransactionalTable || (table != null && isFullAcidTable(table.getParameters()));

    // Use the first factory that can produce a writer for this format; otherwise fall back
    // to the generic RecordFileWriter below.
    FileWriter hiveFileWriter = null;
    for (HiveFileWriterFactory fileWriterFactory : fileWriterFactories) {
        Optional<FileWriter> fileWriter = fileWriterFactory.createFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, conf, session, bucketNumber, transaction, useAcidSchema, WriterKind.INSERT);
        if (fileWriter.isPresent()) {
            hiveFileWriter = fileWriter.get();
            break;
        }
    }
    if (hiveFileWriter == null) {
        hiveFileWriter = new RecordFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, partitionStorageFormat.getEstimatedWriterMemoryUsage(), conf, typeManager, parquetTimeZone, session);
    }

    // Captured before any sorting wrapper is applied, so the event reports the underlying writer class
    String writerImplementation = hiveFileWriter.getClass().getName();

    Consumer<HiveWriter> onCommit = hiveWriter -> {
        Optional<Long> size;
        try {
            size = Optional.of(hiveWriter.getWrittenBytes());
        } catch (RuntimeException e) {
            // Do not fail the query if file system is not available
            size = Optional.empty();
        }
        eventClient.post(new WriteCompletedEvent(session.getQueryId(), path.toString(), schemaName, tableName, partitionName.orElse(null), outputStorageFormat.getOutputFormat(), writerImplementation, nodeManager.getCurrentNode().getVersion(), nodeManager.getCurrentNode().getHost(), session.getIdentity().getPrincipal().map(Principal::getName).orElse(null), nodeManager.getEnvironment(), sessionProperties, size.orElse(null), hiveWriter.getRowCount()));
    };

    if (!sortedBy.isEmpty()) {
        FileSystem fileSystem;
        Path tempFilePath;
        if (sortedWritingTempStagingPathEnabled) {
            String tempPrefix = sortedWritingTempStagingPath.replace("${USER}", new HdfsContext(session).getIdentity().getUser());
            tempFilePath = new Path(tempPrefix, ".tmp-sort." + path.getParent().getName() + "." + path.getName());
        } else {
            tempFilePath = new Path(path.getParent(), ".tmp-sort." + path.getName());
        }
        try {
            Configuration configuration = new Configuration(conf);
            // Explicitly set the default FS to local file system to avoid getting HDFS when sortedWritingTempStagingPath specifies no scheme
            configuration.set(FS_DEFAULT_NAME_KEY, "file:///");
            fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), tempFilePath, configuration);
        } catch (IOException e) {
            throw new TrinoException(HIVE_WRITER_OPEN_ERROR, e);
        }

        List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager, getTimestampPrecision(session))).collect(toImmutableList());

        // Map each data column name to its position so sort columns can be resolved to indexes
        Map<String, Integer> columnIndexes = new HashMap<>();
        for (int i = 0; i < dataColumns.size(); i++) {
            columnIndexes.put(dataColumns.get(i).getName(), i);
        }

        List<Integer> sortFields = new ArrayList<>();
        List<SortOrder> sortOrders = new ArrayList<>();
        for (SortingColumn column : sortedBy) {
            Integer index = columnIndexes.get(column.getColumnName());
            if (index == null) {
                // A null lookup means the sorting column is not one of the data columns
                throw new TrinoException(HIVE_INVALID_METADATA, format("Sorting column '%s' does not exist in table '%s.%s'", column.getColumnName(), schemaName, tableName));
            }
            sortFields.add(index);
            sortOrders.add(column.getOrder().getSortOrder());
        }

        hiveFileWriter = new SortingFileWriter(fileSystem, tempFilePath, hiveFileWriter, sortBufferSize, maxOpenSortFiles, types, sortFields, sortOrders, pageSorter, typeManager.getTypeOperators(), OrcFileWriterFactory::createOrcDataSink);
    }

    return new HiveWriter(hiveFileWriter, partitionName, updateMode, fileNameWithExtension, writeInfo.getWritePath().toString(), writeInfo.getTargetPath().toString(), onCommit, hiveWriterStats);
}
use of io.trino.spi.type.TypeManager in project trino by trinodb.
the class MongoClientModule method createMongoSession.
/**
 * Provides the singleton {@code MongoSession}, backed by a {@code MongoClient} configured
 * from {@code config}.
 *
 * <p>Write concern, read preference, pool, socket and SSL settings are always applied. When a
 * connection URL is configured it is applied as a connection string; otherwise the configured
 * seeds are used as hosts and the first configured credential, if any, is set.
 *
 * @param typeManager passed through to the session
 * @param config connector configuration; must not be null
 * @return a new session wrapping the created client
 */
@Singleton
@Provides
public static MongoSession createMongoSession(TypeManager typeManager, MongoClientConfig config) {
    requireNonNull(config, "config is null");

    MongoClientSettings.Builder settings = MongoClientSettings.builder();
    settings.writeConcern(config.getWriteConcern().getWriteConcern());
    settings.readPreference(config.getReadPreference().getReadPreference());
    settings.applyToConnectionPoolSettings(pool -> pool
            .maxConnectionIdleTime(config.getMaxConnectionIdleTime(), MILLISECONDS)
            .maxWaitTime(config.getMaxWaitTime(), MILLISECONDS)
            .minSize(config.getMinConnectionsPerHost())
            .maxSize(config.getConnectionsPerHost()));
    settings.applyToSocketSettings(socket -> socket
            .connectTimeout(config.getConnectionTimeout(), MILLISECONDS)
            .readTimeout(config.getSocketTimeout(), MILLISECONDS));
    settings.applyToSslSettings(ssl -> ssl.enabled(config.getSslEnabled()));

    // The replica set name is optional
    if (config.getRequiredReplicaSetName() != null) {
        settings.applyToClusterSettings(cluster -> cluster.requiredReplicaSetName(config.getRequiredReplicaSetName()));
    }

    if (config.getConnectionUrl().isPresent()) {
        ConnectionString connectionString = new ConnectionString(config.getConnectionUrl().get());
        settings.applyConnectionString(connectionString);
    } else {
        // No URL configured: connect to the seed hosts, authenticating with the first credential when one exists
        settings.applyToClusterSettings(cluster -> cluster.hosts(config.getSeeds()));
        if (!config.getCredentials().isEmpty()) {
            settings.credential(config.getCredentials().get(0));
        }
    }

    MongoClient mongoClient = MongoClients.create(settings.build());
    return new MongoSession(typeManager, mongoClient, config);
}
use of io.trino.spi.type.TypeManager in project trino by trinodb.
the class RaptorStorageManager method toOrcFileType.
/**
 * Maps a Raptor type to the type used for it in ORC files.
 *
 * <p>{@code TIMESTAMP_MILLIS} becomes {@code BIGINT}; array, map and row types are rebuilt
 * with their element, key/value and field types converted recursively; every other type is
 * returned unchanged.
 *
 * @param raptorType the type to convert
 * @param typeManager used to resolve the parameterized map type
 * @return the corresponding ORC file type
 */
static Type toOrcFileType(Type raptorType, TypeManager typeManager) {
    // Timestamps are stored as BIGINT to avoid the poor encoding in ORC
    if (raptorType.equals(TIMESTAMP_MILLIS)) {
        return BIGINT;
    }

    if (raptorType instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) raptorType;
        return new ArrayType(toOrcFileType(arrayType.getElementType(), typeManager));
    }

    if (raptorType instanceof MapType) {
        MapType mapType = (MapType) raptorType;
        TypeSignature keySignature = toOrcFileType(mapType.getKeyType(), typeManager).getTypeSignature();
        TypeSignature valueSignature = toOrcFileType(mapType.getValueType(), typeManager).getTypeSignature();
        return typeManager.getParameterizedType(
                StandardTypes.MAP,
                ImmutableList.of(
                        TypeSignatureParameter.typeParameter(keySignature),
                        TypeSignatureParameter.typeParameter(valueSignature)));
    }

    if (raptorType instanceof RowType) {
        RowType rowType = (RowType) raptorType;
        // Keep each field's name and convert only its type
        ImmutableList.Builder<Field> convertedFields = ImmutableList.builder();
        for (Field original : rowType.getFields()) {
            convertedFields.add(new Field(original.getName(), toOrcFileType(original.getType(), typeManager)));
        }
        return RowType.from(convertedFields.build());
    }

    return raptorType;
}
Aggregations