Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
Class HiveMetadata, method createTemporaryTable:
@Override
public ConnectorTableHandle createTemporaryTable(ConnectorSession session, List<ColumnMetadata> columns, Optional<ConnectorPartitioningMetadata> partitioningMetadata)
{
    String schemaName = getTemporaryTableSchema(session);
    HiveStorageFormat storageFormat = getTemporaryTableStorageFormat(session);
    Optional<HiveBucketProperty> bucketProperty = partitioningMetadata.map(partitioning -> {
        Set<String> allColumns = columns.stream().map(ColumnMetadata::getName).collect(toImmutableSet());
        if (!allColumns.containsAll(partitioning.getPartitionColumns())) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, format("Bucketing columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(partitioning.getPartitionColumns()), allColumns)));
        }
        HivePartitioningHandle partitioningHandle = (HivePartitioningHandle) partitioning.getPartitioningHandle();
        List<String> partitionColumns = partitioning.getPartitionColumns();
        BucketFunctionType bucketFunctionType = partitioningHandle.getBucketFunctionType();
        switch (bucketFunctionType) {
            case HIVE_COMPATIBLE:
                return new HiveBucketProperty(partitionColumns, partitioningHandle.getBucketCount(), ImmutableList.of(), HIVE_COMPATIBLE, Optional.empty());
            case PRESTO_NATIVE:
                Map<String, Type> columnNameToTypeMap = columns.stream().collect(toMap(ColumnMetadata::getName, ColumnMetadata::getType));
                return new HiveBucketProperty(
                        partitionColumns,
                        partitioningHandle.getBucketCount(),
                        ImmutableList.of(),
                        PRESTO_NATIVE,
                        Optional.of(partitionColumns.stream().map(columnNameToTypeMap::get).collect(toImmutableList())));
            default:
                throw new IllegalArgumentException("Unsupported bucket function type " + bucketFunctionType);
        }
    });
    if (isUsePageFileForHiveUnsupportedType(session)) {
        if (!columns.stream().map(ColumnMetadata::getType).allMatch(HiveTypeTranslator::isSupportedHiveType)) {
            storageFormat = PAGEFILE;
        }
    }
    // PAGEFILE format doesn't require translation to hive type,
    // choose HIVE_BINARY as a default hive type to make it compatible with Hive connector
    Optional<HiveType> defaultHiveType = storageFormat == PAGEFILE ? Optional.of(HIVE_BINARY) : Optional.empty();
    List<HiveColumnHandle> columnHandles = getColumnHandles(
            // Hive doesn't support anonymous rows and the unknown type; since this method
            // doesn't create a real table, it is fine to assign dummy field names to the
            // anonymous rows and to translate the unknown type to the boolean type, which
            // is binary compatible
            translateHiveUnsupportedTypesForTemporaryTable(columns, typeManager),
            ImmutableSet.of(),
            typeTranslator,
            defaultHiveType);
    validateColumns(storageFormat, columnHandles);
    HiveStorageFormat finalStorageFormat = storageFormat;
    String tableName = PRESTO_TEMPORARY_TABLE_NAME_PREFIX + finalStorageFormat.name()
            + "_" + session.getQueryId().replaceAll("-", "_")
            + "_" + randomUUID().toString().replaceAll("-", "_");
    Table table = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(session.getUser())
            .setTableType(TEMPORARY_TABLE)
            .setDataColumns(columnHandles.stream()
                    .map(handle -> new Column(handle.getName(), handle.getHiveType(), handle.getComment(), Optional.empty()))
                    .collect(toImmutableList()))
            .withStorage(storage -> storage
                    .setStorageFormat(fromHiveStorageFormat(finalStorageFormat))
                    .setBucketProperty(bucketProperty)
                    .setLocation(""))
            .build();
    List<String> partitionColumnNames = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(
                    HiveColumnHandle::getName,
                    column -> ImmutableSet.copyOf(getSupportedColumnStatisticsForTemporaryTable(typeManager.getType(column.getTypeSignature())))));
    metastore.createTable(session, table, buildInitialPrivilegeSet(table.getOwner()), Optional.empty(), false, createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
    return new HiveTableHandle(schemaName, tableName);
}
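For illustration, here is a minimal, self-contained sketch of the temporary-table naming scheme used above. The prefix constant and the example query id are assumptions for illustration; the real values come from HiveMetadata and ConnectorSession.

import java.util.UUID;

public class TemporaryTableNameSketch {
    // hypothetical stand-in for HiveMetadata's PRESTO_TEMPORARY_TABLE_NAME_PREFIX constant
    private static final String PREFIX = "__presto_temporary_table_";

    static String temporaryTableName(String storageFormatName, String queryId) {
        // dashes are replaced with underscores so the name stays a valid Hive identifier
        return PREFIX + storageFormatName
                + "_" + queryId.replaceAll("-", "_")
                + "_" + UUID.randomUUID().toString().replaceAll("-", "_");
    }

    public static void main(String[] args) {
        // e.g. __presto_temporary_table_ORC_20220101_000000_00000_abcde_<uuid>
        System.out.println(temporaryTableName("ORC", "20220101_000000_00000_abcde"));
    }
}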
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
Class HivePageSourceProvider, method createSelectivePageSource:
private static Optional<ConnectorPageSource> createSelectivePageSource(Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories, Configuration configuration, ConnectorSession session, HiveSplit split, HiveTableLayoutHandle layout, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, LoadingCache<RowExpressionCacheKey, RowExpression> rowExpressionCache, SplitContext splitContext, Optional<EncryptionInformation> encryptionInformation)
{
    Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder()
            .addAll(layout.getPredicateColumns().values())
            .addAll(split.getBucketConversion().map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of()))
            .build();
    Set<String> columnNames = columns.stream().map(HiveColumnHandle::getName).collect(toImmutableSet());
    List<HiveColumnHandle> allColumns = ImmutableList.<HiveColumnHandle>builder()
            .addAll(columns)
            .addAll(interimColumns.stream().filter(column -> !columnNames.contains(column.getName())).collect(toImmutableList()))
            .build();
    Path path = new Path(split.getPath());
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(split.getPartitionKeys(), allColumns, ImmutableList.of(), split.getTableToPartitionMapping(), path, split.getTableBucketNumber(), split.getFileSize(), split.getFileModifiedTime());
    Optional<BucketAdaptation> bucketAdaptation = split.getBucketConversion().map(conversion -> toBucketAdaptation(conversion, columnMappings, split.getTableBucketNumber(), mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex()));
    Map<Integer, String> prefilledValues = columnMappings.stream()
            .filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED)
            .collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));
    Map<Integer, HiveCoercer> coercers = columnMappings.stream()
            .filter(mapping -> mapping.getCoercionFrom().isPresent())
            .collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));
    List<Integer> outputColumns = columns.stream().map(HiveColumnHandle::getHiveColumnIndex).collect(toImmutableList());
    RowExpression optimizedRemainingPredicate = rowExpressionCache.getUnchecked(new RowExpressionCacheKey(layout.getRemainingPredicate(), session));
    if (shouldSkipBucket(layout, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }
    if (shouldSkipPartition(typeManager, layout, hiveStorageTimeZone, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }
    CacheQuota cacheQuota = generateCacheQuota(split);
    for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(
                configuration, session, path, split.getStart(), split.getLength(), split.getFileSize(), split.getStorage(),
                toColumnHandles(columnMappings, true), prefilledValues, coercers, bucketAdaptation, outputColumns,
                splitContext.getDynamicFilterPredicate().map(filter -> filter.transform(handle -> new Subfield(((HiveColumnHandle) handle).getName())).intersect(layout.getDomainPredicate())).orElse(layout.getDomainPredicate()),
                optimizedRemainingPredicate, hiveStorageTimeZone,
                new HiveFileContext(splitContext.isCacheable(), cacheQuota, split.getExtraFileInfo().map(BinaryExtraHiveFileInfo::new), Optional.of(split.getFileSize()), split.getFileModifiedTime(), HiveSessionProperties.isVerboseRuntimeStatsEnabled(session)),
                encryptionInformation);
        if (pageSource.isPresent()) {
            return Optional.of(pageSource.get());
        }
    }
    return Optional.empty();
}
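The loop at the end follows a "first factory wins" pattern: each registered factory is offered the split, and the first non-empty result is used. A condensed sketch of that dispatch, where PageSource and PageSourceFactory are hypothetical stand-ins for the Hive SPI interfaces:

import java.util.List;
import java.util.Optional;

interface PageSource {}

interface PageSourceFactory {
    // returns empty when this factory does not handle the file's format
    Optional<PageSource> createPageSource(String path);
}

class FirstFactoryWins {
    static Optional<PageSource> create(List<PageSourceFactory> factories, String path) {
        for (PageSourceFactory factory : factories) {
            Optional<PageSource> pageSource = factory.createPageSource(path);
            if (pageSource.isPresent()) {
                return pageSource; // first factory that recognizes the format wins
            }
        }
        return Optional.empty(); // caller falls back to non-selective page sources
    }
}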
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
Class HiveMaterializedViewUtils, method getMaterializedDataPredicates:
public static MaterializedDataPredicates getMaterializedDataPredicates(SemiTransactionalHiveMetastore metastore, MetastoreContext metastoreContext, TypeManager typeManager, Table table, DateTimeZone timeZone)
{
    List<Column> partitionColumns = table.getPartitionColumns();
    for (Column partitionColumn : partitionColumns) {
        HiveType hiveType = partitionColumn.getType();
        if (!hiveType.isSupportedType()) {
            throw new PrestoException(NOT_SUPPORTED, String.format("Unsupported Hive type %s found in partition keys of table %s.%s", hiveType, table.getDatabaseName(), table.getTableName()));
        }
    }
    List<HiveColumnHandle> partitionKeyColumnHandles = getPartitionKeyColumnHandles(table);
    Map<String, Type> partitionTypes = partitionKeyColumnHandles.stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> typeManager.getType(column.getTypeSignature())));
    List<String> partitionNames = metastore.getPartitionNames(metastoreContext, table.getDatabaseName(), table.getTableName())
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName())));
    ImmutableList.Builder<TupleDomain<String>> partitionNamesAndValues = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        ImmutableMap.Builder<String, NullableValue> partitionNameAndValuesMap = ImmutableMap.builder();
        Map<String, String> partitions = toPartitionNamesAndValues(partitionName);
        if (partitionColumns.size() != partitions.size()) {
            throw new PrestoException(HIVE_INVALID_METADATA, String.format("Expected %d partition key values, but got %d", partitionColumns.size(), partitions.size()));
        }
        partitionTypes.forEach((name, type) -> {
            String value = partitions.get(name);
            if (value == null) {
                throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, String.format("partition key value cannot be null for field: %s", name));
            }
            partitionNameAndValuesMap.put(name, parsePartitionValue(name, value, type, timeZone));
        });
        TupleDomain<String> tupleDomain = TupleDomain.fromFixedValues(partitionNameAndValuesMap.build());
        partitionNamesAndValues.add(tupleDomain);
    }
    return new MaterializedDataPredicates(partitionNamesAndValues.build(), partitionColumns.stream().map(Column::getName).collect(toImmutableList()));
}
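A rough sketch of the partition-name decoding that toPartitionNamesAndValues performs above, assuming Hive's usual "key1=value1/key2=value2" encoding; the real implementation also unescapes special characters, which is omitted here:

import java.util.LinkedHashMap;
import java.util.Map;

public class PartitionNameSketch {
    static Map<String, String> toNamesAndValues(String partitionName) {
        Map<String, String> values = new LinkedHashMap<>();
        for (String part : partitionName.split("/")) {
            int separator = part.indexOf('=');
            values.put(part.substring(0, separator), part.substring(separator + 1));
        }
        return values;
    }

    public static void main(String[] args) {
        // prints {ds=2020-01-01, country=US}
        System.out.println(toNamesAndValues("ds=2020-01-01/country=US"));
    }
}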
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
Class RcFilePageSourceFactory, method createPageSource:
@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation)
{
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. "
                + "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat()
                + ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats),
                rcFileEncoding,
                readColumns.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    }
    catch (Throwable e) {
        try {
            inputStream.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
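The readColumns map above is where TypeManager does its work for this reader: each projected column's physical index in the RCFile is mapped to its resolved Presto type. A minimal sketch of that step, where TypeLookup and Column are hypothetical stand-ins for TypeManager and HiveColumnHandle:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

interface PrestoType {}

interface TypeLookup {
    PrestoType getType(String typeSignature);
}

class ReadColumnsSketch {
    static class Column {
        final int hiveColumnIndex;
        final String typeSignature;

        Column(int hiveColumnIndex, String typeSignature) {
            this.hiveColumnIndex = hiveColumnIndex;
            this.typeSignature = typeSignature;
        }
    }

    static Map<Integer, PrestoType> readColumns(List<Column> columns, TypeLookup typeManager) {
        Map<Integer, PrestoType> readColumns = new LinkedHashMap<>();
        for (Column column : columns) {
            // the reader is keyed by the column's physical position in the RCFile
            readColumns.put(column.hiveColumnIndex, typeManager.getType(column.typeSignature));
        }
        return readColumns;
    }
}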
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
Class HiveMetadata, method finishInsertInternal:
private Optional<ConnectorOutputMetadata> finishInsertInternal(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics)
{
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = getPartitionUpdates(session, fragments);
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, handle.getSchemaName(), handle.getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
        throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the zero-row files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), false);
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty()
                    ? Optional.empty()
                    : Optional.of(partitionObjectBuilder.buildPartitionObject(
                            session,
                            table,
                            partitionUpdate,
                            prestoVersion,
                            handle.getEncryptionInformation()
                                    .map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of))
                                    .orElseGet(ImmutableMap::of)));
            zeroRowFileCreator.createFiles(
                    session,
                    hdfsContext,
                    partitionUpdate.getWritePath(),
                    getTargetFileNames(partitionUpdate.getFileWriteInfos()),
                    getStorageFormat(partition, table),
                    handle.getCompressionCodec(),
                    getSchema(partition, table));
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    Set<String> existingPartitions = getExistingPartitionNames(session.getIdentity(), metastoreContext, handle.getSchemaName(), handle.getTableName(), partitionUpdates);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            if (handle.getEncryptionInformation().isPresent()) {
                throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing table with encryption enabled is not supported yet");
            }
            // insert into unpartitioned table
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
        }
        else if (partitionUpdate.getUpdateMode() == APPEND) {
            if (handle.getEncryptionInformation().isPresent()) {
                throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Inserting into an existing partition with encryption enabled is not supported yet");
            }
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partitionValues, partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), partitionStatistics);
        }
        else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
            Map<String, String> extraPartitionMetadata = handle.getEncryptionInformation()
                    .map(encryptionInfo -> encryptionInfo.getDwrfEncryptionMetadata().map(DwrfEncryptionMetadata::getExtraMetadata).orElseGet(ImmutableMap::of))
                    .orElseGet(ImmutableMap::of);
            if (isPreferManifestsToListFiles(session) && isFileRenamingEnabled(session)) {
                // store the list of file names and sizes in partition metadata when prefer_manifests_to_list_files and file_renaming_enabled are set to true
                extraPartitionMetadata = updatePartitionMetadataWithFileNamesAndSizes(partitionUpdate, extraPartitionMetadata);
            }
            // track the manifest blob size
            getManifestSizeInBytes(session, partitionUpdate, extraPartitionMetadata).ifPresent(hivePartitionStats::addManifestSizeInBytes);
            // insert into new partition or overwrite existing partition
            Partition partition = partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, extraPartitionMetadata);
            if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
            }
            if (existingPartitions.contains(partitionUpdate.getName())) {
                if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                    metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), handle.getLocationHandle().getTargetPath().toString(), partition.getValues());
                }
                else {
                    throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionUpdate.getName());
                }
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), table.getStorage().getLocation(), false, partition, partitionUpdate.getWritePath(), partitionStatistics);
        }
        else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .map(name -> name.isEmpty() ? UNPARTITIONED_ID : name)
            .collect(toList())));
}
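A condensed sketch of the per-partition dispatch in the loop above. UpdateMode mirrors PartitionUpdate.UpdateMode, and the returned strings are placeholders for the metastore calls made in the real method:

enum UpdateMode { NEW, APPEND, OVERWRITE }

class FinishInsertDispatchSketch {
    static String dispatch(String partitionName, UpdateMode mode, boolean partitionExists) {
        if (partitionName.isEmpty()) {
            return "finishInsertIntoExistingTable";     // unpartitioned table
        }
        if (mode == UpdateMode.APPEND) {
            return "finishInsertIntoExistingPartition"; // append to an existing partition
        }
        if (mode == UpdateMode.NEW || mode == UpdateMode.OVERWRITE) {
            if (partitionExists && mode == UpdateMode.NEW) {
                return "error: cannot insert into an existing partition";
            }
            if (partitionExists) {
                return "dropPartition + addPartition";  // overwrite an existing partition
            }
            return "addPartition";                      // brand-new partition
        }
        throw new IllegalArgumentException("Unsupported update mode: " + mode);
    }
}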