
Example 66 with Type

Use of io.trino.spi.type.Type in project trino by trinodb.

From class PropertiesSystemTableProvider, method getSystemTable.

@Override
public Optional<SystemTable> getSystemTable(HiveMetadata metadata, ConnectorSession session, SchemaTableName tableName) {
    if (!PROPERTIES.matches(tableName)) {
        return Optional.empty();
    }
    SchemaTableName sourceTableName = PROPERTIES.getSourceTableName(tableName);
    Table table = metadata.getMetastore()
            .getTable(sourceTableName.getSchemaName(), sourceTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (isDeltaLakeTable(table) || isIcebergTable(table)) {
        return Optional.empty();
    }
    Map<String, String> sortedTableParameters = ImmutableSortedMap.copyOf(table.getParameters());
    List<ColumnMetadata> columns = sortedTableParameters.keySet().stream()
            .map(key -> new ColumnMetadata(key, VarcharType.VARCHAR))
            .collect(toImmutableList());
    List<Type> types = columns.stream()
            .map(ColumnMetadata::getType)
            .collect(toImmutableList());
    Iterable<List<Object>> propertyValues = ImmutableList.of(ImmutableList.copyOf(sortedTableParameters.values()));
    return Optional.of(createSystemTable(
            new ConnectorTableMetadata(sourceTableName, columns),
            constraint -> new InMemoryRecordSet(types, propertyValues).cursor()));
}
Also used: HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) Table(io.trino.plugin.hive.metastore.Table) PROPERTIES(io.trino.plugin.hive.SystemTableHandler.PROPERTIES) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) ConnectorSession(io.trino.spi.connector.ConnectorSession) InMemoryRecordSet(io.trino.spi.connector.InMemoryRecordSet) SchemaTableName(io.trino.spi.connector.SchemaTableName) SystemTable(io.trino.spi.connector.SystemTable) SystemTables.createSystemTable(io.trino.plugin.hive.util.SystemTables.createSystemTable) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) List(java.util.List) Map(java.util.Map) Optional(java.util.Optional)
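The pattern to note here is the shape of the resulting system table: one VARCHAR column per table parameter and exactly one row carrying the values, with the sorted map keeping keys and values aligned. A minimal sketch of that shape, factored into a standalone helper, assuming the InMemoryRecordSet constructor accepts the (types, rows) pair exactly as used above (the class and method names are illustrative, not from the Trino codebase):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedMap;
import io.trino.spi.connector.InMemoryRecordSet;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

import java.util.List;
import java.util.Map;

import static com.google.common.collect.ImmutableList.toImmutableList;

class PropertiesRecordSetSketch {
    // Builds a single-row record set from a parameter map: the sorted copy
    // guarantees the column order (keys) matches the value order (the one row).
    static InMemoryRecordSet toRecordSet(Map<String, String> parameters) {
        Map<String, String> sorted = ImmutableSortedMap.copyOf(parameters);
        List<Type> types = sorted.keySet().stream()
                .map(key -> (Type) VarcharType.VARCHAR)
                .collect(toImmutableList());
        List<List<Object>> rows = ImmutableList.of(ImmutableList.copyOf(sorted.values()));
        return new InMemoryRecordSet(types, rows);
    }
}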

Example 67 with Type

Use of io.trino.spi.type.Type in project trino by trinodb.

From class RcFileFileWriterFactory, method createFileWriter.

@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf configuration,
        ConnectorSession session,
        OptionalInt bucketNumber,
        AcidTransaction transaction,
        boolean useAcidSchema,
        WriterKind writerKind) {
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
        rcFileEncoding = new BinaryRcFileEncoding(timeZone);
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
        rcFileEncoding = createTextVectorEncoding(schema);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session)))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
        OutputStream outputStream = fileSystem.create(path, false);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new RcFileFileWriter(
                outputStream,
                rollbackAction,
                rcFileEncoding,
                fileColumnTypes,
                codecName,
                fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .buildOrThrow(),
                validationInputFactory));
    } catch (Exception e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
Also used: HdfsRcFileDataSource(io.trino.plugin.hive.rcfile.HdfsRcFileDataSource) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Type(io.trino.spi.type.Type) Callable(java.util.concurrent.Callable) RcFileDataSource(io.trino.rcfile.RcFileDataSource) OptionalInt(java.util.OptionalInt) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) HiveUtil.getColumnNames(io.trino.plugin.hive.util.HiveUtil.getColumnNames) HiveUtil.getColumnTypes(io.trino.plugin.hive.util.HiveUtil.getColumnTypes) HiveSessionProperties.isRcfileOptimizedWriterValidate(io.trino.plugin.hive.HiveSessionProperties.isRcfileOptimizedWriterValidate) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) PRESTO_VERSION_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) OutputStream(java.io.OutputStream) HIVE_WRITER_OPEN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) HIVE_WRITE_VALIDATION_FAILED(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) TrinoException(io.trino.spi.TrinoException) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) RcFileEncoding(io.trino.rcfile.RcFileEncoding) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) RcFilePageSourceFactory.createTextVectorEncoding(io.trino.plugin.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Collectors.toList(java.util.stream.Collectors.toList) Optional(java.util.Optional) TypeManager(io.trino.spi.type.TypeManager)
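The comment about column order is the heart of this factory: existing tables and partitions may list columns in a different order than the writer provides, so it computes a permutation from file order to input order. A standalone sketch of that index computation (class and method names are hypothetical); an entry of -1 marks a file column the input does not supply, which the writer is then expected to pad with nulls:

import java.util.Arrays;
import java.util.List;

class ColumnIndexSketch {
    // For each column in the file's schema order, find its position in the
    // writer's input order; indexOf yields -1 for columns the input lacks.
    static int[] fileInputColumnIndexes(List<String> fileColumnNames, List<String> inputColumnNames) {
        return fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();
    }

    public static void main(String[] args) {
        int[] indexes = fileInputColumnIndexes(
                List.of("a", "b", "c"),   // column order in the existing partition
                List.of("c", "a"));       // column order the writer provides
        System.out.println(Arrays.toString(indexes));   // prints [1, -1, 0]
    }
}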

Example 68 with Type

Use of io.trino.spi.type.Type in project trino by trinodb.

From class HivePartitionManager, method getPartitions.

public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint constraint) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary().intersect(hiveTableHandle.getEnforcedConstraint());
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Optional<HiveBucketHandle> hiveBucketHandle = hiveTableHandle.getBucketHandle();
    List<HiveColumnHandle> partitionColumns = hiveTableHandle.getPartitionColumns();
    if (effectivePredicate.isNone()) {
        return new HivePartitionResult(partitionColumns, Optional.empty(), ImmutableList.of(), TupleDomain.none(), TupleDomain.none(), hiveBucketHandle, Optional.empty());
    }
    Optional<HiveBucketFilter> bucketFilter = getHiveBucketFilter(hiveTableHandle, effectivePredicate);
    TupleDomain<HiveColumnHandle> compactEffectivePredicate = effectivePredicate.transformKeys(HiveColumnHandle.class::cast).simplify(domainCompactionThreshold);
    if (partitionColumns.isEmpty()) {
        return new HivePartitionResult(partitionColumns, Optional.empty(), ImmutableList.of(new HivePartition(tableName)), effectivePredicate, compactEffectivePredicate, hiveBucketHandle, bucketFilter);
    }
    List<Type> partitionTypes = partitionColumns.stream().map(HiveColumnHandle::getType).collect(toList());
    Optional<List<String>> partitionNames = Optional.empty();
    Iterable<HivePartition> partitionsIterable;
    Predicate<Map<ColumnHandle, NullableValue>> predicate = constraint.predicate().orElse(value -> true);
    if (hiveTableHandle.getPartitions().isPresent()) {
        partitionsIterable = hiveTableHandle.getPartitions().get().stream()
                .filter(partition -> partitionMatches(partitionColumns, effectivePredicate, predicate, partition))
                .collect(toImmutableList());
    } else {
        List<String> partitionNamesList = hiveTableHandle.getPartitionNames()
                .orElseGet(() -> getFilteredPartitionNames(metastore, tableName, partitionColumns, compactEffectivePredicate));
        partitionsIterable = () -> partitionNamesList.stream()
                .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, effectivePredicate, predicate))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
        partitionNames = Optional.of(partitionNamesList);
    }
    return new HivePartitionResult(partitionColumns, partitionNames, partitionsIterable, effectivePredicate, compactEffectivePredicate, hiveBucketHandle, bucketFilter);
}
Also used: ColumnHandle(io.trino.spi.connector.ColumnHandle) Optional(java.util.Optional) SchemaTableName(io.trino.spi.connector.SchemaTableName) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) HiveBucketing.getHiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter) Type(io.trino.spi.type.Type) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
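A subtlety in the else branch above: partitionsIterable is a lambda over the fetched name list, so nothing is parsed or filtered until someone iterates the result, and every traversal re-runs the pipeline. A self-contained sketch of that deferred-iteration pattern (parseIfMatching is a hypothetical stand-in for parseValuesAndFilterPartition):

import java.util.List;
import java.util.Optional;

class LazyPartitionsSketch {
    // Wrapping the stream pipeline in an Iterable lambda defers all work to
    // iteration time; each call to iterator() restarts from the name list.
    static Iterable<String> lazilyFiltered(List<String> partitionNames) {
        return () -> partitionNames.stream()
                .map(LazyPartitionsSketch::parseIfMatching)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
    }

    private static Optional<String> parseIfMatching(String name) {
        // stand-in filter: keep only well-formed "key=value" names
        return name.contains("=") ? Optional.of(name) : Optional.empty();
    }
}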

Example 69 with Type

Use of io.trino.spi.type.Type in project trino by trinodb.

From class HivePartitionManager, method getPartitions (overload taking explicit partition values).

public HivePartitionResult getPartitions(ConnectorTableHandle tableHandle, List<List<String>> partitionValuesList) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    List<HiveColumnHandle> partitionColumns = hiveTableHandle.getPartitionColumns();
    Optional<HiveBucketHandle> bucketHandle = hiveTableHandle.getBucketHandle();
    List<String> partitionColumnNames = partitionColumns.stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableList());
    List<Type> partitionColumnTypes = partitionColumns.stream()
            .map(HiveColumnHandle::getType)
            .collect(toImmutableList());
    List<HivePartition> partitionList = partitionValuesList.stream()
            .map(partitionValues -> toPartitionName(partitionColumnNames, partitionValues))
            .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionColumnTypes, TupleDomain.all(), value -> true))
            .map(partition -> partition.orElseThrow(() -> new VerifyException("partition must exist")))
            .collect(toImmutableList());
    return new HivePartitionResult(partitionColumns, Optional.empty(), partitionList, TupleDomain.all(), TupleDomain.all(), bucketHandle, Optional.empty());
}
Also used: Constraint(io.trino.spi.connector.Constraint) NullableValue(io.trino.spi.predicate.NullableValue) Type(io.trino.spi.type.Type) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) HiveBucketing.getHiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter) Inject(javax.inject.Inject) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveUtil.parsePartitionValue(io.trino.plugin.hive.util.HiveUtil.parsePartitionValue) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ColumnHandle(io.trino.spi.connector.ColumnHandle) VerifyException(com.google.common.base.VerifyException) ImmutableSet(com.google.common.collect.ImmutableSet) Iterator(java.util.Iterator) Predicate(java.util.function.Predicate) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) TrinoException(io.trino.spi.TrinoException) Sets(com.google.common.collect.Sets) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) MetastoreUtil.toPartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.toPartitionName) MetastoreUtil.computePartitionKeyFilter(io.trino.plugin.hive.metastore.MetastoreUtil.computePartitionKeyFilter) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) HIVE_EXCEEDED_PARTITION_LIMIT(io.trino.plugin.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT) FileUtils(org.apache.hadoop.hive.common.FileUtils) Optional(java.util.Optional)
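Here toPartitionName converts each value list into a Hive-style partition name, which parseValuesAndFilterPartition then parses back while validating it. Such names take the form key1=value1/key2=value2; a simplified sketch under that assumption (the real MetastoreUtil.toPartitionName also escapes special characters, which this version skips):

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

class PartitionNameSketch {
    // Joins aligned column names and values into "k1=v1/k2=v2" form.
    static String toPartitionName(List<String> names, List<String> values) {
        if (names.size() != values.size()) {
            throw new IllegalArgumentException("names and values must have the same size");
        }
        return IntStream.range(0, names.size())
                .mapToObj(i -> names.get(i) + "=" + values.get(i))
                .collect(Collectors.joining("/"));
    }
}

For example, toPartitionName(List.of("ds", "country"), List.of("2024-01-01", "US")) yields "ds=2024-01-01/country=US".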

Example 70 with Type

Use of io.trino.spi.type.Type in project trino by trinodb.

From class OrcPageSourceFactory, method createOrcPageSource.

private ConnectorPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long estimatedFileSize,
        List<HiveColumnHandle> columns,
        List<HiveColumnHandle> projections,
        boolean useOrcColumnNames,
        boolean isFullAcid,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone legacyFileTimeZone,
        OrcReaderOptions options,
        Optional<AcidInfo> acidInfo,
        OptionalInt bucketNumber,
        boolean originalFile,
        AcidTransaction transaction,
        FileFormatDataSourceStats stats) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), estimatedFileSize, options, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
    try {
        Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
        if (optionalOrcReader.isEmpty()) {
            return new EmptyPageSource();
        }
        OrcReader reader = optionalOrcReader.get();
        if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
            validateOrcAcidVersion(path, reader);
        }
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
        List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
        List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
        List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
        if (isFullAcid && !originalFilesPresent) {
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(), columns);
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
        Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            projectionsByColumnName = projections.stream()
                    .collect(Collectors.groupingBy(
                            HiveColumnHandle::getBaseColumnName,
                            mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        } else {
            projectionsByColumnIndex = projections.stream()
                    .collect(Collectors.groupingBy(
                            HiveColumnHandle::getBaseHiveColumnIndex,
                            mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder()
                .setBloomFiltersEnabled(options.isBloomFiltersEnabled())
                .setDomainCompactionThreshold(domainCompactionThreshold);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains()
                .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        for (HiveColumnHandle column : columns) {
            OrcColumn orcColumn = null;
            OrcReader.ProjectedLayout projectedLayout = null;
            Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;
            if (useOrcColumnNames || isFullAcid) {
                String columnName = column.getName().toLowerCase(ENGLISH);
                orcColumn = fileColumnsByName.get(columnName);
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
                    columnDomains = effectivePredicateDomains.entrySet().stream()
                            .filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName))
                            .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            } else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
                    columnDomains = effectivePredicateDomains.entrySet().stream()
                            .filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex())
                            .collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            }
            Type readType = column.getType();
            if (orcColumn != null) {
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                fileReadLayouts.add(projectedLayout);
                // Add predicates on top-level and nested columns
                for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
                    OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
                    if (nestedColumn != null) {
                        predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
                    }
                }
            } else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns,
                fileReadTypes,
                fileReadLayouts,
                predicateBuilder.build(),
                start,
                length,
                legacyFileTimeZone,
                memoryUsage,
                INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSource.getId(), exception),
                NameBasedFieldMapper::create);
        Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(
                path.getName(),
                new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats),
                identity,
                configuration,
                hdfsEnvironment,
                info,
                bucketNumber,
                memoryUsage));
        Optional<Long> originalFileRowId = acidInfo
                .filter(OrcPageSourceFactory::hasOriginalFiles)
                .map(info -> OriginalFilesUtils.getPrecedingRowCount(acidInfo.get().getOriginalFiles(), path, hdfsEnvironment, identity, options, configuration, stats));
        if (transaction.isDelete()) {
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
            } else {
                columnAdaptations.add(ColumnAdaptation.rowIdColumn());
            }
        } else if (transaction.isUpdate()) {
            HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
            List<HiveColumnHandle> dependencyColumns = projections.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
            } else {
                columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
            }
        }
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, originalFileRowId, memoryUsage, stats);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new TrinoException(HIVE_MISSING_DATA, message, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used: DateTimeZone(org.joda.time.DateTimeZone) HiveUpdateProcessor(io.trino.plugin.hive.HiveUpdateProcessor) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CANNOT_OPEN_SPLIT(io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA) HIVE_BAD_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA) HIVE_FILE_MISSING_COLUMN_NAMES(io.trino.plugin.hive.HiveErrorCode.HIVE_FILE_MISSING_COLUMN_NAMES) OrcTypeKind(io.trino.orc.metadata.OrcType.OrcTypeKind) STRUCT(io.trino.orc.metadata.OrcType.OrcTypeKind.STRUCT) LONG(io.trino.orc.metadata.OrcType.OrcTypeKind.LONG) INT(io.trino.orc.metadata.OrcType.OrcTypeKind.INT) Maps(com.google.common.collect.Maps) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ColumnAdaptation(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation) ColumnAdaptation.updatedRowColumns(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation.updatedRowColumns) ColumnAdaptation.updatedRowColumnsWithOriginalFiles(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation.updatedRowColumnsWithOriginalFiles) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) HiveSessionProperties.getOrcLazyReadSmallRanges(io.trino.plugin.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) HiveSessionProperties.getOrcTinyStripeThreshold(io.trino.plugin.hive.HiveSessionProperties.getOrcTinyStripeThreshold) HiveSessionProperties.isOrcNestedLazy(io.trino.plugin.hive.HiveSessionProperties.isOrcNestedLazy) HiveSessionProperties.isUseOrcColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames) HiveSessionProperties.getOrcStreamBufferSize(io.trino.plugin.hive.HiveSessionProperties.getOrcStreamBufferSize) HiveSessionProperties.isOrcBloomFiltersEnabled(io.trino.plugin.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) HiveSessionProperties.getOrcMaxReadBlockSize(io.trino.plugin.hive.HiveSessionProperties.getOrcMaxReadBlockSize) HiveSessionProperties.getOrcMaxBufferSize(io.trino.plugin.hive.HiveSessionProperties.getOrcMaxBufferSize) HiveSessionProperties.getOrcMaxMergeDistance(io.trino.plugin.hive.HiveSessionProperties.getOrcMaxMergeDistance) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) AcidSchema(io.trino.plugin.hive.acid.AcidSchema) AcidInfo(io.trino.plugin.hive.AcidInfo) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ReaderColumns(io.trino.plugin.hive.ReaderColumns) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) Collectors(java.util.stream.Collectors) Collectors.mapping(java.util.stream.Collectors.mapping) Collectors.toList(java.util.stream.Collectors.toList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Properties(java.util.Properties) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ConnectorSession(io.trino.spi.connector.ConnectorSession) NameBasedFieldMapper(io.trino.orc.NameBasedFieldMapper) HivePageSourceProvider.projectBaseColumns(io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) OrcColumn(io.trino.orc.OrcColumn) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) OrcRecordReader(io.trino.orc.OrcRecordReader) Path(org.apache.hadoop.fs.Path) OrcDataSource(io.trino.orc.OrcDataSource) OrcDataSourceId(io.trino.orc.OrcDataSourceId) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) INTEGER(io.trino.spi.type.IntegerType.INTEGER) BIGINT(io.trino.spi.type.BigintType.BIGINT) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPageSource.handleException(io.trino.plugin.hive.orc.OrcPageSource.handleException) TrinoException(io.trino.spi.TrinoException) String.format(java.lang.String.format) List(java.util.List) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) OptionalInt(java.util.OptionalInt) PRESTO_WRITER_ID(io.trino.orc.metadata.OrcMetadataWriter.PRESTO_WRITER_ID) TRINO_WRITER_ID(io.trino.orc.metadata.OrcMetadataWriter.TRINO_WRITER_ID) Pattern(java.util.regex.Pattern) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) HiveUtil.isDeserializerClass(io.trino.plugin.hive.util.HiveUtil.isDeserializerClass) Type(io.trino.spi.type.Type) Inject(javax.inject.Inject) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcReader(io.trino.orc.OrcReader) OrcReader.fullyProjectedLayout(io.trino.orc.OrcReader.fullyProjectedLayout) NameBasedProjectedLayout.createProjectedLayout(io.trino.orc.OrcReader.NameBasedProjectedLayout.createProjectedLayout) UTF_8(java.nio.charset.StandardCharsets.UTF_8) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)
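The closing catch block is a reusable shape: best-effort close of the data source, pass TrinoException through unchanged, then map the rest onto specific error codes. A generic, dependency-free sketch of the same structure (names are illustrative; RuntimeException stands in for TrinoException):

import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.Callable;

class FailureCleanupSketch {
    // Runs the action; on any failure, closes the resource without masking
    // the original exception, then rethrows or wraps it.
    static <T> T runWithCleanup(Closeable resource, Callable<T> action) {
        try {
            return action.call();
        } catch (Exception e) {
            try {
                resource.close();
            } catch (IOException ignored) {
                // suppressed: the original failure is the interesting one
            }
            if (e instanceof RuntimeException) {
                throw (RuntimeException) e;
            }
            throw new RuntimeException("operation failed", e);
        }
    }
}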

Aggregations

Type (io.trino.spi.type.Type): 688
Test (org.testng.annotations.Test): 266
ArrayType (io.trino.spi.type.ArrayType): 218
ImmutableList (com.google.common.collect.ImmutableList): 191
RowType (io.trino.spi.type.RowType): 177
List (java.util.List): 155
VarcharType (io.trino.spi.type.VarcharType): 134
Page (io.trino.spi.Page): 126
ArrayList (java.util.ArrayList): 126
VarcharType.createUnboundedVarcharType (io.trino.spi.type.VarcharType.createUnboundedVarcharType): 114
Block (io.trino.spi.block.Block): 110
MapType (io.trino.spi.type.MapType): 107
DecimalType (io.trino.spi.type.DecimalType): 102
TrinoException (io.trino.spi.TrinoException): 98
Optional (java.util.Optional): 98
Map (java.util.Map): 97
ImmutableMap (com.google.common.collect.ImmutableMap): 93
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 92
DecimalType.createDecimalType (io.trino.spi.type.DecimalType.createDecimalType): 86
BlockBuilder (io.trino.spi.block.BlockBuilder): 72