Search in sources :

Example 1 with ReaderRecordCursorWithProjections

Use of io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections in the trino project by trinodb.

From the class AbstractFileFormat, method createPageSource:

/**
 * Creates a {@link ConnectorPageSource} that reads the whole of {@code targetFile} through a
 * record cursor obtained from the given {@link HiveRecordCursorProvider}.
 *
 * @param cursorProvider provider used to open the record cursor over the file
 * @param session        connector session passed through to the provider
 * @param targetFile     local file to read; its full length is used as both split and file size
 * @param columnNames    names of the columns to read; must be the same size as {@code columnTypes}
 * @param columnTypes    types of the columns to read, positionally matching {@code columnNames}
 * @param format         storage format used to build the file schema
 * @return a {@link RecordPageSource} wrapping the provider's cursor
 * @throws IllegalArgumentException if {@code columnNames} and {@code columnTypes} differ in size
 * @throws IllegalStateException if the provider returns no cursor, or if it requires projections
 */
static ConnectorPageSource createPageSource(HiveRecordCursorProvider cursorProvider, ConnectorSession session, File targetFile, List<String> columnNames, List<Type> columnTypes, HiveStorageFormat format) {
    checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes should have the same size");
    List<HiveColumnHandle> readColumns = getBaseColumns(columnNames, columnTypes);
    // Read the entire file as a single split: start = 0, split length = file length
    Optional<ReaderRecordCursorWithProjections> recordCursorWithProjections = cursorProvider.createRecordCursor(
            conf,
            session,
            new Path(targetFile.getAbsolutePath()),
            0,
            targetFile.length(),
            targetFile.length(),
            createSchema(format, columnNames, columnTypes),
            readColumns,
            TupleDomain.all(),
            TESTING_TYPE_MANAGER,
            false);
    // Fixed: the failure message previously referenced "readerPageSourceWithProjections",
    // a variable that does not exist in this method.
    checkState(recordCursorWithProjections.isPresent(), "recordCursorWithProjections is not present");
    ReaderRecordCursorWithProjections cursorWithProjections = recordCursorWithProjections.get();
    // Base columns were requested directly, so no reader-side projection should be needed
    checkState(cursorWithProjections.getProjectedReaderColumns().isEmpty(), "projection should not be required");
    return new RecordPageSource(columnTypes, cursorWithProjections.getRecordCursor());
}
Also used : Path(org.apache.hadoop.fs.Path) ReaderRecordCursorWithProjections(io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) RecordPageSource(io.trino.spi.connector.RecordPageSource)

Example 2 with ReaderRecordCursorWithProjections

Use of io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections in the trino project by trinodb.

From the class HivePageSourceProvider, method createHivePageSource:

/**
 * Builds a {@link ConnectorPageSource} for a Hive split by trying, in order:
 * (1) each {@link HivePageSourceFactory} (columnar/page-based readers), then
 * (2) each {@link HiveRecordCursorProvider} (row-based readers wrapped in a RecordPageSource).
 * Returns {@link Optional#empty()} if no factory or provider can handle the file.
 *
 * <p>On the record-cursor path the cursor is wrapped in up to four decorators, applied in this
 * order: reader-projection adaptation, bucket adaptation, type coercion, bucket validation.
 */
public static Optional<ConnectorPageSource> createHivePageSource(Set<HivePageSourceFactory> pageSourceFactories, Set<HiveRecordCursorProvider> cursorProviders, Configuration configuration, ConnectorSession session, Path path, OptionalInt bucketNumber, long start, long length, long estimatedFileSize, Properties schema, TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> columns, TypeManager typeManager, Optional<BucketConversion> bucketConversion, Optional<BucketValidation> bucketValidation, boolean s3SelectPushdownEnabled, Optional<AcidInfo> acidInfo, boolean originalFile, AcidTransaction transaction, List<ColumnMapping> columnMappings) {
    // A predicate that matches nothing means no rows can qualify — short-circuit with an empty source
    if (effectivePredicate.isNone()) {
        return Optional.of(new EmptyPageSource());
    }
    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, bucketNumber, regularAndInterimColumnMappings);
    Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, bucketNumber, regularAndInterimColumnMappings);
    // First preference: page-based readers; the first factory that returns a source wins
    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
        Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns, effectivePredicate, acidInfo, bucketNumber, originalFile, transaction);
        if (readerWithProjections.isPresent()) {
            ConnectorPageSource pageSource = readerWithProjections.get().get();
            Optional<ReaderColumns> readerProjections = readerWithProjections.get().getReaderColumns();
            Optional<ReaderProjectionsAdapter> adapter = Optional.empty();
            // If the reader produced a narrower/reordered column set, adapt it back to the desired columns
            if (readerProjections.isPresent()) {
                adapter = Optional.of(hiveProjectionsAdapter(desiredColumns, readerProjections.get()));
            }
            return Optional.of(new HivePageSource(columnMappings, bucketAdaptation, bucketValidator, adapter, typeManager, pageSource));
        }
    }
    // Fallback: row-based record cursor providers
    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, doCoercion, typeManager);
        Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns, effectivePredicate, typeManager, s3SelectPushdownEnabled);
        if (readerWithProjections.isPresent()) {
            RecordCursor delegate = readerWithProjections.get().getRecordCursor();
            Optional<ReaderColumns> projections = readerWithProjections.get().getProjectedReaderColumns();
            // Wrapper 1: adapt reader-side projections back to the desired column layout
            if (projections.isPresent()) {
                ReaderProjectionsAdapter projectionsAdapter = hiveProjectionsAdapter(desiredColumns, projections.get());
                delegate = new HiveReaderProjectionsAdaptingRecordCursor(delegate, projectionsAdapter);
            }
            // ACID reads are only supported by the page-source path above
            checkArgument(acidInfo.isEmpty(), "Acid is not supported");
            // Wrapper 2: re-bucket rows when table and partition bucket counts differ
            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(bucketAdaptation.get().getBucketColumnIndices(), bucketAdaptation.get().getBucketColumnHiveTypes(), bucketAdaptation.get().getBucketingVersion(), bucketAdaptation.get().getTableBucketCount(), bucketAdaptation.get().getPartitionBucketCount(), bucketAdaptation.get().getBucketToKeep(), typeManager, delegate);
            }
            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatch columns
            // Wrapper 3: column type coercion (skipped for GenericHiveRecordCursorProvider, see above)
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }
            // bucket adaptation already validates that data is in the right bucket
            // Wrapper 4: standalone bucket validation, only when no bucket adaptation was applied
            if (bucketAdaptation.isEmpty() && bucketValidator.isPresent()) {
                delegate = bucketValidator.get().wrapRecordCursor(delegate, typeManager);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, delegate);
            List<Type> columnTypes = columns.stream().map(HiveColumnHandle::getType).collect(toList());
            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }
    // No factory or provider could read this file
    return Optional.empty();
}
Also used : BucketValidator(io.trino.plugin.hive.HivePageSource.BucketValidator) RecordCursor(io.trino.spi.connector.RecordCursor) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) RecordPageSource(io.trino.spi.connector.RecordPageSource) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) Type(io.trino.spi.type.Type) OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType(io.trino.plugin.hive.orc.OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType) OrcType(io.trino.orc.metadata.OrcType) ReaderRecordCursorWithProjections(io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections)

Aggregations

ReaderRecordCursorWithProjections (io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections)2 RecordPageSource (io.trino.spi.connector.RecordPageSource)2 OrcType (io.trino.orc.metadata.OrcType)1 HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)1 BucketValidator (io.trino.plugin.hive.HivePageSource.BucketValidator)1 OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType (io.trino.plugin.hive.orc.OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType)1 ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource)1 EmptyPageSource (io.trino.spi.connector.EmptyPageSource)1 RecordCursor (io.trino.spi.connector.RecordCursor)1 Type (io.trino.spi.type.Type)1 Path (org.apache.hadoop.fs.Path)1