Examples with HiveColumnProjectionInfo - io.trino.plugin.hive.HiveColumnProjectionInfo

Example 1 with HiveColumnProjectionInfo

use of io.trino.plugin.hive.HiveColumnProjectionInfo in project trino by trinodb.

the class OrcPageSourceFactory method getNestedColumn.

private static OrcColumn getNestedColumn(OrcColumn baseColumn, Optional<HiveColumnProjectionInfo> projectionInfo) {
    if (projectionInfo.isEmpty()) {
        return baseColumn;
    }
    OrcColumn current = baseColumn;
    for (String field : projectionInfo.get().getDereferenceNames()) {
        Optional<OrcColumn> orcColumn = current.getNestedColumns().stream().filter(column -> column.getColumnName().toLowerCase(ENGLISH).equals(field)).findFirst();
        if (orcColumn.isEmpty()) {
            return null;
        }
        current = orcColumn.get();
    }
    return current;
}

Also used : DateTimeZone(org.joda.time.DateTimeZone) HiveUpdateProcessor(io.trino.plugin.hive.HiveUpdateProcessor) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CANNOT_OPEN_SPLIT(io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) OrcTypeKind(io.trino.orc.metadata.OrcType.OrcTypeKind) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ColumnAdaptation(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) HiveSessionProperties.getOrcLazyReadSmallRanges(io.trino.plugin.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HiveSessionProperties.getOrcTinyStripeThreshold(io.trino.plugin.hive.HiveSessionProperties.getOrcTinyStripeThreshold) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ReaderColumns(io.trino.plugin.hive.ReaderColumns) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Slice(io.airlift.slice.Slice) STRUCT(io.trino.orc.metadata.OrcType.OrcTypeKind.STRUCT) ColumnAdaptation.updatedRowColumns(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation.updatedRowColumns) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) LONG(io.trino.orc.metadata.OrcType.OrcTypeKind.LONG) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) INT(io.trino.orc.metadata.OrcType.OrcTypeKind.INT) HIVE_BAD_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA) Properties(java.util.Properties) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) NameBasedFieldMapper(io.trino.orc.NameBasedFieldMapper) HivePageSourceProvider.projectBaseColumns(io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns) HiveSessionProperties.isOrcNestedLazy(io.trino.plugin.hive.HiveSessionProperties.isOrcNestedLazy) OrcColumn(io.trino.orc.OrcColumn) HIVE_MISSING_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) OrcRecordReader(io.trino.orc.OrcRecordReader) Path(org.apache.hadoop.fs.Path) OrcDataSource(io.trino.orc.OrcDataSource) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ImmutableMap(com.google.common.collect.ImmutableMap) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPageSource.handleException(io.trino.plugin.hive.orc.OrcPageSource.handleException) TrinoException(io.trino.spi.TrinoException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) OrcDataSourceId(io.trino.orc.OrcDataSourceId) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) PRESTO_WRITER_ID(io.trino.orc.metadata.OrcMetadataWriter.PRESTO_WRITER_ID) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) Pattern(java.util.regex.Pattern) TRINO_WRITER_ID(io.trino.orc.metadata.OrcMetadataWriter.TRINO_WRITER_ID) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) HiveUtil.isDeserializerClass(io.trino.plugin.hive.util.HiveUtil.isDeserializerClass) Type(io.trino.spi.type.Type) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) AcidSchema(io.trino.plugin.hive.acid.AcidSchema) HiveSessionProperties.isUseOrcColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames) OptionalInt(java.util.OptionalInt) Inject(javax.inject.Inject) HiveSessionProperties.getOrcStreamBufferSize(io.trino.plugin.hive.HiveSessionProperties.getOrcStreamBufferSize) ImmutableList(com.google.common.collect.ImmutableList) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) Collectors.mapping(java.util.stream.Collectors.mapping) HiveSessionProperties.isOrcBloomFiltersEnabled(io.trino.plugin.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) HiveSessionProperties.getOrcMaxReadBlockSize(io.trino.plugin.hive.HiveSessionProperties.getOrcMaxReadBlockSize) OrcReader(io.trino.orc.OrcReader) HiveSessionProperties.getOrcMaxBufferSize(io.trino.plugin.hive.HiveSessionProperties.getOrcMaxBufferSize) NameBasedProjectedLayout.createProjectedLayout(io.trino.orc.OrcReader.NameBasedProjectedLayout.createProjectedLayout) UTF_8(java.nio.charset.StandardCharsets.UTF_8) TupleDomain(io.trino.spi.predicate.TupleDomain) OrcReader.fullyProjectedLayout(io.trino.orc.OrcReader.fullyProjectedLayout) Maps(com.google.common.collect.Maps) HiveSessionProperties.getOrcMaxMergeDistance(io.trino.plugin.hive.HiveSessionProperties.getOrcMaxMergeDistance) ColumnAdaptation.updatedRowColumnsWithOriginalFiles(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation.updatedRowColumnsWithOriginalFiles) AcidInfo(io.trino.plugin.hive.AcidInfo) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) Collectors.toList(java.util.stream.Collectors.toList) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) HIVE_FILE_MISSING_COLUMN_NAMES(io.trino.plugin.hive.HiveErrorCode.HIVE_FILE_MISSING_COLUMN_NAMES) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) OrcColumn(io.trino.orc.OrcColumn)

Example 2 with HiveColumnProjectionInfo

use of io.trino.plugin.hive.HiveColumnProjectionInfo in project trino by trinodb.

the class OrcPageSourceFactory method createOrcPageSource.

private ConnectorPageSource createOrcPageSource(HdfsEnvironment hdfsEnvironment, ConnectorIdentity identity, Configuration configuration, Path path, long start, long length, long estimatedFileSize, List<HiveColumnHandle> columns, List<HiveColumnHandle> projections, boolean useOrcColumnNames, boolean isFullAcid, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone legacyFileTimeZone, OrcReaderOptions options, Optional<AcidInfo> acidInfo, OptionalInt bucketNumber, boolean originalFile, AcidTransaction transaction, FileFormatDataSourceStats stats) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), estimatedFileSize, options, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
    try {
        Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
        if (optionalOrcReader.isEmpty()) {
            return new EmptyPageSource();
        }
        OrcReader reader = optionalOrcReader.get();
        if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
            validateOrcAcidVersion(path, reader);
        }
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
        List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
        List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
        List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
        if (isFullAcid && !originalFilesPresent) {
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(), columns);
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
        Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            projectionsByColumnName = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseColumnName, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        } else {
            projectionsByColumnIndex = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseHiveColumnIndex, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled()).setDomainCompactionThreshold(domainCompactionThreshold);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        for (HiveColumnHandle column : columns) {
            OrcColumn orcColumn = null;
            OrcReader.ProjectedLayout projectedLayout = null;
            Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;
            if (useOrcColumnNames || isFullAcid) {
                String columnName = column.getName().toLowerCase(ENGLISH);
                orcColumn = fileColumnsByName.get(columnName);
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
                    columnDomains = effectivePredicateDomains.entrySet().stream().filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName)).collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            } else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
                    columnDomains = effectivePredicateDomains.entrySet().stream().filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex()).collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            }
            Type readType = column.getType();
            if (orcColumn != null) {
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                fileReadLayouts.add(projectedLayout);
                // Add predicates on top-level and nested columns
                for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
                    OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
                    if (nestedColumn != null) {
                        predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
                    }
                }
            } else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        OrcRecordReader recordReader = reader.createRecordReader(fileReadColumns, fileReadTypes, fileReadLayouts, predicateBuilder.build(), start, length, legacyFileTimeZone, memoryUsage, INITIAL_BATCH_SIZE, exception -> handleException(orcDataSource.getId(), exception), NameBasedFieldMapper::create);
        Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(path.getName(), new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats), identity, configuration, hdfsEnvironment, info, bucketNumber, memoryUsage));
        Optional<Long> originalFileRowId = acidInfo.filter(OrcPageSourceFactory::hasOriginalFiles).map(info -> OriginalFilesUtils.getPrecedingRowCount(acidInfo.get().getOriginalFiles(), path, hdfsEnvironment, identity, options, configuration, stats));
        if (transaction.isDelete()) {
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
            } else {
                columnAdaptations.add(ColumnAdaptation.rowIdColumn());
            }
        } else if (transaction.isUpdate()) {
            HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
            List<HiveColumnHandle> dependencyColumns = projections.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
            } else {
                columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
            }
        }
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, originalFileRowId, memoryUsage, stats);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new TrinoException(HIVE_MISSING_DATA, message, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}

Example 3 with HiveColumnProjectionInfo

use of io.trino.plugin.hive.HiveColumnProjectionInfo in project trino by trinodb.

the class TestParquetPageSourceFactory method testGetNestedMixedRepetitionColumnType.

@Test
public void testGetNestedMixedRepetitionColumnType() {
    RowType rowType = rowType(RowType.field("optional_level2", rowType(RowType.field("required_level3", IntegerType.INTEGER))));
    HiveColumnHandle columnHandle = new HiveColumnHandle("optional_level1", 0, HiveType.valueOf("struct<optional_level2:struct<required_level3:int>>"), rowType, Optional.of(new HiveColumnProjectionInfo(ImmutableList.of(1, 1), ImmutableList.of("optional_level2", "required_level3"), toHiveType(IntegerType.INTEGER), IntegerType.INTEGER)), REGULAR, Optional.empty());
    MessageType fileSchema = new MessageType("hive_schema", new GroupType(OPTIONAL, "optional_level1", new GroupType(OPTIONAL, "optional_level2", new PrimitiveType(REQUIRED, INT32, "required_level3"))));
    assertEquals(ParquetPageSourceFactory.getColumnType(columnHandle, fileSchema, true).get(), fileSchema.getType("optional_level1"));
}

Also used : GroupType(org.apache.parquet.schema.GroupType) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) RowType(io.trino.spi.type.RowType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) MessageType(org.apache.parquet.schema.MessageType) Test(org.testng.annotations.Test)

Example 4 with HiveColumnProjectionInfo

use of io.trino.plugin.hive.HiveColumnProjectionInfo in project trino by trinodb.

the class TestConnectorPushdownRulesWithHive method testPushdownWithDuplicateExpressions.

@Test
public void testPushdownWithDuplicateExpressions() {
    String tableName = "duplicate_expressions";
    tester().getQueryRunner().execute(format("CREATE TABLE  %s (struct_of_bigint, just_bigint) AS SELECT cast(row(5, 6) AS row(a bigint, b bigint)) AS struct_of_int, 5 AS just_bigint WHERE false", tableName));
    PushProjectionIntoTableScan pushProjectionIntoTableScan = new PushProjectionIntoTableScan(tester().getPlannerContext(), tester().getTypeAnalyzer(), new ScalarStatsCalculator(tester().getPlannerContext(), tester().getTypeAnalyzer()));
    HiveTableHandle hiveTable = new HiveTableHandle(SCHEMA_NAME, tableName, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
    TableHandle table = new TableHandle(new CatalogName(HIVE_CATALOG_NAME), hiveTable, new HiveTransactionHandle(false));
    HiveColumnHandle bigintColumn = createBaseColumn("just_bigint", 1, toHiveType(BIGINT), BIGINT, REGULAR, Optional.empty());
    HiveColumnHandle partialColumn = new HiveColumnHandle("struct_of_bigint", 0, toHiveType(ROW_TYPE), ROW_TYPE, Optional.of(new HiveColumnProjectionInfo(ImmutableList.of(0), ImmutableList.of("a"), toHiveType(BIGINT), BIGINT)), REGULAR, Optional.empty());
    // Test projection pushdown with duplicate column references
    tester().assertThat(pushProjectionIntoTableScan).on(p -> {
        SymbolReference column = p.symbol("just_bigint", BIGINT).toSymbolReference();
        Expression negation = new ArithmeticUnaryExpression(MINUS, column);
        return p.project(Assignments.of(// The column reference is part of both the assignments
        p.symbol("column_ref", BIGINT), column, p.symbol("negated_column_ref", BIGINT), negation), p.tableScan(table, ImmutableList.of(p.symbol("just_bigint", BIGINT)), ImmutableMap.of(p.symbol("just_bigint", BIGINT), bigintColumn)));
    }).matches(project(ImmutableMap.of("column_ref", expression("just_bigint_0"), "negated_column_ref", expression("- just_bigint_0")), tableScan(hiveTable.withProjectedColumns(ImmutableSet.of(bigintColumn))::equals, TupleDomain.all(), ImmutableMap.of("just_bigint_0", bigintColumn::equals))));
    // Test Dereference pushdown
    tester().assertThat(pushProjectionIntoTableScan).on(p -> {
        SubscriptExpression subscript = new SubscriptExpression(p.symbol("struct_of_bigint", ROW_TYPE).toSymbolReference(), new LongLiteral("1"));
        Expression sum = new ArithmeticBinaryExpression(ADD, subscript, new LongLiteral("2"));
        return p.project(Assignments.of(// The subscript expression instance is part of both the assignments
        p.symbol("expr_deref", BIGINT), subscript, p.symbol("expr_deref_2", BIGINT), sum), p.tableScan(table, ImmutableList.of(p.symbol("struct_of_bigint", ROW_TYPE)), ImmutableMap.of(p.symbol("struct_of_bigint", ROW_TYPE), partialColumn.getBaseColumn())));
    }).matches(project(ImmutableMap.of("expr_deref", expression(new SymbolReference("struct_of_bigint#a")), "expr_deref_2", expression(new ArithmeticBinaryExpression(ADD, new SymbolReference("struct_of_bigint#a"), new LongLiteral("2")))), tableScan(hiveTable.withProjectedColumns(ImmutableSet.of(partialColumn))::equals, TupleDomain.all(), ImmutableMap.of("struct_of_bigint#a", partialColumn::equals))));
    metastore.dropTable(SCHEMA_NAME, tableName, true);
}

Also used : MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Database(io.trino.plugin.hive.metastore.Database) Test(org.testng.annotations.Test) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) CatalogName(io.trino.connector.CatalogName) ArithmeticUnaryExpression(io.trino.sql.tree.ArithmeticUnaryExpression) LongLiteral(io.trino.sql.tree.LongLiteral) Arrays.asList(java.util.Arrays.asList) PlanBuilder(io.trino.sql.planner.iterative.rule.test.PlanBuilder) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) PlanMatchPattern.expression(io.trino.sql.planner.assertions.PlanMatchPattern.expression) RowType(io.trino.spi.type.RowType) ImmutableSet(com.google.common.collect.ImmutableSet) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) Assignments(io.trino.sql.planner.plan.Assignments) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) ScalarStatsCalculator(io.trino.cost.ScalarStatsCalculator) TestingHiveConnectorFactory(io.trino.plugin.hive.TestingHiveConnectorFactory) String.format(java.lang.String.format) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) PlanMatchPattern.strictProject(io.trino.sql.planner.assertions.PlanMatchPattern.strictProject) ADD(io.trino.sql.tree.ArithmeticBinaryExpression.Operator.ADD) BIGINT(io.trino.spi.type.BigintType.BIGINT) SymbolReference(io.trino.sql.tree.SymbolReference) PushProjectionIntoTableScan(io.trino.sql.planner.iterative.rule.PushProjectionIntoTableScan) HdfsConfig(io.trino.plugin.hive.HdfsConfig) PruneTableScanColumns(io.trino.sql.planner.iterative.rule.PruneTableScanColumns) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) Optional(java.util.Optional) Expression(io.trino.sql.tree.Expression) RowType.field(io.trino.spi.type.RowType.field) MINUS(io.trino.sql.tree.ArithmeticUnaryExpression.Sign.MINUS) Session(io.trino.Session) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) PushPredicateIntoTableScan(io.trino.sql.planner.iterative.rule.PushPredicateIntoTableScan) Type(io.trino.spi.type.Type) PlanMatchPattern.filter(io.trino.sql.planner.assertions.PlanMatchPattern.filter) SubscriptExpression(io.trino.sql.tree.SubscriptExpression) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NodeVersion(io.trino.plugin.hive.NodeVersion) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) LocalQueryRunner(io.trino.testing.LocalQueryRunner) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) ArithmeticBinaryExpression(io.trino.sql.tree.ArithmeticBinaryExpression) Symbol(io.trino.sql.planner.Symbol) AfterClass(org.testng.annotations.AfterClass) BaseRuleTest(io.trino.sql.planner.iterative.rule.test.BaseRuleTest) PrincipalType(io.trino.spi.security.PrincipalType) IOException(java.io.IOException) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) TupleDomain(io.trino.spi.predicate.TupleDomain) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) File(java.io.File) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) TableHandle(io.trino.metadata.TableHandle) PlanMatchPattern.project(io.trino.sql.planner.assertions.PlanMatchPattern.project) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) PlanMatchPattern.tableScan(io.trino.sql.planner.assertions.PlanMatchPattern.tableScan) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) ArithmeticBinaryExpression(io.trino.sql.tree.ArithmeticBinaryExpression) LongLiteral(io.trino.sql.tree.LongLiteral) SymbolReference(io.trino.sql.tree.SymbolReference) ScalarStatsCalculator(io.trino.cost.ScalarStatsCalculator) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) ArithmeticUnaryExpression(io.trino.sql.tree.ArithmeticUnaryExpression) Expression(io.trino.sql.tree.Expression) SubscriptExpression(io.trino.sql.tree.SubscriptExpression) ArithmeticBinaryExpression(io.trino.sql.tree.ArithmeticBinaryExpression) PushProjectionIntoTableScan(io.trino.sql.planner.iterative.rule.PushProjectionIntoTableScan) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) SubscriptExpression(io.trino.sql.tree.SubscriptExpression) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) TableHandle(io.trino.metadata.TableHandle) CatalogName(io.trino.connector.CatalogName) ArithmeticUnaryExpression(io.trino.sql.tree.ArithmeticUnaryExpression) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test) BaseRuleTest(io.trino.sql.planner.iterative.rule.test.BaseRuleTest)

Example 5 with HiveColumnProjectionInfo

use of io.trino.plugin.hive.HiveColumnProjectionInfo in project trino by trinodb.

the class TestConnectorPushdownRulesWithHive method testProjectionPushdown.

@Test
public void testProjectionPushdown() {
    String tableName = "projection_test";
    PushProjectionIntoTableScan pushProjectionIntoTableScan = new PushProjectionIntoTableScan(tester().getPlannerContext(), tester().getTypeAnalyzer(), new ScalarStatsCalculator(tester().getPlannerContext(), tester().getTypeAnalyzer()));
    tester().getQueryRunner().execute(format("CREATE TABLE  %s (struct_of_int) AS " + "SELECT cast(row(5, 6) as row(a bigint, b bigint)) as struct_of_int where false", tableName));
    Type baseType = ROW_TYPE;
    HiveColumnHandle partialColumn = new HiveColumnHandle("struct_of_int", 0, toHiveType(baseType), baseType, Optional.of(new HiveColumnProjectionInfo(ImmutableList.of(0), ImmutableList.of("a"), toHiveType(BIGINT), BIGINT)), REGULAR, Optional.empty());
    HiveTableHandle hiveTable = new HiveTableHandle(SCHEMA_NAME, tableName, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
    TableHandle table = new TableHandle(new CatalogName(HIVE_CATALOG_NAME), hiveTable, new HiveTransactionHandle(false));
    HiveColumnHandle fullColumn = partialColumn.getBaseColumn();
    // Test projected columns pushdown to HiveTableHandle in case of full column references
    tester().assertThat(pushProjectionIntoTableScan).on(p -> p.project(Assignments.of(p.symbol("struct_of_int", baseType), p.symbol("struct_of_int", baseType).toSymbolReference()), p.tableScan(table, ImmutableList.of(p.symbol("struct_of_int", baseType)), ImmutableMap.of(p.symbol("struct_of_int", baseType), fullColumn)))).matches(project(ImmutableMap.of("expr", expression("col")), tableScan(hiveTable.withProjectedColumns(ImmutableSet.of(fullColumn))::equals, TupleDomain.all(), ImmutableMap.of("col", fullColumn::equals))));
    // Rule should return Optional.empty after projected ColumnHandles have been added to HiveTableHandle
    tester().assertThat(pushProjectionIntoTableScan).on(p -> p.project(Assignments.of(p.symbol("struct_of_int", baseType), p.symbol("struct_of_int", baseType).toSymbolReference()), p.tableScan(new TableHandle(new CatalogName(HIVE_CATALOG_NAME), hiveTable.withProjectedColumns(ImmutableSet.of(fullColumn)), new HiveTransactionHandle(false)), ImmutableList.of(p.symbol("struct_of_int", baseType)), ImmutableMap.of(p.symbol("struct_of_int", baseType), fullColumn)))).doesNotFire();
    // Test Dereference pushdown
    tester().assertThat(pushProjectionIntoTableScan).on(p -> p.project(Assignments.of(p.symbol("expr_deref", BIGINT), new SubscriptExpression(p.symbol("struct_of_int", baseType).toSymbolReference(), new LongLiteral("1"))), p.tableScan(table, ImmutableList.of(p.symbol("struct_of_int", baseType)), ImmutableMap.of(p.symbol("struct_of_int", baseType), fullColumn)))).matches(project(ImmutableMap.of("expr_deref", expression(new SymbolReference("struct_of_int#a"))), tableScan(hiveTable.withProjectedColumns(ImmutableSet.of(partialColumn))::equals, TupleDomain.all(), ImmutableMap.of("struct_of_int#a", partialColumn::equals))));
    metastore.dropTable(SCHEMA_NAME, tableName, true);
}

Also used : MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Database(io.trino.plugin.hive.metastore.Database) Test(org.testng.annotations.Test) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) CatalogName(io.trino.connector.CatalogName) ArithmeticUnaryExpression(io.trino.sql.tree.ArithmeticUnaryExpression) LongLiteral(io.trino.sql.tree.LongLiteral) Arrays.asList(java.util.Arrays.asList) PlanBuilder(io.trino.sql.planner.iterative.rule.test.PlanBuilder) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) PlanMatchPattern.expression(io.trino.sql.planner.assertions.PlanMatchPattern.expression) RowType(io.trino.spi.type.RowType) ImmutableSet(com.google.common.collect.ImmutableSet) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) Assignments(io.trino.sql.planner.plan.Assignments) FileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore) ScalarStatsCalculator(io.trino.cost.ScalarStatsCalculator) TestingHiveConnectorFactory(io.trino.plugin.hive.TestingHiveConnectorFactory) String.format(java.lang.String.format) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) PlanMatchPattern.strictProject(io.trino.sql.planner.assertions.PlanMatchPattern.strictProject) ADD(io.trino.sql.tree.ArithmeticBinaryExpression.Operator.ADD) BIGINT(io.trino.spi.type.BigintType.BIGINT) SymbolReference(io.trino.sql.tree.SymbolReference) PushProjectionIntoTableScan(io.trino.sql.planner.iterative.rule.PushProjectionIntoTableScan) HdfsConfig(io.trino.plugin.hive.HdfsConfig) PruneTableScanColumns(io.trino.sql.planner.iterative.rule.PruneTableScanColumns) HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) Optional(java.util.Optional) Expression(io.trino.sql.tree.Expression) RowType.field(io.trino.spi.type.RowType.field) MINUS(io.trino.sql.tree.ArithmeticUnaryExpression.Sign.MINUS) Session(io.trino.Session) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) PushPredicateIntoTableScan(io.trino.sql.planner.iterative.rule.PushPredicateIntoTableScan) Type(io.trino.spi.type.Type) PlanMatchPattern.filter(io.trino.sql.planner.assertions.PlanMatchPattern.filter) SubscriptExpression(io.trino.sql.tree.SubscriptExpression) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NodeVersion(io.trino.plugin.hive.NodeVersion) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) LocalQueryRunner(io.trino.testing.LocalQueryRunner) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) ArithmeticBinaryExpression(io.trino.sql.tree.ArithmeticBinaryExpression) Symbol(io.trino.sql.planner.Symbol) AfterClass(org.testng.annotations.AfterClass) BaseRuleTest(io.trino.sql.planner.iterative.rule.test.BaseRuleTest) PrincipalType(io.trino.spi.security.PrincipalType) IOException(java.io.IOException) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) TupleDomain(io.trino.spi.predicate.TupleDomain) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) File(java.io.File) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) TableHandle(io.trino.metadata.TableHandle) PlanMatchPattern.project(io.trino.sql.planner.assertions.PlanMatchPattern.project) FileHiveMetastoreConfig(io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) PlanMatchPattern.tableScan(io.trino.sql.planner.assertions.PlanMatchPattern.tableScan) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) LongLiteral(io.trino.sql.tree.LongLiteral) SymbolReference(io.trino.sql.tree.SymbolReference) ScalarStatsCalculator(io.trino.cost.ScalarStatsCalculator) RowType(io.trino.spi.type.RowType) Type(io.trino.spi.type.Type) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) PrincipalType(io.trino.spi.security.PrincipalType) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PushProjectionIntoTableScan(io.trino.sql.planner.iterative.rule.PushProjectionIntoTableScan) HiveColumnProjectionInfo(io.trino.plugin.hive.HiveColumnProjectionInfo) SubscriptExpression(io.trino.sql.tree.SubscriptExpression) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) TableHandle(io.trino.metadata.TableHandle) CatalogName(io.trino.connector.CatalogName) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test) BaseRuleTest(io.trino.sql.planner.iterative.rule.test.BaseRuleTest)

Aggregations

HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)5 HiveColumnProjectionInfo (io.trino.plugin.hive.HiveColumnProjectionInfo)5 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 HdfsEnvironment (io.trino.plugin.hive.HdfsEnvironment)4 REGULAR (io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)4 RowType (io.trino.spi.type.RowType)3 Test (org.testng.annotations.Test)3 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)2 Strings.nullToEmpty (com.google.common.base.Strings.nullToEmpty)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 Maps (com.google.common.collect.Maps)2 Maps.uniqueIndex (com.google.common.collect.Maps.uniqueIndex)2 Files (com.google.common.io.Files)2 MoreFiles.deleteRecursively (com.google.common.io.MoreFiles.deleteRecursively)2 ALLOW_INSECURE (com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE)2 Slice (io.airlift.slice.Slice)2 Session (io.trino.Session)2