Example 56 with TupleDomain

Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

From class InformationSchemaMetadata, the method calculatePrefixesWithTableName:

private Set<QualifiedTablePrefix> calculatePrefixesWithTableName(
        InformationSchemaTable informationSchemaTable,
        ConnectorSession connectorSession,
        Set<QualifiedTablePrefix> prefixes,
        TupleDomain<ColumnHandle> constraint,
        Optional<Predicate<Map<ColumnHandle, NullableValue>>> predicate) {
    Session session = ((FullConnectorSession) connectorSession).getSession();
    Optional<Set<String>> tables = filterString(constraint, TABLE_NAME_COLUMN_HANDLE);
    if (tables.isPresent()) {
        return prefixes.stream()
                .peek(prefix -> verify(prefix.asQualifiedObjectName().isEmpty()))
                .flatMap(prefix -> prefix.getSchemaName()
                        .map(schemaName -> Stream.of(prefix))
                        .orElseGet(() -> listSchemaNames(session)))
                .flatMap(prefix -> tables.get().stream()
                        .filter(this::isLowerCase)
                        .map(table -> new QualifiedObjectName(catalogName, prefix.getSchemaName().get(), table)))
                .filter(objectName -> {
                    if (!isColumnsEnumeratingTable(informationSchemaTable) || metadata.isMaterializedView(session, objectName) || metadata.isView(session, objectName)) {
                        return true;
                    }
                    // This is a columns enumerating table and the object is not a view
                    try {
                        // filtering in case the source table does not exist or there is a problem with redirection
                        return metadata.getRedirectionAwareTableHandle(session, objectName).getTableHandle().isPresent();
                    }
                    catch (TrinoException e) {
                        if (e.getErrorCode().equals(TABLE_REDIRECTION_ERROR.toErrorCode())) {
                            // Ignore redirection errors for listing, treat as if the table does not exist
                            return false;
                        }
                        throw e;
                    }
                })
                .filter(objectName -> predicate.isEmpty() || predicate.get().test(asFixedValues(objectName)))
                .map(QualifiedObjectName::asQualifiedTablePrefix)
                .collect(toImmutableSet());
    }
    if (predicate.isEmpty() || !isColumnsEnumeratingTable(informationSchemaTable)) {
        return prefixes;
    }
    return prefixes.stream()
            .flatMap(prefix -> Stream.concat(metadata.listTables(session, prefix).stream(), metadata.listViews(session, prefix).stream()))
            .filter(objectName -> predicate.get().test(asFixedValues(objectName)))
            .map(QualifiedObjectName::asQualifiedTablePrefix)
            .collect(toImmutableSet());
}
Also used : Arrays(java.util.Arrays) VIEWS(io.trino.connector.informationschema.InformationSchemaTable.VIEWS) TABLES(io.trino.connector.informationschema.InformationSchemaTable.TABLES) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ENGLISH(java.util.Locale.ENGLISH) MetadataUtil.findColumnMetadata(io.trino.metadata.MetadataUtil.findColumnMetadata) TABLE_REDIRECTION_ERROR(io.trino.spi.StandardErrorCode.TABLE_REDIRECTION_ERROR) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Range(io.trino.spi.predicate.Range) ROLE_AUTHORIZATION_DESCRIPTORS(io.trino.connector.informationschema.InformationSchemaTable.ROLE_AUTHORIZATION_DESCRIPTORS) Predicate(java.util.function.Predicate) Collections.emptyList(java.util.Collections.emptyList) Domain(io.trino.spi.predicate.Domain) EquatableValueSet(io.trino.spi.predicate.EquatableValueSet) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) LimitApplicationResult(io.trino.spi.connector.LimitApplicationResult) SchemaTableName(io.trino.spi.connector.SchemaTableName) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) INFORMATION_SCHEMA(io.trino.connector.informationschema.InformationSchemaTable.INFORMATION_SCHEMA) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) Session(io.trino.Session) Constraint(io.trino.spi.connector.Constraint) Slice(io.airlift.slice.Slice) FullConnectorSession(io.trino.FullConnectorSession) NullableValue(io.trino.spi.predicate.NullableValue) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) TABLE_PRIVILEGES(io.trino.connector.informationschema.InformationSchemaTable.TABLE_PRIVILEGES) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) OptionalLong(java.util.OptionalLong) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) COLUMNS(io.trino.connector.informationschema.InformationSchemaTable.COLUMNS) QualifiedTablePrefix(io.trino.metadata.QualifiedTablePrefix) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) Metadata(io.trino.metadata.Metadata) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet)
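
The TupleDomain-specific work in this example happens inside filterString, which is not shown on this page: it inspects the Domain attached to TABLE_NAME_COLUMN_HANDLE and, when that domain pins the column to a set of discrete values, returns those values as strings. A minimal sketch of such an extraction, assuming a VARCHAR column whose values arrive as Slice; the helper name and exact shape here are hypothetical, not the Trino source:

// Hypothetical sketch: extract the discrete VARCHAR values that a TupleDomain
// pins a single column to, if it does.
private static Optional<Set<String>> extractDiscreteStrings(TupleDomain<ColumnHandle> constraint, ColumnHandle column) {
    if (constraint.isNone()) {
        // Contradictory constraint: no row can match, so no table name qualifies
        return Optional.of(ImmutableSet.of());
    }
    Domain domain = constraint.getDomains().get().get(column);
    if (domain == null || !domain.getValues().isDiscreteSet()) {
        // Column is unconstrained, or is constrained by ranges rather than points
        return Optional.empty();
    }
    return Optional.of(domain.getValues().getDiscreteSet().stream()
            .map(value -> ((Slice) value).toStringUtf8())
            .collect(toImmutableSet()));
}

Returning Optional.empty() makes the caller fall back to enumerating all tables, so discarding information in this direction costs performance, never correctness.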

Example 57 with TupleDomain

Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

From class PruneTableScanColumns, the method pruneColumns:

public static Optional<PlanNode> pruneColumns(
        Metadata metadata,
        TypeProvider types,
        Session session,
        TableScanNode node,
        Set<Symbol> referencedOutputs) {
    List<Symbol> newOutputs = filteredCopy(node.getOutputSymbols(), referencedOutputs::contains);
    if (newOutputs.size() == node.getOutputSymbols().size()) {
        return Optional.empty();
    }
    List<ConnectorExpression> projections = newOutputs.stream()
            .map(symbol -> new Variable(symbol.getName(), types.get(symbol)))
            .collect(toImmutableList());
    TableHandle handle = node.getTable();
    Optional<ProjectionApplicationResult<TableHandle>> result = metadata.applyProjection(
            session,
            handle,
            projections,
            newOutputs.stream().collect(toImmutableMap(Symbol::getName, node.getAssignments()::get)));
    Map<Symbol, ColumnHandle> newAssignments;
    // Bail out if the connector does anything other than limit the list of columns (e.g., if it synthesizes arbitrary expressions)
    if (result.isPresent() && result.get().getProjections().stream().allMatch(Variable.class::isInstance)) {
        handle = result.get().getHandle();
        Map<String, ColumnHandle> assignments = result.get().getAssignments().stream().collect(toImmutableMap(Assignment::getVariable, Assignment::getColumn));
        ImmutableMap.Builder<Symbol, ColumnHandle> builder = ImmutableMap.builder();
        for (int i = 0; i < newOutputs.size(); i++) {
            Variable variable = (Variable) result.get().getProjections().get(i);
            builder.put(newOutputs.get(i), assignments.get(variable.getName()));
        }
        newAssignments = builder.buildOrThrow();
    } else {
        newAssignments = newOutputs.stream().collect(toImmutableMap(Function.identity(), node.getAssignments()::get));
    }
    Set<ColumnHandle> visibleColumns = ImmutableSet.copyOf(newAssignments.values());
    TupleDomain<ColumnHandle> enforcedConstraint = node.getEnforcedConstraint().filter((columnHandle, domain) -> visibleColumns.contains(columnHandle));
    Optional<PlanNodeStatsEstimate> newStatistics = node.getStatistics().map(statistics -> new PlanNodeStatsEstimate(
            statistics.getOutputRowCount(),
            statistics.getSymbolStatistics().entrySet().stream()
                    .filter(entry -> newAssignments.containsKey(entry.getKey()))
                    .collect(toImmutableMap(Entry::getKey, Entry::getValue))));
    return Optional.of(new TableScanNode(
            node.getId(),
            handle,
            newOutputs,
            newAssignments,
            enforcedConstraint,
            newStatistics,
            node.isUpdateTarget(),
            node.getUseConnectorNodePartitioning()));
}
Also used : Variable(io.trino.spi.expression.Variable) Function(java.util.function.Function) PlanNode(io.trino.sql.planner.plan.PlanNode) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) TableScanNode(io.trino.sql.planner.plan.TableScanNode) PlanNodeStatsEstimate(io.trino.cost.PlanNodeStatsEstimate) Symbol(io.trino.sql.planner.Symbol) MoreLists.filteredCopy(io.trino.util.MoreLists.filteredCopy) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TupleDomain(io.trino.spi.predicate.TupleDomain) Patterns.tableScan(io.trino.sql.planner.plan.Patterns.tableScan) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TableHandle(io.trino.metadata.TableHandle) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) Assignment(io.trino.spi.connector.Assignment) Entry(java.util.Map.Entry) Metadata(io.trino.metadata.Metadata) TypeProvider(io.trino.sql.planner.TypeProvider) Optional(java.util.Optional) Session(io.trino.Session)
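
The TupleDomain step that makes this rule safe is the filter call near the end: once columns are pruned, the enforced constraint must not reference column handles the scan no longer produces. A self-contained sketch of that narrowing, using a hypothetical TestColumnHandle marker (TupleDomain.filter keeps only the entries the predicate accepts):

// Hypothetical marker handle, just for this sketch; ColumnHandle is a marker interface
record TestColumnHandle(String name) implements ColumnHandle {}

ColumnHandle a = new TestColumnHandle("a");
ColumnHandle b = new TestColumnHandle("b");
TupleDomain<ColumnHandle> constraint = TupleDomain.withColumnDomains(ImmutableMap.of(
        a, Domain.singleValue(BIGINT, 1L),
        b, Domain.singleValue(BIGINT, 2L)));

// Column b was pruned away, so its domain must be dropped; only a's domain survives
Set<ColumnHandle> visible = ImmutableSet.of(a);
TupleDomain<ColumnHandle> narrowed = constraint.filter((column, domain) -> visible.contains(column));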

Example 58 with TupleDomain

Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

From class DeltaLakeSplitManager, the method getSplits:

private Stream<DeltaLakeSplit> getSplits(
        ConnectorTransactionHandle transaction,
        DeltaLakeTableHandle tableHandle,
        ConnectorSession session,
        Optional<DataSize> maxScannedFileSize,
        Set<ColumnHandle> columnsCoveredByDynamicFilter,
        Constraint constraint) {
    DeltaLakeMetastore metastore = getMetastore(session, transaction);
    String tableLocation = metastore.getTableLocation(tableHandle.getSchemaTableName(), session);
    List<AddFileEntry> validDataFiles = metastore.getValidDataFiles(tableHandle.getSchemaTableName(), session);
    TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint();
    TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint = tableHandle.getNonPartitionConstraint();
    // Delta Lake handles updates and deletes by rewriting entire data files, minus the updated/deleted rows.
    // Because of this, we can only have one Split/UpdatablePageSource per file.
    boolean splittable = tableHandle.getWriteType().isEmpty();
    AtomicInteger remainingInitialSplits = new AtomicInteger(maxInitialSplits);
    Optional<Instant> filesModifiedAfter = tableHandle.getAnalyzeHandle().flatMap(AnalyzeHandle::getFilesModifiedAfter);
    Optional<Long> maxScannedFileSizeInBytes = maxScannedFileSize.map(DataSize::toBytes);
    Set<String> predicatedColumnNames = Stream.concat(
            nonPartitionConstraint.getDomains().orElseThrow().keySet().stream(),
            columnsCoveredByDynamicFilter.stream().map(DeltaLakeColumnHandle.class::cast))
            // TODO is DeltaLakeColumnHandle.name normalized?
            .map(column -> column.getName().toLowerCase(ENGLISH))
            .collect(toImmutableSet());
    List<ColumnMetadata> schema = extractSchema(tableHandle.getMetadataEntry(), typeManager);
    List<ColumnMetadata> predicatedColumns = schema.stream()
            // ColumnMetadata.name is lowercase
            .filter(column -> predicatedColumnNames.contains(column.getName()))
            .collect(toImmutableList());
    return validDataFiles.stream().flatMap(addAction -> {
        if (tableHandle.getAnalyzeHandle().isPresent() && !tableHandle.getAnalyzeHandle().get().isInitialAnalyze() && !addAction.isDataChange()) {
            // skip files which do not introduce data change on non-initial ANALYZE
            return Stream.empty();
        }
        if (filesModifiedAfter.isPresent() && addAction.getModificationTime() <= filesModifiedAfter.get().toEpochMilli()) {
            return Stream.empty();
        }
        if (maxScannedFileSizeInBytes.isPresent() && addAction.getSize() > maxScannedFileSizeInBytes.get()) {
            return Stream.empty();
        }
        Map<DeltaLakeColumnHandle, Domain> enforcedDomains = enforcedPartitionConstraint.getDomains().orElseThrow();
        if (!partitionMatchesPredicate(addAction.getCanonicalPartitionValues(), enforcedDomains)) {
            return Stream.empty();
        }
        TupleDomain<DeltaLakeColumnHandle> statisticsPredicate = createStatisticsPredicate(addAction, predicatedColumns, tableHandle.getMetadataEntry().getCanonicalPartitionColumns());
        if (!nonPartitionConstraint.overlaps(statisticsPredicate)) {
            return Stream.empty();
        }
        if (constraint.predicate().isPresent()) {
            Map<String, Optional<String>> partitionValues = addAction.getCanonicalPartitionValues();
            Map<ColumnHandle, NullableValue> deserializedValues = constraint.getPredicateColumns().orElseThrow().stream()
                    .filter(column -> column instanceof DeltaLakeColumnHandle)
                    .filter(column -> partitionValues.containsKey(((DeltaLakeColumnHandle) column).getName()))
                    .collect(toImmutableMap(identity(), column -> {
                        DeltaLakeColumnHandle deltaLakeColumn = (DeltaLakeColumnHandle) column;
                        return NullableValue.of(
                                deltaLakeColumn.getType(),
                                deserializePartitionValue(deltaLakeColumn, addAction.getCanonicalPartitionValues().get(deltaLakeColumn.getName())));
                    }));
            if (!constraint.predicate().get().test(deserializedValues)) {
                return Stream.empty();
            }
        }
        return splitsForFile(session, addAction, tableLocation, addAction.getCanonicalPartitionValues(), statisticsPredicate, splittable, remainingInitialSplits).stream();
    });
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Constraint(io.trino.spi.connector.Constraint) DeltaLakeSessionProperties.getMaxInitialSplitSize(io.trino.plugin.deltalake.DeltaLakeSessionProperties.getMaxInitialSplitSize) URLDecoder(java.net.URLDecoder) NullableValue(io.trino.spi.predicate.NullableValue) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) BiFunction(java.util.function.BiFunction) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) DeltaLakeMetadata.createStatisticsPredicate(io.trino.plugin.deltalake.DeltaLakeMetadata.createStatisticsPredicate) ImmutableList(com.google.common.collect.ImmutableList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ENGLISH(java.util.Locale.ENGLISH) ExecutorService(java.util.concurrent.ExecutorService) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) Instant(java.time.Instant) DeltaLakeSessionProperties.getMaxSplitSize(io.trino.plugin.deltalake.DeltaLakeSessionProperties.getMaxSplitSize) DataSize(io.airlift.units.DataSize) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLoaderSafeConnectorSplitSource(io.trino.plugin.base.classloader.ClassLoaderSafeConnectorSplitSource) Stream(java.util.stream.Stream) DynamicFilter(io.trino.spi.connector.DynamicFilter) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) TypeManager(io.trino.spi.type.TypeManager) TransactionLogParser.deserializePartitionValue(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.deserializePartitionValue) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
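
Two TupleDomain operations carry the file pruning above: createStatisticsPredicate turns a file's per-column min/max statistics into a TupleDomain, and overlaps then answers whether any row of that file could satisfy the query constraint. A minimal sketch of the overlaps check, with hypothetical String keys standing in for column handles (Range and ValueSet are from io.trino.spi.predicate):

// Hypothetical sketch: a file whose statistics bound x to [0, 10] cannot
// contain a row with x = 42, so the split for it can be skipped entirely.
TupleDomain<String> queryConstraint = TupleDomain.withColumnDomains(ImmutableMap.of(
        "x", Domain.singleValue(BIGINT, 42L)));
TupleDomain<String> fileStatistics = TupleDomain.withColumnDomains(ImmutableMap.of(
        "x", Domain.create(ValueSet.ofRanges(Range.range(BIGINT, 0L, true, 10L, true)), false)));

boolean canMatch = queryConstraint.overlaps(fileStatistics); // false here: prune without opening the file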

Example 59 with TupleDomain

Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

From class ParquetPageSourceFactory, the method getParquetTupleDomain:

public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(
        Map<List<String>, RichColumnDescriptor> descriptorsByPath,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        MessageType fileSchema,
        boolean useColumnNames) {
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
    for (Entry<HiveColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        HiveColumnHandle columnHandle = entry.getKey();
        // skip looking up predicates for complex types as Parquet only stores stats for primitives
        if (columnHandle.getHiveType().getCategory() != PRIMITIVE || columnHandle.getColumnType() != REGULAR) {
            continue;
        }
        RichColumnDescriptor descriptor;
        if (useColumnNames) {
            descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
        } else {
            org.apache.parquet.schema.Type parquetField = getParquetType(columnHandle, fileSchema, false);
            if (parquetField == null || !parquetField.isPrimitive()) {
                // The field is missing from the file schema, or it is a complex type
                continue;
            }
            descriptor = descriptorsByPath.get(ImmutableList.of(parquetField.getName()));
        }
        if (descriptor != null) {
            predicate.put(descriptor, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(predicate.buildOrThrow());
}
Also used : RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) ImmutableMap(com.google.common.collect.ImmutableMap) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)
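
The shape of getParquetTupleDomain (unwrap getDomains(), walk the entries, rebuild with withColumnDomains, silently dropping columns that have no Parquet counterpart) is the general pattern for re-keying a TupleDomain when some entries may not translate. A generic sketch of that pattern; the translate helper is hypothetical. Note that dropping an entry only widens the predicate, which is safe for filtering:

// Hypothetical helper: re-key a TupleDomain, dropping entries whose key has no
// counterpart on the target side. The result is a weaker, still-correct predicate.
static <K, V> TupleDomain<V> translate(TupleDomain<K> source, Map<K, V> mapping) {
    if (source.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<V, Domain> builder = ImmutableMap.builder();
    for (Map.Entry<K, Domain> entry : source.getDomains().get().entrySet()) {
        V target = mapping.get(entry.getKey());
        if (target != null) {
            builder.put(target, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(builder.buildOrThrow());
}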

Example 60 with TupleDomain

Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

From class ParquetPageSourceFactory, the method getColumnIndexStore:

private static Optional<ColumnIndexStore> getColumnIndexStore(
        ParquetDataSource dataSource,
        BlockMetaData blockMetadata,
        Map<List<String>, RichColumnDescriptor> descriptorsByPath,
        TupleDomain<ColumnDescriptor> parquetTupleDomain,
        ParquetReaderOptions options) {
    if (!options.isUseColumnIndex() || parquetTupleDomain.isAll() || parquetTupleDomain.isNone()) {
        return Optional.empty();
    }
    boolean hasColumnIndex = false;
    for (ColumnChunkMetaData column : blockMetadata.getColumns()) {
        if (column.getColumnIndexReference() != null && column.getOffsetIndexReference() != null) {
            hasColumnIndex = true;
            break;
        }
    }
    if (!hasColumnIndex) {
        return Optional.empty();
    }
    Set<ColumnPath> columnsReadPaths = new HashSet<>(descriptorsByPath.size());
    for (List<String> path : descriptorsByPath.keySet()) {
        columnsReadPaths.add(ColumnPath.get(path.toArray(new String[0])));
    }
    Map<ColumnDescriptor, Domain> parquetDomains = parquetTupleDomain.getDomains()
            .orElseThrow(() -> new IllegalStateException("Predicate other than none should have domains"));
    Set<ColumnPath> columnsFilteredPaths = parquetDomains.keySet().stream()
            .map(column -> ColumnPath.get(column.getPath()))
            .collect(toImmutableSet());
    return Optional.of(new TrinoColumnIndexStore(dataSource, blockMetadata, columnsReadPaths, columnsFilteredPaths));
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) HIVE_MISSING_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CANNOT_OPEN_SPLIT(io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) MetadataReader(io.trino.parquet.reader.MetadataReader) HiveSessionProperties.isParquetUseColumnIndex(io.trino.plugin.hive.HiveSessionProperties.isParquetUseColumnIndex) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) HiveSessionProperties.isUseParquetColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseParquetColumnNames) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveParquetColumnIOConverter.constructField(io.trino.plugin.hive.parquet.HiveParquetColumnIOConverter.constructField) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HiveUtil.getDeserializerClassName(io.trino.plugin.hive.util.HiveUtil.getDeserializerClassName) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) ImmutableSet(com.google.common.collect.ImmutableSet) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) ParquetReader(io.trino.parquet.reader.ParquetReader) ReaderColumns(io.trino.plugin.hive.ReaderColumns) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) ParquetTypeUtils.getColumnIO(io.trino.parquet.ParquetTypeUtils.getColumnIO) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ColumnChunkMetaData(org.apache.parquet.hadoop.metadata.ColumnChunkMetaData) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) MessageType(org.apache.parquet.schema.MessageType) List(java.util.List) HiveSessionProperties.getParquetMaxReadBlockSize(io.trino.plugin.hive.HiveSessionProperties.getParquetMaxReadBlockSize) BIGINT(io.trino.spi.type.BigintType.BIGINT) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) Entry(java.util.Map.Entry) Optional(java.util.Optional) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Type(io.trino.spi.type.Type) OptionalInt(java.util.OptionalInt) HiveSessionProperties.isParquetIgnoreStatistics(io.trino.plugin.hive.HiveSessionProperties.isParquetIgnoreStatistics) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) Inject(javax.inject.Inject) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) ParquetTypeUtils.lookupColumnByName(io.trino.parquet.ParquetTypeUtils.lookupColumnByName) 
ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ParquetTypeUtils.getParquetTypeByName(io.trino.parquet.ParquetTypeUtils.getParquetTypeByName) ParquetReaderOptions(io.trino.parquet.ParquetReaderOptions) Predicate(io.trino.parquet.predicate.Predicate) HIVE_BAD_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA) HivePageSourceProvider.projectSufficientColumns(io.trino.plugin.hive.HivePageSourceProvider.projectSufficientColumns) Properties(java.util.Properties) TrinoColumnIndexStore(io.trino.parquet.reader.TrinoColumnIndexStore) PredicateUtils.predicateMatches(io.trino.parquet.predicate.PredicateUtils.predicateMatches) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) AcidInfo(io.trino.plugin.hive.AcidInfo) ParquetTypeUtils.getDescriptors(io.trino.parquet.ParquetTypeUtils.getDescriptors) HivePageSourceProvider.projectBaseColumns(io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns) Field(io.trino.parquet.Field) ParquetDataSource(io.trino.parquet.ParquetDataSource) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) PredicateUtils.buildPredicate(io.trino.parquet.predicate.PredicateUtils.buildPredicate) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)
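
The orElseThrow in this method relies on a TupleDomain invariant: getDomains() is empty only for TupleDomain.none(), and the isNone() guard at the top has already screened that case out. A quick sketch of the invariant (verify is Guava's Verify.verify):

// Sketch: all() carries an (empty) domain map; only none() has no map at all.
TupleDomain<String> all = TupleDomain.all();
TupleDomain<String> none = TupleDomain.none();
verify(all.getDomains().isPresent()); // Optional of an empty map
verify(none.getDomains().isEmpty());  // a contradiction has no domain representation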

Aggregations

TupleDomain (io.trino.spi.predicate.TupleDomain): 97
Domain (io.trino.spi.predicate.Domain): 77
Map (java.util.Map): 50
ColumnHandle (io.trino.spi.connector.ColumnHandle): 48
ImmutableMap (com.google.common.collect.ImmutableMap): 43
ImmutableList (com.google.common.collect.ImmutableList): 41
List (java.util.List): 40
Optional (java.util.Optional): 36
Set (java.util.Set): 33
Test (org.testng.annotations.Test): 33
Objects.requireNonNull (java.util.Objects.requireNonNull): 32
ConnectorSession (io.trino.spi.connector.ConnectorSession): 29
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 28
ImmutableSet (com.google.common.collect.ImmutableSet): 26
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 23
Range (io.trino.spi.predicate.Range): 22
String.format (java.lang.String.format): 22
ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet): 20
TrinoException (io.trino.spi.TrinoException): 20
Type (io.trino.spi.type.Type): 19