
Example 31 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb, from class IcebergPageSourceProvider, method createPageSource:

@Override
public ConnectorPageSource createPageSource(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorSplit connectorSplit,
        ConnectorTableHandle connectorTable,
        List<ColumnHandle> columns,
        DynamicFilter dynamicFilter)
{
    IcebergSplit split = (IcebergSplit) connectorSplit;
    IcebergTableHandle table = (IcebergTableHandle) connectorTable;

    List<IcebergColumnHandle> icebergColumns = columns.stream()
            .map(IcebergColumnHandle.class::cast)
            .collect(toImmutableList());

    // Identity-partition values come from split metadata, so only the remaining
    // ("regular") columns are read from the data file itself
    Map<Integer, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<IcebergColumnHandle> regularColumns = columns.stream()
            .map(IcebergColumnHandle.class::cast)
            .filter(column -> !partitionKeys.containsKey(column.getId()))
            .collect(toImmutableList());

    // Combine the table's unenforced predicate with the current dynamic filter,
    // then compact the result so it stays cheap to evaluate
    TupleDomain<IcebergColumnHandle> effectivePredicate = table.getUnenforcedPredicate()
            .intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast))
            .simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);

    HdfsContext hdfsContext = new HdfsContext(session);
    ReaderPageSource dataPageSource = createDataPageSource(
            session,
            hdfsContext,
            new Path(split.getPath()),
            split.getStart(),
            split.getLength(),
            split.getFileSize(),
            split.getFileFormat(),
            regularColumns,
            effectivePredicate,
            table.getNameMappingJson().map(NameMappingParser::fromJson));

    Optional<ReaderProjectionsAdapter> projectionsAdapter = dataPageSource.getReaderColumns()
            .map(readerColumns -> new ReaderProjectionsAdapter(
                    regularColumns,
                    readerColumns,
                    column -> ((IcebergColumnHandle) column).getType(),
                    IcebergPageSourceProvider::applyProjection));

    return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource.get(), projectionsAdapter);
}
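
The key step above is splitting the requested columns: identity-partition values are answered from the split's partition-key map, so only the remaining columns are pushed down to the file reader. A minimal generic sketch of that two-way split (illustrative only; the class and method names here are hypothetical, not part of Trino):

import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import static java.util.stream.Collectors.partitioningBy;

final class ColumnSplit
{
    private ColumnSplit() {}

    // true -> columns served from partition metadata, false -> columns read from the data file
    static <C> Map<Boolean, List<C>> byPartitionKey(List<C> columns, Predicate<C> isPartitionKey)
    {
        return columns.stream().collect(partitioningBy(isPartitionKey));
    }
}

Trino's own code performs the same separation with two stream pipelines, as shown above.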

Example 32 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb, from class RaptorPageSourceProvider, method createPageSource:

@Override
public ConnectorPageSource createPageSource(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorSplit split,
        ConnectorTableHandle table,
        List<ColumnHandle> columns,
        DynamicFilter dynamicFilter)
{
    RaptorSplit raptorSplit = (RaptorSplit) split;
    RaptorTableHandle raptorTable = (RaptorTableHandle) table;

    OptionalInt bucketNumber = raptorSplit.getBucketNumber();
    TupleDomain<RaptorColumnHandle> predicate = raptorTable.getConstraint();
    OrcReaderOptions options = new OrcReaderOptions()
            .withMaxMergeDistance(getReaderMaxMergeDistance(session))
            .withMaxBufferSize(getReaderMaxReadSize(session))
            .withStreamBufferSize(getReaderStreamBufferSize(session))
            .withTinyStripeThreshold(getReaderTinyStripeThreshold(session))
            .withLazyReadSmallRanges(isReaderLazyReadSmallRanges(session));
    OptionalLong transactionId = raptorSplit.getTransactionId();

    // Single shard: return its page source directly
    if (raptorSplit.getShardUuids().size() == 1) {
        UUID shardUuid = raptorSplit.getShardUuids().iterator().next();
        return createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId);
    }

    // Multiple shards: read them back to back through one concatenating page source
    Iterator<ConnectorPageSource> iterator = raptorSplit.getShardUuids().stream()
            .map(shardUuid -> createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId))
            .iterator();
    return new ConcatPageSource(iterator);
}
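
ConcatPageSource presents the per-shard page sources as one logical source. A simplified sketch of that delegation pattern (this is not Trino's ConcatPageSource; it relies only on the stable SPI methods isFinished(), getNextPage(), and close()):

import java.util.Iterator;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSource;

// Illustrative only: reads pages from a sequence of sources in order,
// closing each source once it is exhausted.
final class SequentialPageReader
{
    private final Iterator<ConnectorPageSource> sources;
    private ConnectorPageSource current;

    SequentialPageReader(Iterator<ConnectorPageSource> sources)
    {
        this.sources = sources;
    }

    // Returns the next page, or null when the current source has no page
    // ready yet or every source has been drained.
    Page nextPage() throws Exception
    {
        while (current != null || sources.hasNext()) {
            if (current == null) {
                current = sources.next();
            }
            if (current.isFinished()) {
                current.close();
                current = null;
                continue;
            }
            return current.getNextPage(); // may be null if no page is ready yet
        }
        return null;
    }
}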

Example 33 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb, from class ThriftTpchService, method getRowsSync:

private TrinoThriftPageResult getRowsSync(TrinoThriftId splitId, List<String> outputColumns, long maxBytes, TrinoThriftNullableToken nextToken) {
    SplitInfo splitInfo = SPLIT_INFO_CODEC.fromJson(splitId.getId());
    checkArgument(maxBytes >= DEFAULT_MAX_PAGE_SIZE_IN_BYTES, "requested maxBytes is too small");
    ConnectorPageSource pageSource;
    if (!splitInfo.isIndexSplit()) {
        // normal scan
        pageSource = createPageSource(splitInfo, outputColumns);
    } else {
        // index lookup
        pageSource = createLookupPageSource(splitInfo, outputColumns);
    }
    return getRowsInternal(pageSource, splitInfo.getTableName(), outputColumns, nextToken.getToken());
}
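
The only non-JDK dependency in this snippet is Guava's checkArgument. For reference, a minimal stand-in with the same failure behavior (Guava's real version additionally supports lenient %s message formatting):

// Minimal equivalent of com.google.common.base.Preconditions.checkArgument
// as used above: throws IllegalArgumentException when the condition is false.
final class Preconditions
{
    private Preconditions() {}

    static void checkArgument(boolean condition, String errorMessage)
    {
        if (!condition) {
            throw new IllegalArgumentException(errorMessage);
        }
    }
}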

Example 34 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb, from class AbstractOperatorBenchmark, method createTableScanOperator:

protected final OperatorFactory createTableScanOperator(int operatorId, PlanNodeId planNodeId, String tableName, String... columnNames) {
    checkArgument(session.getCatalog().isPresent(), "catalog not set");
    checkArgument(session.getSchema().isPresent(), "schema not set");
    // look up the table
    Metadata metadata = localQueryRunner.getMetadata();
    QualifiedObjectName qualifiedTableName = new QualifiedObjectName(session.getCatalog().get(), session.getSchema().get(), tableName);
    TableHandle tableHandle = metadata.getTableHandle(session, qualifiedTableName).orElse(null);
    checkArgument(tableHandle != null, "Table '%s' does not exist", qualifiedTableName);
    // look up the columns
    Map<String, ColumnHandle> allColumnHandles = metadata.getColumnHandles(session, tableHandle);
    ImmutableList.Builder<ColumnHandle> columnHandlesBuilder = ImmutableList.builder();
    for (String columnName : columnNames) {
        ColumnHandle columnHandle = allColumnHandles.get(columnName);
        checkArgument(columnHandle != null, "Table '%s' does not have a column '%s'", tableName, columnName);
        columnHandlesBuilder.add(columnHandle);
    }
    List<ColumnHandle> columnHandles = columnHandlesBuilder.build();
    // get the split for this table
    Split split = getLocalQuerySplit(session, tableHandle);
    return new OperatorFactory() {

        @Override
        public Operator createOperator(DriverContext driverContext) {
            OperatorContext operatorContext = driverContext.addOperatorContext(operatorId, planNodeId, "BenchmarkSource");
            ConnectorPageSource pageSource = localQueryRunner.getPageSourceManager().createPageSource(session, split, tableHandle, columnHandles, DynamicFilter.EMPTY);
            return new PageSourceOperator(pageSource, operatorContext);
        }

        @Override
        public void noMoreOperators() {
        }

        @Override
        public OperatorFactory duplicate() {
            throw new UnsupportedOperationException();
        }
    };
}
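
PageSourceOperator is what ultimately drains the ConnectorPageSource created here. A minimal sketch of that consumption loop (not the operator itself; it assumes only the stable SPI methods isFinished(), getNextPage(), and close(), and that ConnectorPageSource extends Closeable, which it does in the Trino SPI):

import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSource;

final class PageSourceDriver
{
    private PageSourceDriver() {}

    // Drains a page source and returns the total row count.
    static long countPositions(ConnectorPageSource pageSource) throws Exception
    {
        long positions = 0;
        try (pageSource) { // Java 9+ try-with-resources on an existing variable
            while (!pageSource.isFinished()) {
                Page page = pageSource.getNextPage();
                if (page == null) {
                    continue; // no page ready yet; a real operator would yield instead of spinning
                }
                positions += page.getPositionCount();
            }
        }
        return positions;
    }
}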

Example 35 with ConnectorPageSource

Use of io.trino.spi.connector.ConnectorPageSource in project trino by trinodb, from class AbstractTestHive, method testPartitionSchemaNonCanonical:

// TODO coercion of non-canonical values should be supported
@Test(enabled = false)
public void testPartitionSchemaNonCanonical() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle table = getTableHandle(metadata, tablePartitionSchemaChangeNonCanonical);
        ColumnHandle column = metadata.getColumnHandles(session, table).get("t_boolean");
        Constraint constraint = new Constraint(TupleDomain.fromFixedValues(ImmutableMap.of(column, NullableValue.of(BOOLEAN, false))));
        table = applyFilter(metadata, table, constraint);
        HivePartition partition = getOnlyElement(((HiveTableHandle) table).getPartitions().orElseThrow(AssertionError::new));
        assertEquals(getPartitionId(partition), "t_boolean=0");
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, table);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        ImmutableList<ColumnHandle> columnHandles = ImmutableList.of(column);
        try (ConnectorPageSource ignored = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, table, columnHandles, DynamicFilter.EMPTY)) {
            fail("expected exception");
        } catch (TrinoException e) {
            assertEquals(e.getErrorCode(), HIVE_INVALID_PARTITION_VALUE.toErrorCode());
        }
    }
}
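
The manual try/catch/fail idiom can be written more compactly with AssertJ's assertThatThrownBy, which Trino's test tree already uses. A sketch of the equivalent assertion (close() is chained so a page source that is unexpectedly created does not leak):

import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.testng.Assert.assertEquals;

assertThatThrownBy(() -> pageSourceProvider.createPageSource(
        transaction.getTransactionHandle(), session, split, table, columnHandles, DynamicFilter.EMPTY).close())
        .isInstanceOfSatisfying(TrinoException.class, e ->
                assertEquals(e.getErrorCode(), HIVE_INVALID_PARTITION_VALUE.toErrorCode()));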

Aggregations

ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource): 50 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 23 usages
Page (io.trino.spi.Page): 18 usages
Type (io.trino.spi.type.Type): 18 usages
Test (org.testng.annotations.Test): 17 usages
ImmutableList (com.google.common.collect.ImmutableList): 16 usages
MaterializedResult (io.trino.testing.MaterializedResult): 14 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 13 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 13 usages
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 13 usages
List (java.util.List): 12 usages
Optional (java.util.Optional): 12 usages
ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 11 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 10 usages
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 10 usages
File (java.io.File): 10 usages
Path (org.apache.hadoop.fs.Path): 10 usages
TupleDomain (io.trino.spi.predicate.TupleDomain): 9 usages
IOException (java.io.IOException): 9 usages
ArrayList (java.util.ArrayList): 9 usages