Example 11 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

From class KafkaSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream().map(KafkaSplitManager::toTopicPartition).collect(toImmutableList());
        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();
        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);
        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition))
                    .partition(messagesPerSplit)
                    .stream()
                    .map(range -> new KafkaSplit(
                            kafkaTableHandle.getTopicName(), kafkaTableHandle.getKeyDataFormat(), kafkaTableHandle.getMessageDataFormat(),
                            keyDataSchemaContents, messageDataSchemaContents,
                            partitionInfo.partition(), range, leader))
                    .forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    } catch (Exception e) {
        // Catch all exceptions because the Kafka library is written in Scala and checked exceptions are not declared in method signatures.
        if (e instanceof TrinoException) {
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) String.format(java.lang.String.format) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ContentSchemaReader(io.trino.plugin.kafka.schema.ContentSchemaReader) Optional(java.util.Optional) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) HostAddress(io.trino.spi.HostAddress) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
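
A hypothetical, self-contained sketch (not Trino's io.trino.plugin.kafka.Range, whose source is not shown here) of what the Range.partition(messagesPerSplit) call above does: it carves a topic partition's [begin, end) offset range into chunks of at most messagesPerSplit messages, and each chunk becomes one KafkaSplit.

import java.util.ArrayList;
import java.util.List;

// Stand-in for the per-partition offset range; all names here are invented
final class OffsetRange {
    final long begin; // first offset, inclusive
    final long end; // last offset, exclusive

    OffsetRange(long begin, long end) {
        this.begin = begin;
        this.end = end;
    }

    // Split [begin, end) into consecutive chunks of at most messagesPerSplit offsets
    List<OffsetRange> partition(long messagesPerSplit) {
        List<OffsetRange> chunks = new ArrayList<>();
        for (long start = begin; start < end; start += messagesPerSplit) {
            chunks.add(new OffsetRange(start, Math.min(start + messagesPerSplit, end)));
        }
        return chunks;
    }
}

For example, new OffsetRange(0, 2_500).partition(1_000) yields [0, 1000), [1000, 2000) and [2000, 2500), so a partition with 2,500 unread messages is read by three splits.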

Example 12 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

From class IcebergPageSourceProvider, method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    IcebergSplit split = (IcebergSplit) connectorSplit;
    IcebergTableHandle table = (IcebergTableHandle) connectorTable;
    List<IcebergColumnHandle> icebergColumns = columns.stream()
            .map(IcebergColumnHandle.class::cast)
            .collect(toImmutableList());
    Map<Integer, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<IcebergColumnHandle> regularColumns = columns.stream()
            .map(IcebergColumnHandle.class::cast)
            .filter(column -> !partitionKeys.containsKey(column.getId()))
            .collect(toImmutableList());
    TupleDomain<IcebergColumnHandle> effectivePredicate = table.getUnenforcedPredicate()
            .intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast))
            .simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
    HdfsContext hdfsContext = new HdfsContext(session);
    ReaderPageSource dataPageSource = createDataPageSource(
            session, hdfsContext, new Path(split.getPath()),
            split.getStart(), split.getLength(), split.getFileSize(), split.getFileFormat(),
            regularColumns, effectivePredicate,
            table.getNameMappingJson().map(NameMappingParser::fromJson));
    Optional<ReaderProjectionsAdapter> projectionsAdapter = dataPageSource.getReaderColumns()
            .map(readerColumns -> new ReaderProjectionsAdapter(regularColumns, readerColumns,
                    column -> ((IcebergColumnHandle) column).getType(), IcebergPageSourceProvider::applyProjection));
    return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource.get(), projectionsAdapter);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ORC_ICEBERG_ID_KEY(io.trino.plugin.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) ColumnAdaptation(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ICEBERG_CANNOT_OPEN_SPLIT(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) UUID(io.trino.spi.type.UuidType.UUID) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) ICEBERG_FILESYSTEM_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ReaderColumns(io.trino.plugin.hive.ReaderColumns) Set(java.util.Set) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnIO(org.apache.parquet.io.ColumnIO) IcebergSessionProperties.getOrcTinyStripeThreshold(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) MappedField(org.apache.iceberg.mapping.MappedField) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) IcebergSessionProperties.isOrcNestedLazy(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy) IcebergSessionProperties.getOrcMaxBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) HdfsParquetDataSource(io.trino.plugin.hive.parquet.HdfsParquetDataSource) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) IcebergSessionProperties.getOrcMaxMergeDistance(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) ICEBERG_MISSING_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) MappedFields(org.apache.iceberg.mapping.MappedFields) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) OrcType(io.trino.orc.metadata.OrcType) Predicate(io.trino.parquet.predicate.Predicate) IcebergSessionProperties.isUseFileSizeFromMetadata(io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata) MapType(io.trino.spi.type.MapType) 
PredicateUtils.predicateMatches(io.trino.parquet.predicate.PredicateUtils.predicateMatches) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) StandardTypes(io.trino.spi.type.StandardTypes) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IcebergSessionProperties.getOrcLazyReadSmallRanges(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) IcebergSessionProperties.getParquetMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) UTC(org.joda.time.DateTimeZone.UTC) Field(io.trino.parquet.Field) Traverser(com.google.common.graph.Traverser) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ProjectedLayout(io.trino.orc.OrcReader.ProjectedLayout) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) IcebergSessionProperties.getOrcStreamBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetSchemaUtil(org.apache.iceberg.parquet.ParquetSchemaUtil) OrcColumn(io.trino.orc.OrcColumn) PredicateUtils.buildPredicate(io.trino.parquet.predicate.PredicateUtils.buildPredicate) MetadataReader(io.trino.parquet.reader.MetadataReader) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) OrcRecordReader(io.trino.orc.OrcRecordReader) NameMapping(org.apache.iceberg.mapping.NameMapping) Path(org.apache.hadoop.fs.Path) OrcDataSource(io.trino.orc.OrcDataSource) ReaderProjectionsAdapter(io.trino.plugin.hive.ReaderProjectionsAdapter) RowType(io.trino.spi.type.RowType) ICEBERG_BAD_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) ParquetReader(io.trino.parquet.reader.ParquetReader) FieldContext(io.trino.plugin.iceberg.IcebergParquetColumnIOConverter.FieldContext) ICEBERG_CURSOR_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) ParquetTypeUtils.getColumnIO(io.trino.parquet.ParquetTypeUtils.getColumnIO) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) OrcDataSourceId(io.trino.orc.OrcDataSourceId) MessageType(org.apache.parquet.schema.MessageType) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) IcebergSessionProperties.getOrcMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) Function(java.util.function.Function) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) Collectors.mapping(java.util.stream.Collectors.mapping) 
IcebergSessionProperties.isOrcBloomFiltersEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) HdfsOrcDataSource(io.trino.plugin.hive.orc.HdfsOrcDataSource) ParquetReaderOptions(io.trino.parquet.ParquetReaderOptions) OrcReader(io.trino.orc.OrcReader) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) ICEBERG_BINARY_TYPE(io.trino.plugin.iceberg.TypeConverter.ICEBERG_BINARY_TYPE) TupleDomain(io.trino.spi.predicate.TupleDomain) OrcReader.fullyProjectedLayout(io.trino.orc.OrcReader.fullyProjectedLayout) OrcCorruptionException(io.trino.orc.OrcCorruptionException) Collectors.toList(java.util.stream.Collectors.toList) ParquetTypeUtils.getDescriptors(io.trino.parquet.ParquetTypeUtils.getDescriptors) ParquetDataSource(io.trino.parquet.ParquetDataSource) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
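
A minimal sketch of the predicate-narrowing step above, using the io.trino.spi.predicate API the snippet itself relies on; the column names and values are invented for illustration. The unenforced predicate left over from planning is intersected with whatever the dynamic filter has collected so far, so the reader only has to satisfy the combined domain.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

class EffectivePredicateSketch {
    static TupleDomain<String> effectivePredicate() {
        // Predicate the connector could not enforce during planning (hypothetical column)
        TupleDomain<String> unenforced = TupleDomain.withColumnDomains(
                ImmutableMap.of("nationkey", Domain.singleValue(BIGINT, 42L)));
        // Predicate collected at runtime by the dynamic filter (hypothetical column)
        TupleDomain<String> fromDynamicFilter = TupleDomain.withColumnDomains(
                ImmutableMap.of("regionkey", Domain.singleValue(BIGINT, 1L)));
        // Rows must satisfy both; contradictory domains collapse to TupleDomain.none()
        return unenforced.intersect(fromDynamicFilter);
    }
}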

Example 13 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

From class TestIcebergSplitSource, method testIncompleteDynamicFilterTimeout.

@Test(timeOut = 30_000)
public void testIncompleteDynamicFilterTimeout() throws Exception {
    long startMillis = System.currentTimeMillis();
    SchemaTableName schemaTableName = new SchemaTableName("tpch", "nation");
    IcebergTableHandle tableHandle = new IcebergTableHandle(
            schemaTableName.getSchemaName(), schemaTableName.getTableName(),
            TableType.DATA, Optional.empty(), TupleDomain.all(), TupleDomain.all(),
            ImmutableSet.of(), Optional.empty());
    Table nationTable = catalog.loadTable(SESSION, schemaTableName);
    IcebergSplitSource splitSource = new IcebergSplitSource(tableHandle, nationTable.newScan(), Optional.empty(), new DynamicFilter() {

        @Override
        public Set<ColumnHandle> getColumnsCovered() {
            return ImmutableSet.of();
        }

        @Override
        public CompletableFuture<?> isBlocked() {
            return CompletableFuture.runAsync(() -> {
                try {
                    TimeUnit.HOURS.sleep(1);
                } catch (InterruptedException e) {
                    throw new IllegalStateException(e);
                }
            });
        }

        @Override
        public boolean isComplete() {
            return false;
        }

        @Override
        public boolean isAwaitable() {
            return true;
        }

        @Override
        public TupleDomain<ColumnHandle> getCurrentPredicate() {
            return TupleDomain.all();
        }
    }, new Duration(2, SECONDS), alwaysTrue(), new TestingTypeManager(), false);
    ImmutableList.Builder<IcebergSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        splitSource.getNextBatch(null, 100).get().getSplits().stream()
                .map(IcebergSplit.class::cast)
                .forEach(splits::add);
    }
    assertThat(splits.build().size()).isGreaterThan(0);
    assertTrue(splitSource.isFinished());
    assertThat(System.currentTimeMillis() - startMillis).as("IcebergSplitSource failed to wait for dynamicFilteringWaitTimeout").isGreaterThanOrEqualTo(2000);
}
Also used : Table(org.apache.iceberg.Table) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) ValueSet(io.trino.spi.predicate.ValueSet) DynamicFilter(io.trino.spi.connector.DynamicFilter) ImmutableList(com.google.common.collect.ImmutableList) Duration(io.airlift.units.Duration) SchemaTableName(io.trino.spi.connector.SchemaTableName) CompletableFuture(java.util.concurrent.CompletableFuture) TupleDomain(io.trino.spi.predicate.TupleDomain) TestingTypeManager(io.trino.spi.type.TestingTypeManager) Test(org.testng.annotations.Test)
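
A minimal sketch, assuming a simplified version of the waiting logic inside IcebergSplitSource (the real implementation is not shown on this page), of the pattern this test exercises: block on DynamicFilter.isBlocked() until either the filter stops being awaitable or the configured dynamicFilteringWaitTimeout elapses, then plan splits with the current, possibly incomplete, predicate. The helper class and method names are invented.

import io.trino.spi.connector.DynamicFilter;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

final class DynamicFilterWaiter {
    // Wait at most timeoutMillis for the dynamic filter to narrow; never block forever
    static void await(DynamicFilter filter, long timeoutMillis) {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (filter.isAwaitable()) {
            long remaining = deadline - System.currentTimeMillis();
            if (remaining <= 0) {
                return; // timeout reached: proceed with filter.getCurrentPredicate() as-is
            }
            try {
                filter.isBlocked().get(remaining, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                return; // same: stop waiting and use the incomplete predicate
            } catch (InterruptedException | ExecutionException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}

With the test's filter, isBlocked() never completes and isAwaitable() stays true, so a loop like this can only exit through the timeout branch, which is why the test asserts that at least the 2-second wait elapsed before splits were produced.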

Example 14 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

From class PhoenixSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns()
                .map(columnSet -> columnSet.stream().map(JdbcColumnHandle.class::cast).collect(toList()))
                .orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream()
                .map(PhoenixInputSplit.class::cast)
                .map(split -> new PhoenixSplit(getSplitAddresses(split), SerializedPhoenixInputSplit.serialize(split)))
                .collect(toImmutableList());
        return new FixedSplitSource(splits);
    } catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) KeyRange(org.apache.phoenix.query.KeyRange) PHOENIX_INTERNAL_ERROR(io.trino.plugin.phoenix.PhoenixErrorCode.PHOENIX_INTERNAL_ERROR) Connection(java.sql.Connection) Logger(io.airlift.log.Logger) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) PhoenixInputSplit(org.apache.phoenix.mapreduce.PhoenixInputSplit) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) QueryPlan(org.apache.phoenix.compile.QueryPlan) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) Bytes(org.apache.hadoop.hbase.util.Bytes) TableName(org.apache.hadoop.hbase.TableName) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) Scan(org.apache.hadoop.hbase.client.Scan) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) PhoenixPreparedStatement(org.apache.phoenix.jdbc.PhoenixPreparedStatement) JdbcTableHandle(io.trino.plugin.jdbc.JdbcTableHandle) HostAddress(io.trino.spi.HostAddress) PHOENIX_SPLIT_ERROR(io.trino.plugin.phoenix.PhoenixErrorCode.PHOENIX_SPLIT_ERROR) EXPECTED_UPPER_REGION_KEY(org.apache.phoenix.coprocessor.BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
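
For context, a hedged sketch of how a caller drains a ConnectorSplitSource such as the FixedSplitSource returned above; the loop mirrors the getNextBatch/isFinished usage in the TestIcebergSplitSource example earlier on this page, and the class name is invented.

import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import java.util.ArrayList;
import java.util.List;

final class SplitSourceDrainer {
    // Pull splits in batches of up to 100 until the source reports it is finished
    static List<ConnectorSplit> drain(ConnectorSplitSource source) throws Exception {
        List<ConnectorSplit> splits = new ArrayList<>();
        while (!source.isFinished()) {
            splits.addAll(source.getNextBatch(null, 100).get().getSplits());
        }
        return splits;
    }
}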

Example 15 with DynamicFilter

Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.

From class RaptorPageSourceProvider, method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    RaptorSplit raptorSplit = (RaptorSplit) split;
    RaptorTableHandle raptorTable = (RaptorTableHandle) table;
    OptionalInt bucketNumber = raptorSplit.getBucketNumber();
    TupleDomain<RaptorColumnHandle> predicate = raptorTable.getConstraint();
    OrcReaderOptions options = new OrcReaderOptions()
            .withMaxMergeDistance(getReaderMaxMergeDistance(session))
            .withMaxBufferSize(getReaderMaxReadSize(session))
            .withStreamBufferSize(getReaderStreamBufferSize(session))
            .withTinyStripeThreshold(getReaderTinyStripeThreshold(session))
            .withLazyReadSmallRanges(isReaderLazyReadSmallRanges(session));
    OptionalLong transactionId = raptorSplit.getTransactionId();
    if (raptorSplit.getShardUuids().size() == 1) {
        UUID shardUuid = raptorSplit.getShardUuids().iterator().next();
        return createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId);
    }
    Iterator<ConnectorPageSource> iterator = raptorSplit.getShardUuids().stream()
            .map(shardUuid -> createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId))
            .iterator();
    return new ConcatPageSource(iterator);
}
Also used : StorageManager(io.trino.plugin.raptor.legacy.storage.StorageManager) RaptorSessionProperties.getReaderTinyStripeThreshold(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderTinyStripeThreshold) Type(io.trino.spi.type.Type) OptionalInt(java.util.OptionalInt) ConcatPageSource(io.trino.plugin.raptor.legacy.util.ConcatPageSource) Inject(javax.inject.Inject) RaptorSessionProperties.getReaderStreamBufferSize(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderStreamBufferSize) OptionalLong(java.util.OptionalLong) OrcReaderOptions(io.trino.orc.OrcReaderOptions) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) RaptorSessionProperties.isReaderLazyReadSmallRanges(io.trino.plugin.raptor.legacy.RaptorSessionProperties.isReaderLazyReadSmallRanges) Iterator(java.util.Iterator) RaptorSessionProperties.getReaderMaxMergeDistance(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderMaxMergeDistance) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) ConnectorSession(io.trino.spi.connector.ConnectorSession) UUID(java.util.UUID) RaptorSessionProperties.getReaderMaxReadSize(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderMaxReadSize) TupleDomain(io.trino.spi.predicate.TupleDomain) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
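
An illustrative sketch (not Raptor's actual ConcatPageSource) of the design choice in the multi-shard branch above: the per-shard page sources sit behind a stream-backed iterator, so createPageSource runs for a shard only when the concatenating reader actually advances to it. Shard names and the demo class are invented.

import java.util.Iterator;
import java.util.List;

final class LazyChainDemo {
    public static void main(String[] args) {
        List<String> shardUuids = List.of("shard-a", "shard-b");
        // Stream mapping is lazy: "opening ..." prints only as the iterator advances
        Iterator<String> sources = shardUuids.stream()
                .map(shard -> {
                    System.out.println("opening " + shard);
                    return shard + "-page-source";
                })
                .iterator();
        System.out.println("no shard opened yet");
        sources.forEachRemaining(System.out::println);
    }
}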

Aggregations

DynamicFilter (io.trino.spi.connector.DynamicFilter): 32
Test (org.testng.annotations.Test): 23
DynamicFilterId (io.trino.sql.planner.plan.DynamicFilterId): 20
TestingColumnHandle (io.trino.spi.connector.TestingColumnHandle): 19
ColumnHandle (io.trino.spi.connector.ColumnHandle): 18
TupleDomain (io.trino.spi.predicate.TupleDomain): 13
Symbol (io.trino.sql.planner.Symbol): 11
SymbolAllocator (io.trino.sql.planner.SymbolAllocator): 11
QueryId (io.trino.spi.QueryId): 10
List (java.util.List): 10
ImmutableList (com.google.common.collect.ImmutableList): 9
StageId (io.trino.execution.StageId): 9
Domain (io.trino.spi.predicate.Domain): 9
Objects.requireNonNull (java.util.Objects.requireNonNull): 9
TaskId (io.trino.execution.TaskId): 8
ConnectorSession (io.trino.spi.connector.ConnectorSession): 8
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 8
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 8
Optional (java.util.Optional): 8
Inject (javax.inject.Inject): 8