
Example 6 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

The class DeltaLakeConnector, method beginTransaction.

@Override
public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit) {
    checkConnectorSupports(READ_COMMITTED, isolationLevel);
    verify(autoCommit, "Catalog only supports writes using autocommit: DeltaLake");
    ConnectorTransactionHandle transaction = new HiveTransactionHandle(true);
    transactionManager.begin(transaction);
    return transaction;
}
Also used : ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle)
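
For context, the handle returned by beginTransaction is later handed back by the engine to getMetadata and to commit or rollback. Below is a minimal sketch of that lifecycle, assuming a connector instance and a SESSION constant are in scope and READ_COMMITTED is statically imported from io.trino.spi.transaction.IsolationLevel (none of these names come from the excerpt above):

// Illustrative sketch: engine-side transaction lifecycle around the connector.
ConnectorTransactionHandle transaction = connector.beginTransaction(READ_COMMITTED, false, true);
try {
    // All metadata access happens under the transaction handle.
    ConnectorMetadata metadata = connector.getMetadata(SESSION, transaction);
    // ... metadata calls ...
    connector.commit(transaction);
} catch (RuntimeException e) {
    connector.rollback(transaction);
    throw e;
}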

Example 7 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

The class KafkaSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
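        // Look up the topic's partitions, then fetch each partition's begin and end offsets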
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream().map(KafkaSplitManager::toTopicPartition).collect(toImmutableList());
        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();
        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);
        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
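            // Divide each partition's offset range into KafkaSplits of at most messagesPerSplit messages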
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition)).partition(messagesPerSplit).stream().map(range -> new KafkaSplit(kafkaTableHandle.getTopicName(), kafkaTableHandle.getKeyDataFormat(), kafkaTableHandle.getMessageDataFormat(), keyDataSchemaContents, messageDataSchemaContents, partitionInfo.partition(), range, leader)).forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    } catch (Exception e) {
        // Catch all exceptions because the Kafka library is written in Scala and checked exceptions are not declared in method signatures.
        if (e instanceof TrinoException) {
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used : TopicPartition(org.apache.kafka.common.TopicPartition) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) String.format(java.lang.String.format) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ContentSchemaReader(io.trino.plugin.kafka.schema.ContentSchemaReader) Optional(java.util.Optional) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) HostAddress(io.trino.spi.HostAddress) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
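
A hedged sketch of how the returned split source might be drained; splitManager, transaction, session, and tableHandle are assumed to be in scope, and the getNextBatch signature (taking a ConnectorPartitionHandle) follows the SPI generation used in this excerpt:

// Illustrative sketch: pull all splits out of the ConnectorSplitSource produced by getSplits.
private static void printSplits(ConnectorSplitManager splitManager, ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle) throws Exception {
    ConnectorSplitSource splitSource = splitManager.getSplits(transaction, session, tableHandle, SplitSchedulingStrategy.UNGROUPED_SCHEDULING, DynamicFilter.EMPTY);
    try {
        while (!splitSource.isFinished()) {
            // NOT_PARTITIONED is io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED
            for (ConnectorSplit split : splitSource.getNextBatch(NOT_PARTITIONED, 1000).get().getSplits()) {
                System.out.println(split.getInfo());
            }
        }
    } finally {
        splitSource.close();
    }
}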

Example 8 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

The class TestRaptorConnector, method createTable.

private long createTable(String name) {
    ConnectorTransactionHandle transaction = beginTransaction();
    connector.getMetadata(SESSION, transaction).createTable(SESSION, new ConnectorTableMetadata(new SchemaTableName("test", name), ImmutableList.of(new ColumnMetadata("id", BIGINT))), false);
    connector.commit(transaction);
    transaction = beginTransaction();
    ConnectorTableHandle tableHandle = getTableHandle(connector.getMetadata(SESSION, transaction), name);
    connector.commit(transaction);
    return ((RaptorTableHandle) tableHandle).getTableId();
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)
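
The beginTransaction() helper is not shown in the excerpt; a plausible reconstruction, assuming the test opens read-write autocommit transactions at READ_COMMITTED the same way the connector examples above do (a sketch, not the verified test source):

private ConnectorTransactionHandle beginTransaction() {
    // Read-write, autocommit transaction at the connector's supported isolation level.
    return connector.beginTransaction(READ_COMMITTED, false, true);
}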

Example 9 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

The class TestRaptorConnector, method assertSplitShard.

private void assertSplitShard(Type temporalType, String min, String max, int expectedSplits) throws Exception {
    ConnectorSession session = TestingConnectorSession.builder().setPropertyMetadata(new RaptorSessionProperties(new StorageManagerConfig()).getSessionProperties()).build();
    ConnectorTransactionHandle transaction = beginTransaction();
    connector.getMetadata(SESSION, transaction).createTable(SESSION, new ConnectorTableMetadata(new SchemaTableName("test", "test"), ImmutableList.of(new ColumnMetadata("id", BIGINT), new ColumnMetadata("time", temporalType)), ImmutableMap.of(TEMPORAL_COLUMN_PROPERTY, "time")), false);
    connector.commit(transaction);
    ConnectorTransactionHandle txn1 = beginTransaction();
    ConnectorTableHandle handle1 = getTableHandle(connector.getMetadata(SESSION, txn1), "test");
    ConnectorInsertTableHandle insertTableHandle = connector.getMetadata(SESSION, txn1).beginInsert(session, handle1);
    ConnectorPageSink raptorPageSink = connector.getPageSinkProvider().createPageSink(txn1, session, insertTableHandle);
    Object timestamp1 = null;
    Object timestamp2 = null;
    if (temporalType.equals(TIMESTAMP_MILLIS)) {
        timestamp1 = SqlTimestamp.newInstance(3, castToShortTimestamp(TIMESTAMP_MILLIS.getPrecision(), min), 0);
        timestamp2 = SqlTimestamp.newInstance(3, castToShortTimestamp(TIMESTAMP_MILLIS.getPrecision(), max), 0);
    } else if (temporalType.equals(DATE)) {
        timestamp1 = new SqlDate(parseDate(min));
        timestamp2 = new SqlDate(parseDate(max));
    }
    Page inputPage = MaterializedResult.resultBuilder(session, ImmutableList.of(BIGINT, temporalType)).row(1L, timestamp1).row(2L, timestamp2).build().toPage();
    raptorPageSink.appendPage(inputPage);
    Collection<Slice> shards = raptorPageSink.finish().get();
    assertEquals(shards.size(), expectedSplits);
    connector.getMetadata(session, txn1).dropTable(session, handle1);
    connector.commit(txn1);
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) Page(io.trino.spi.Page) SchemaTableName(io.trino.spi.connector.SchemaTableName) StorageManagerConfig(io.trino.plugin.raptor.legacy.storage.StorageManagerConfig) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Slice(io.airlift.slice.Slice) SqlDate(io.trino.spi.type.SqlDate) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata)
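
A hedged usage sketch of how a test might call this helper; the literal dates and expected split counts below are illustrative, not taken from the excerpt:

// Rows whose temporal values fall on different days should end up in separate shards.
assertSplitShard(DATE, "2001-08-22", "2001-08-23", 2);
assertSplitShard(TIMESTAMP_MILLIS, "2001-08-22 00:00:01.000", "2001-08-23 01:00:01.000", 2);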

Example 10 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

The class IcebergPageSourceProvider, method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    IcebergSplit split = (IcebergSplit) connectorSplit;
    IcebergTableHandle table = (IcebergTableHandle) connectorTable;
    List<IcebergColumnHandle> icebergColumns = columns.stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList());
    Map<Integer, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<IcebergColumnHandle> regularColumns = columns.stream().map(IcebergColumnHandle.class::cast).filter(column -> !partitionKeys.containsKey(column.getId())).collect(toImmutableList());
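    // Intersect the table's unenforced predicate with the current dynamic filter, then simplify the result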
    TupleDomain<IcebergColumnHandle> effectivePredicate = table.getUnenforcedPredicate().intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast)).simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
    HdfsContext hdfsContext = new HdfsContext(session);
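    // Open a page source for the data file in its native format, pushing down the effective predicate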
    ReaderPageSource dataPageSource = createDataPageSource(session, hdfsContext, new Path(split.getPath()), split.getStart(), split.getLength(), split.getFileSize(), split.getFileFormat(), regularColumns, effectivePredicate, table.getNameMappingJson().map(NameMappingParser::fromJson));
    Optional<ReaderProjectionsAdapter> projectionsAdapter = dataPageSource.getReaderColumns().map(readerColumns -> new ReaderProjectionsAdapter(regularColumns, readerColumns, column -> ((IcebergColumnHandle) column).getType(), IcebergPageSourceProvider::applyProjection));
    return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource.get(), projectionsAdapter);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ORC_ICEBERG_ID_KEY(io.trino.plugin.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) ColumnAdaptation(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ICEBERG_CANNOT_OPEN_SPLIT(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) UUID(io.trino.spi.type.UuidType.UUID) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) ICEBERG_FILESYSTEM_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ReaderColumns(io.trino.plugin.hive.ReaderColumns) Set(java.util.Set) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnIO(org.apache.parquet.io.ColumnIO) IcebergSessionProperties.getOrcTinyStripeThreshold(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) MappedField(org.apache.iceberg.mapping.MappedField) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) IcebergSessionProperties.isOrcNestedLazy(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy) IcebergSessionProperties.getOrcMaxBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) HdfsParquetDataSource(io.trino.plugin.hive.parquet.HdfsParquetDataSource) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) IcebergSessionProperties.getOrcMaxMergeDistance(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) ICEBERG_MISSING_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) MappedFields(org.apache.iceberg.mapping.MappedFields) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) OrcType(io.trino.orc.metadata.OrcType) Predicate(io.trino.parquet.predicate.Predicate) IcebergSessionProperties.isUseFileSizeFromMetadata(io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata) MapType(io.trino.spi.type.MapType) 
PredicateUtils.predicateMatches(io.trino.parquet.predicate.PredicateUtils.predicateMatches) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) StandardTypes(io.trino.spi.type.StandardTypes) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IcebergSessionProperties.getOrcLazyReadSmallRanges(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) IcebergSessionProperties.getParquetMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) UTC(org.joda.time.DateTimeZone.UTC) Field(io.trino.parquet.Field) Traverser(com.google.common.graph.Traverser) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ProjectedLayout(io.trino.orc.OrcReader.ProjectedLayout) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) IcebergSessionProperties.getOrcStreamBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetSchemaUtil(org.apache.iceberg.parquet.ParquetSchemaUtil) OrcColumn(io.trino.orc.OrcColumn) PredicateUtils.buildPredicate(io.trino.parquet.predicate.PredicateUtils.buildPredicate) MetadataReader(io.trino.parquet.reader.MetadataReader) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) OrcRecordReader(io.trino.orc.OrcRecordReader) NameMapping(org.apache.iceberg.mapping.NameMapping) Path(org.apache.hadoop.fs.Path) OrcDataSource(io.trino.orc.OrcDataSource) ReaderProjectionsAdapter(io.trino.plugin.hive.ReaderProjectionsAdapter) RowType(io.trino.spi.type.RowType) ICEBERG_BAD_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) ParquetReader(io.trino.parquet.reader.ParquetReader) FieldContext(io.trino.plugin.iceberg.IcebergParquetColumnIOConverter.FieldContext) ICEBERG_CURSOR_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) ParquetTypeUtils.getColumnIO(io.trino.parquet.ParquetTypeUtils.getColumnIO) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) OrcDataSourceId(io.trino.orc.OrcDataSourceId) MessageType(org.apache.parquet.schema.MessageType) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) IcebergSessionProperties.getOrcMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) Function(java.util.function.Function) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) Collectors.mapping(java.util.stream.Collectors.mapping) 
IcebergSessionProperties.isOrcBloomFiltersEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) HdfsOrcDataSource(io.trino.plugin.hive.orc.HdfsOrcDataSource) ParquetReaderOptions(io.trino.parquet.ParquetReaderOptions) OrcReader(io.trino.orc.OrcReader) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) ICEBERG_BINARY_TYPE(io.trino.plugin.iceberg.TypeConverter.ICEBERG_BINARY_TYPE) TupleDomain(io.trino.spi.predicate.TupleDomain) OrcReader.fullyProjectedLayout(io.trino.orc.OrcReader.fullyProjectedLayout) OrcCorruptionException(io.trino.orc.OrcCorruptionException) Collectors.toList(java.util.stream.Collectors.toList) ParquetTypeUtils.getDescriptors(io.trino.parquet.ParquetTypeUtils.getDescriptors) ParquetDataSource(io.trino.parquet.ParquetDataSource) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
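
To show how the returned page source is consumed, here is a minimal, hedged sketch of draining it; the helper name and its use are illustrative, and only the ConnectorPageSource methods (isFinished, getNextPage, close) come from the SPI:

// Illustrative sketch: count the rows produced by a ConnectorPageSource such as the one returned above.
private static long countRows(ConnectorPageSource pageSource) throws IOException {
    long rows = 0;
    try (ConnectorPageSource source = pageSource) {
        while (!source.isFinished()) {
            // getNextPage may return null while the source is still producing data.
            Page page = source.getNextPage();
            if (page != null) {
                rows += page.getPositionCount();
            }
        }
    }
    return rows;
}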

Aggregations

ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle) 44
ConnectorSession (io.trino.spi.connector.ConnectorSession) 23
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle) 21
ColumnHandle (io.trino.spi.connector.ColumnHandle) 15
List (java.util.List) 14
Objects.requireNonNull (java.util.Objects.requireNonNull) 14
ConnectorSplit (io.trino.spi.connector.ConnectorSplit) 13
ImmutableList (com.google.common.collect.ImmutableList) 12
Optional (java.util.Optional) 12
Inject (javax.inject.Inject) 12
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 10
CatalogName (io.trino.connector.CatalogName) 9
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata) 9
DynamicFilter (io.trino.spi.connector.DynamicFilter) 9
Test (org.testng.annotations.Test) 9
SchemaTableName (io.trino.spi.connector.SchemaTableName) 8
Map (java.util.Map) 8
ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource) 7
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata) 7
TupleDomain (io.trino.spi.predicate.TupleDomain) 7