Example 11 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

From the class PhoenixSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns()
                .map(columnSet -> columnSet.stream()
                        .map(JdbcColumnHandle.class::cast)
                        .collect(toList()))
                .orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream()
                .map(PhoenixInputSplit.class::cast)
                .map(split -> new PhoenixSplit(
                        getSplitAddresses(split),
                        SerializedPhoenixInputSplit.serialize(split)))
                .collect(toImmutableList());
        return new FixedSplitSource(splits);
    } catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) KeyRange(org.apache.phoenix.query.KeyRange) PHOENIX_INTERNAL_ERROR(io.trino.plugin.phoenix.PhoenixErrorCode.PHOENIX_INTERNAL_ERROR) Connection(java.sql.Connection) Logger(io.airlift.log.Logger) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) PhoenixInputSplit(org.apache.phoenix.mapreduce.PhoenixInputSplit) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) QueryPlan(org.apache.phoenix.compile.QueryPlan) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) Bytes(org.apache.hadoop.hbase.util.Bytes) TableName(org.apache.hadoop.hbase.TableName) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) Scan(org.apache.hadoop.hbase.client.Scan) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) PhoenixPreparedStatement(org.apache.phoenix.jdbc.PhoenixPreparedStatement) JdbcTableHandle(io.trino.plugin.jdbc.JdbcTableHandle) HostAddress(io.trino.spi.HostAddress) PHOENIX_SPLIT_ERROR(io.trino.plugin.phoenix.PhoenixErrorCode.PHOENIX_SPLIT_ERROR) EXPECTED_UPPER_REGION_KEY(org.apache.phoenix.coprocessor.BaseScannerRegionObserver.EXPECTED_UPPER_REGION_KEY) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
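
The engine drains the returned ConnectorSplitSource in batches until it reports completion. Below is a minimal, self-contained sketch of the fixed-list pattern behind FixedSplitSource (plain Java with illustrative names, not the actual Trino SPI class):

import java.util.ArrayList;
import java.util.List;

// Illustrative stand-in for the fixed-list split source pattern used by
// FixedSplitSource: all splits are known up front, and batches are handed
// out until the list is exhausted.
final class FixedListSplitSource<T> {
    private final List<T> splits;
    private int position;

    FixedListSplitSource(List<T> splits) {
        this.splits = List.copyOf(splits);
    }

    // Returns up to maxSize splits; an empty batch once the list is drained.
    List<T> getNextBatch(int maxSize) {
        int end = Math.min(position + maxSize, splits.size());
        List<T> batch = new ArrayList<>(splits.subList(position, end));
        position = end;
        return batch;
    }

    boolean isFinished() {
        return position >= splits.size();
    }
}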

Example 12 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

From the class RaptorPageSourceProvider, method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
    RaptorSplit raptorSplit = (RaptorSplit) split;
    RaptorTableHandle raptorTable = (RaptorTableHandle) table;
    OptionalInt bucketNumber = raptorSplit.getBucketNumber();
    TupleDomain<RaptorColumnHandle> predicate = raptorTable.getConstraint();
    OrcReaderOptions options = new OrcReaderOptions()
            .withMaxMergeDistance(getReaderMaxMergeDistance(session))
            .withMaxBufferSize(getReaderMaxReadSize(session))
            .withStreamBufferSize(getReaderStreamBufferSize(session))
            .withTinyStripeThreshold(getReaderTinyStripeThreshold(session))
            .withLazyReadSmallRanges(isReaderLazyReadSmallRanges(session));
    OptionalLong transactionId = raptorSplit.getTransactionId();
    if (raptorSplit.getShardUuids().size() == 1) {
        UUID shardUuid = raptorSplit.getShardUuids().iterator().next();
        return createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId);
    }
    Iterator<ConnectorPageSource> iterator = raptorSplit.getShardUuids().stream()
            .map(shardUuid -> createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId))
            .iterator();
    return new ConcatPageSource(iterator);
}
Also used : StorageManager(io.trino.plugin.raptor.legacy.storage.StorageManager) RaptorSessionProperties.getReaderTinyStripeThreshold(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderTinyStripeThreshold) Type(io.trino.spi.type.Type) OptionalInt(java.util.OptionalInt) ConcatPageSource(io.trino.plugin.raptor.legacy.util.ConcatPageSource) Inject(javax.inject.Inject) RaptorSessionProperties.getReaderStreamBufferSize(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderStreamBufferSize) OptionalLong(java.util.OptionalLong) OrcReaderOptions(io.trino.orc.OrcReaderOptions) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) RaptorSessionProperties.isReaderLazyReadSmallRanges(io.trino.plugin.raptor.legacy.RaptorSessionProperties.isReaderLazyReadSmallRanges) Iterator(java.util.Iterator) RaptorSessionProperties.getReaderMaxMergeDistance(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderMaxMergeDistance) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) ConnectorSession(io.trino.spi.connector.ConnectorSession) UUID(java.util.UUID) RaptorSessionProperties.getReaderMaxReadSize(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getReaderMaxReadSize) TupleDomain(io.trino.spi.predicate.TupleDomain) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
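
When a RaptorSplit covers several shards, the per-shard page sources are concatenated lazily rather than opened eagerly. A self-contained sketch of that concatenation idea follows (ConcatReader is a hypothetical name, not the actual ConcatPageSource API):

import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;

// Illustrative lazy concatenation: items are pulled from the current source
// until it is exhausted, then the next source is opened on demand.
final class ConcatReader<P> implements Iterator<P> {
    private final Iterator<Iterator<P>> sources;
    private Iterator<P> current = Collections.emptyIterator();

    ConcatReader(Iterator<Iterator<P>> sources) {
        this.sources = sources;
    }

    @Override
    public boolean hasNext() {
        while (!current.hasNext() && sources.hasNext()) {
            current = sources.next(); // open the next shard's source lazily
        }
        return current.hasNext();
    }

    @Override
    public P next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        return current.next();
    }
}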

Example 13 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

From the class DeltaLakeSplitManager, method getSplits.

private Stream<DeltaLakeSplit> getSplits(ConnectorTransactionHandle transaction, DeltaLakeTableHandle tableHandle, ConnectorSession session, Optional<DataSize> maxScannedFileSize, Set<ColumnHandle> columnsCoveredByDynamicFilter, Constraint constraint) {
    DeltaLakeMetastore metastore = getMetastore(session, transaction);
    String tableLocation = metastore.getTableLocation(tableHandle.getSchemaTableName(), session);
    List<AddFileEntry> validDataFiles = metastore.getValidDataFiles(tableHandle.getSchemaTableName(), session);
    TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint();
    TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint = tableHandle.getNonPartitionConstraint();
    // Delta Lake handles updates and deletes by copying entire data files, minus updates/deletes. Because of this we can only have one Split/UpdatablePageSource
    // per file.
    boolean splittable = tableHandle.getWriteType().isEmpty();
    AtomicInteger remainingInitialSplits = new AtomicInteger(maxInitialSplits);
    Optional<Instant> filesModifiedAfter = tableHandle.getAnalyzeHandle().flatMap(AnalyzeHandle::getFilesModifiedAfter);
    Optional<Long> maxScannedFileSizeInBytes = maxScannedFileSize.map(DataSize::toBytes);
    Set<String> predicatedColumnNames = Stream.concat(
                    nonPartitionConstraint.getDomains().orElseThrow().keySet().stream(),
                    columnsCoveredByDynamicFilter.stream().map(DeltaLakeColumnHandle.class::cast))
            // TODO is DeltaLakeColumnHandle.name normalized?
            .map(column -> column.getName().toLowerCase(ENGLISH))
            .collect(toImmutableSet());
    List<ColumnMetadata> schema = extractSchema(tableHandle.getMetadataEntry(), typeManager);
    List<ColumnMetadata> predicatedColumns = schema.stream()
            // ColumnMetadata.name is lowercase
            .filter(column -> predicatedColumnNames.contains(column.getName()))
            .collect(toImmutableList());
    return validDataFiles.stream().flatMap(addAction -> {
        if (tableHandle.getAnalyzeHandle().isPresent() && !tableHandle.getAnalyzeHandle().get().isInitialAnalyze() && !addAction.isDataChange()) {
            // skip files which do not introduce data change on non-initial ANALYZE
            return Stream.empty();
        }
        if (filesModifiedAfter.isPresent() && addAction.getModificationTime() <= filesModifiedAfter.get().toEpochMilli()) {
            return Stream.empty();
        }
        if (maxScannedFileSizeInBytes.isPresent() && addAction.getSize() > maxScannedFileSizeInBytes.get()) {
            return Stream.empty();
        }
        Map<DeltaLakeColumnHandle, Domain> enforcedDomains = enforcedPartitionConstraint.getDomains().orElseThrow();
        if (!partitionMatchesPredicate(addAction.getCanonicalPartitionValues(), enforcedDomains)) {
            return Stream.empty();
        }
        TupleDomain<DeltaLakeColumnHandle> statisticsPredicate = createStatisticsPredicate(addAction, predicatedColumns, tableHandle.getMetadataEntry().getCanonicalPartitionColumns());
        if (!nonPartitionConstraint.overlaps(statisticsPredicate)) {
            return Stream.empty();
        }
        if (constraint.predicate().isPresent()) {
            Map<String, Optional<String>> partitionValues = addAction.getCanonicalPartitionValues();
            Map<ColumnHandle, NullableValue> deserializedValues = constraint.getPredicateColumns().orElseThrow().stream()
                    .filter(column -> column instanceof DeltaLakeColumnHandle)
                    .filter(column -> partitionValues.containsKey(((DeltaLakeColumnHandle) column).getName()))
                    .collect(toImmutableMap(identity(), column -> {
                        DeltaLakeColumnHandle deltaLakeColumn = (DeltaLakeColumnHandle) column;
                        return NullableValue.of(
                                deltaLakeColumn.getType(),
                                deserializePartitionValue(
                                        deltaLakeColumn,
                                        addAction.getCanonicalPartitionValues().get(deltaLakeColumn.getName())));
                    }));
            if (!constraint.predicate().get().test(deserializedValues)) {
                return Stream.empty();
            }
        }
        return splitsForFile(session, addAction, tableLocation, addAction.getCanonicalPartitionValues(), statisticsPredicate, splittable, remainingInitialSplits).stream();
    });
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Constraint(io.trino.spi.connector.Constraint) DeltaLakeSessionProperties.getMaxInitialSplitSize(io.trino.plugin.deltalake.DeltaLakeSessionProperties.getMaxInitialSplitSize) URLDecoder(java.net.URLDecoder) NullableValue(io.trino.spi.predicate.NullableValue) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) BiFunction(java.util.function.BiFunction) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) DeltaLakeMetadata.createStatisticsPredicate(io.trino.plugin.deltalake.DeltaLakeMetadata.createStatisticsPredicate) ImmutableList(com.google.common.collect.ImmutableList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ENGLISH(java.util.Locale.ENGLISH) ExecutorService(java.util.concurrent.ExecutorService) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) Instant(java.time.Instant) DeltaLakeSessionProperties.getMaxSplitSize(io.trino.plugin.deltalake.DeltaLakeSessionProperties.getMaxSplitSize) DataSize(io.airlift.units.DataSize) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLoaderSafeConnectorSplitSource(io.trino.plugin.base.classloader.ClassLoaderSafeConnectorSplitSource) Stream(java.util.stream.Stream) DynamicFilter(io.trino.spi.connector.DynamicFilter) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) TypeManager(io.trino.spi.type.TypeManager) TransactionLogParser.deserializePartitionValue(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.deserializePartitionValue) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
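
The partitionMatchesPredicate call is what prunes files whose partition values fall outside the enforced domains. Here is a simplified, self-contained sketch of that pruning idea, with string-valued partitions checked against allowed-value sets (the real code evaluates TupleDomain/Domain objects; the names below are illustrative):

import java.util.Map;
import java.util.Optional;
import java.util.Set;

final class PartitionPruning {
    // A file survives only if every constrained partition column carries an
    // allowed value. Columns without a constraint are ignored; a null partition
    // value (empty Optional) is treated as non-matching in this sketch.
    static boolean partitionMatches(
            Map<String, Optional<String>> partitionValues,
            Map<String, Set<String>> allowedValues) {
        for (Map.Entry<String, Set<String>> entry : allowedValues.entrySet()) {
            Optional<String> value = partitionValues.get(entry.getKey());
            if (value == null) {
                continue; // the file is not partitioned on this column
            }
            if (value.isEmpty() || !entry.getValue().contains(value.get())) {
                return false;
            }
        }
        return true;
    }
}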

Example 14 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

From the class IcebergConnector, method beginTransaction.

@Override
public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit) {
    checkConnectorSupports(SERIALIZABLE, isolationLevel);
    ConnectorTransactionHandle transaction = new HiveTransactionHandle(autoCommit);
    transactionManager.begin(transaction);
    return transaction;
}
Also used : ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle)
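
The handle itself can stay trivial; what matters is that per-transaction state can be keyed off it, and that beginTransaction registers it with the transaction manager before returning. A minimal sketch of that pattern, using hypothetical classes (SimpleTransactionHandle and SimpleTransactionManager are illustrative, not the actual HiveTransactionHandle or Trino transaction manager):

import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative handle: an identity key plus the autoCommit flag.
final class SimpleTransactionHandle {
    private final UUID uuid = UUID.randomUUID();
    private final boolean autoCommit;

    SimpleTransactionHandle(boolean autoCommit) {
        this.autoCommit = autoCommit;
    }

    boolean isAutoCommit() {
        return autoCommit;
    }

    UUID getUuid() {
        return uuid;
    }
}

// Illustrative manager: begin registers per-transaction state under the
// handle; commit (or rollback) removes it.
final class SimpleTransactionManager {
    private final Map<SimpleTransactionHandle, Object> transactions = new ConcurrentHashMap<>();

    void begin(SimpleTransactionHandle handle) {
        transactions.put(handle, new Object()); // placeholder for per-transaction state
    }

    void commit(SimpleTransactionHandle handle) {
        transactions.remove(handle);
    }
}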

Example 15 with ConnectorTransactionHandle

Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.

From the class TestCassandraConnector, method testGetTupleType.

@Test
public void testGetTupleType() {
    // TODO add test with nested tuple types
    ConnectorTableHandle tableHandle = getTableHandle(tableTuple);
    ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, tableHandle);
    List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(SESSION, tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);
    ConnectorTransactionHandle transaction = CassandraTransactionHandle.INSTANCE;
    List<ConnectorSplit> splits = getAllSplits(splitManager.getSplits(transaction, SESSION, tableHandle, UNGROUPED_SCHEDULING, DynamicFilter.EMPTY));
    long rowNumber = 0;
    for (ConnectorSplit split : splits) {
        CassandraSplit cassandraSplit = (CassandraSplit) split;
        long completedBytes = 0;
        try (RecordCursor cursor = recordSetProvider.getRecordSet(transaction, SESSION, cassandraSplit, tableHandle, columnHandles).cursor()) {
            while (cursor.advanceNextPosition()) {
                try {
                    assertReadFields(cursor, tableMetadata.getColumns());
                } catch (RuntimeException e) {
                    throw new RuntimeException("row " + rowNumber, e);
                }
                rowNumber++;
                String keyValue = cursor.getSlice(columnIndex.get("key")).toStringUtf8();
                assertEquals(keyValue, Long.toString(rowNumber));
                SingleRowBlock tupleValueBlock = (SingleRowBlock) cursor.getObject(columnIndex.get("typetuple"));
                assertThat(tupleValueBlock.getPositionCount()).isEqualTo(3);
                CassandraColumnHandle tupleColumnHandle = (CassandraColumnHandle) columnHandles.get(columnIndex.get("typetuple"));
                List<CassandraType> tupleArgumentTypes = tupleColumnHandle.getCassandraType().getArgumentTypes();
                assertThat(tupleArgumentTypes.get(0).getTrinoType().getLong(tupleValueBlock, 0)).isEqualTo(rowNumber);
                assertThat(tupleArgumentTypes.get(1).getTrinoType().getSlice(tupleValueBlock, 1).toStringUtf8()).isEqualTo("text-" + rowNumber);
                assertThat(tupleArgumentTypes.get(2).getTrinoType().getLong(tupleValueBlock, 2)).isEqualTo(Float.floatToRawIntBits(1.11f * rowNumber));
                long newCompletedBytes = cursor.getCompletedBytes();
                assertTrue(newCompletedBytes >= completedBytes);
                completedBytes = newCompletedBytes;
            }
        }
    }
    assertEquals(rowNumber, 2);
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) RecordCursor(io.trino.spi.connector.RecordCursor) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) SingleRowBlock(io.trino.spi.block.SingleRowBlock) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)
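
The test leans on a getAllSplits helper (defined elsewhere in the test class) to drain the split source eagerly before iterating. A hedged sketch of that draining pattern, written against the toy FixedListSplitSource from the sketch after Example 11 rather than the real ConnectorSplitSource API:

import java.util.ArrayList;
import java.util.List;

final class SplitDraining {
    // Pull every batch from the split source into one list.
    static <T> List<T> getAllSplits(FixedListSplitSource<T> splitSource) {
        List<T> splits = new ArrayList<>();
        while (!splitSource.isFinished()) {
            splits.addAll(splitSource.getNextBatch(1000));
        }
        return splits;
    }
}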

Aggregations

ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 44
ConnectorSession (io.trino.spi.connector.ConnectorSession): 23
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 21
ColumnHandle (io.trino.spi.connector.ColumnHandle): 15
List (java.util.List): 14
Objects.requireNonNull (java.util.Objects.requireNonNull): 14
ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 13
ImmutableList (com.google.common.collect.ImmutableList): 12
Optional (java.util.Optional): 12
Inject (javax.inject.Inject): 12
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 10
CatalogName (io.trino.connector.CatalogName): 9
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 9
DynamicFilter (io.trino.spi.connector.DynamicFilter): 9
Test (org.testng.annotations.Test): 9
SchemaTableName (io.trino.spi.connector.SchemaTableName): 8
Map (java.util.Map): 8
ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource): 7
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata): 7
TupleDomain (io.trino.spi.predicate.TupleDomain): 7