Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.
The class PhoenixSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns()
                .map(columnSet -> columnSet.stream()
                        .map(JdbcColumnHandle.class::cast)
                        .collect(toList()))
                .orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream()
                .map(PhoenixInputSplit.class::cast)
                .map(split -> new PhoenixSplit(getSplitAddresses(split), SerializedPhoenixInputSplit.serialize(split)))
                .collect(toImmutableList());
        return new FixedSplitSource(splits);
    }
    catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
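The maxScansPerSplit limit above comes from a connector session property. As a rough sketch of how such an integer property is registered so that session.getProperty(...) can resolve it, the following uses Trino's PropertyMetadata; the property name, description, and default value are assumptions for illustration only, not the actual PhoenixSessionProperties code.

import com.google.common.collect.ImmutableList;
import io.trino.spi.session.PropertyMetadata;

import java.util.List;

import static io.trino.spi.session.PropertyMetadata.integerProperty;

public final class ExampleSessionProperties
{
    // Assumed property name and default; the real PhoenixSessionProperties may differ.
    public static final String MAX_SCANS_PER_SPLIT = "max_scans_per_split";

    private final List<PropertyMetadata<?>> sessionProperties = ImmutableList.of(
            integerProperty(
                    MAX_SCANS_PER_SPLIT,
                    "Maximum number of HBase scans per Phoenix split",
                    20,
                    false));

    public List<PropertyMetadata<?>> getSessionProperties()
    {
        return sessionProperties;
    }
}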
Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.
The class RaptorPageSourceProvider, method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, DynamicFilter dynamicFilter)
{
    RaptorSplit raptorSplit = (RaptorSplit) split;
    RaptorTableHandle raptorTable = (RaptorTableHandle) table;
    OptionalInt bucketNumber = raptorSplit.getBucketNumber();
    TupleDomain<RaptorColumnHandle> predicate = raptorTable.getConstraint();
    OrcReaderOptions options = new OrcReaderOptions()
            .withMaxMergeDistance(getReaderMaxMergeDistance(session))
            .withMaxBufferSize(getReaderMaxReadSize(session))
            .withStreamBufferSize(getReaderStreamBufferSize(session))
            .withTinyStripeThreshold(getReaderTinyStripeThreshold(session))
            .withLazyReadSmallRanges(isReaderLazyReadSmallRanges(session));
    OptionalLong transactionId = raptorSplit.getTransactionId();
    if (raptorSplit.getShardUuids().size() == 1) {
        UUID shardUuid = raptorSplit.getShardUuids().iterator().next();
        return createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId);
    }
    Iterator<ConnectorPageSource> iterator = raptorSplit.getShardUuids().stream()
            .map(shardUuid -> createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId))
            .iterator();
    return new ConcatPageSource(iterator);
}
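Note that the multi-shard branch is lazy: createPageSource is only invoked as ConcatPageSource advances through the stream's iterator. Whichever branch is taken, the result is consumed the same way by the engine, which polls pages until the source reports it is finished. A minimal, hypothetical helper (not Raptor code) showing that consumption pattern:

// Illustrative only: drain a ConnectorPageSource the way the engine does,
// polling getNextPage() until isFinished() returns true, then closing it.
private static long countRows(ConnectorPageSource pageSource)
        throws IOException
{
    long rows = 0;
    try (pageSource) {
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page != null) {
                rows += page.getPositionCount();
            }
        }
    }
    return rows;
}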
Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.
The class DeltaLakeSplitManager, method getSplits.
private Stream<DeltaLakeSplit> getSplits(ConnectorTransactionHandle transaction, DeltaLakeTableHandle tableHandle, ConnectorSession session, Optional<DataSize> maxScannedFileSize, Set<ColumnHandle> columnsCoveredByDynamicFilter, Constraint constraint)
{
    DeltaLakeMetastore metastore = getMetastore(session, transaction);
    String tableLocation = metastore.getTableLocation(tableHandle.getSchemaTableName(), session);
    List<AddFileEntry> validDataFiles = metastore.getValidDataFiles(tableHandle.getSchemaTableName(), session);
    TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint();
    TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint = tableHandle.getNonPartitionConstraint();
    // Delta Lake handles updates and deletes by copying entire data files, minus updates/deletes.
    // Because of this we can only have one Split/UpdatablePageSource per file.
    boolean splittable = tableHandle.getWriteType().isEmpty();
    AtomicInteger remainingInitialSplits = new AtomicInteger(maxInitialSplits);
    Optional<Instant> filesModifiedAfter = tableHandle.getAnalyzeHandle().flatMap(AnalyzeHandle::getFilesModifiedAfter);
    Optional<Long> maxScannedFileSizeInBytes = maxScannedFileSize.map(DataSize::toBytes);
    Set<String> predicatedColumnNames = Stream.concat(
                    nonPartitionConstraint.getDomains().orElseThrow().keySet().stream(),
                    columnsCoveredByDynamicFilter.stream().map(DeltaLakeColumnHandle.class::cast))
            // TODO is DeltaLakeColumnHandle.name normalized?
            .map(column -> column.getName().toLowerCase(ENGLISH))
            .collect(toImmutableSet());
    List<ColumnMetadata> schema = extractSchema(tableHandle.getMetadataEntry(), typeManager);
    List<ColumnMetadata> predicatedColumns = schema.stream()
            // ColumnMetadata.name is lowercase
            .filter(column -> predicatedColumnNames.contains(column.getName()))
            .collect(toImmutableList());
    return validDataFiles.stream().flatMap(addAction -> {
        if (tableHandle.getAnalyzeHandle().isPresent() && !tableHandle.getAnalyzeHandle().get().isInitialAnalyze() && !addAction.isDataChange()) {
            // skip files which do not introduce data change on non-initial ANALYZE
            return Stream.empty();
        }
        if (filesModifiedAfter.isPresent() && addAction.getModificationTime() <= filesModifiedAfter.get().toEpochMilli()) {
            return Stream.empty();
        }
        if (maxScannedFileSizeInBytes.isPresent() && addAction.getSize() > maxScannedFileSizeInBytes.get()) {
            return Stream.empty();
        }
        Map<DeltaLakeColumnHandle, Domain> enforcedDomains = enforcedPartitionConstraint.getDomains().orElseThrow();
        if (!partitionMatchesPredicate(addAction.getCanonicalPartitionValues(), enforcedDomains)) {
            return Stream.empty();
        }
        TupleDomain<DeltaLakeColumnHandle> statisticsPredicate = createStatisticsPredicate(addAction, predicatedColumns, tableHandle.getMetadataEntry().getCanonicalPartitionColumns());
        if (!nonPartitionConstraint.overlaps(statisticsPredicate)) {
            return Stream.empty();
        }
        if (constraint.predicate().isPresent()) {
            Map<String, Optional<String>> partitionValues = addAction.getCanonicalPartitionValues();
            Map<ColumnHandle, NullableValue> deserializedValues = constraint.getPredicateColumns().orElseThrow().stream()
                    .filter(column -> column instanceof DeltaLakeColumnHandle)
                    .filter(column -> partitionValues.containsKey(((DeltaLakeColumnHandle) column).getName()))
                    .collect(toImmutableMap(identity(), column -> {
                        DeltaLakeColumnHandle deltaLakeColumn = (DeltaLakeColumnHandle) column;
                        return NullableValue.of(deltaLakeColumn.getType(), deserializePartitionValue(deltaLakeColumn, addAction.getCanonicalPartitionValues().get(deltaLakeColumn.getName())));
                    }));
            if (!constraint.predicate().get().test(deserializedValues)) {
                return Stream.empty();
            }
        }
        return splitsForFile(session, addAction, tableLocation, addAction.getCanonicalPartitionValues(), statisticsPredicate, splittable, remainingInitialSplits).stream();
    });
}
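Files whose partition values cannot satisfy the enforced partition constraint are pruned before any file statistics are consulted. A simplified sketch of the kind of check partitionMatchesPredicate performs (illustrative only, not the actual Delta Lake connector code; it assumes a deserializePartitionValue helper like the one used in the predicate branch above):

// Keep a file only if every enforced partition domain accepts that file's
// partition value; otherwise the file can be skipped entirely.
private static boolean partitionMatchesPredicateSketch(
        Map<String, Optional<String>> canonicalPartitionValues,
        Map<DeltaLakeColumnHandle, Domain> enforcedDomains)
{
    for (Map.Entry<DeltaLakeColumnHandle, Domain> entry : enforcedDomains.entrySet()) {
        DeltaLakeColumnHandle column = entry.getKey();
        Object value = deserializePartitionValue(column, canonicalPartitionValues.get(column.getName()));
        if (!entry.getValue().includesNullableValue(value)) {
            return false;
        }
    }
    return true;
}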
Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.
The class IcebergConnector, method beginTransaction.
@Override
public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit)
{
    checkConnectorSupports(SERIALIZABLE, isolationLevel);
    ConnectorTransactionHandle transaction = new HiveTransactionHandle(autoCommit);
    transactionManager.begin(transaction);
    return transaction;
}
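The handle created here is handed back to the connector when the engine completes the transaction. A hedged sketch of the matching commit and rollback overrides, assuming the transaction manager exposes corresponding methods (the real IcebergTransactionManager API may differ):

// Assumed shape only: hand the transaction handle back to the manager so it
// can finish or abandon the work registered under it.
@Override
public void commit(ConnectorTransactionHandle transaction)
{
    transactionManager.commit(transaction);
}

@Override
public void rollback(ConnectorTransactionHandle transaction)
{
    transactionManager.rollback(transaction);
}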
Use of io.trino.spi.connector.ConnectorTransactionHandle in project trino by trinodb.
The class TestCassandraConnector, method testGetTupleType.
@Test
public void testGetTupleType()
{
    // TODO add test with nested tuple types
    ConnectorTableHandle tableHandle = getTableHandle(tableTuple);
    ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, tableHandle);
    List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(SESSION, tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);
    ConnectorTransactionHandle transaction = CassandraTransactionHandle.INSTANCE;
    List<ConnectorSplit> splits = getAllSplits(splitManager.getSplits(transaction, SESSION, tableHandle, UNGROUPED_SCHEDULING, DynamicFilter.EMPTY));
    long rowNumber = 0;
    for (ConnectorSplit split : splits) {
        CassandraSplit cassandraSplit = (CassandraSplit) split;
        long completedBytes = 0;
        try (RecordCursor cursor = recordSetProvider.getRecordSet(transaction, SESSION, cassandraSplit, tableHandle, columnHandles).cursor()) {
            while (cursor.advanceNextPosition()) {
                try {
                    assertReadFields(cursor, tableMetadata.getColumns());
                }
                catch (RuntimeException e) {
                    throw new RuntimeException("row " + rowNumber, e);
                }
                rowNumber++;
                String keyValue = cursor.getSlice(columnIndex.get("key")).toStringUtf8();
                assertEquals(keyValue, Long.toString(rowNumber));
                SingleRowBlock tupleValueBlock = (SingleRowBlock) cursor.getObject(columnIndex.get("typetuple"));
                assertThat(tupleValueBlock.getPositionCount()).isEqualTo(3);
                CassandraColumnHandle tupleColumnHandle = (CassandraColumnHandle) columnHandles.get(columnIndex.get("typetuple"));
                List<CassandraType> tupleArgumentTypes = tupleColumnHandle.getCassandraType().getArgumentTypes();
                assertThat(tupleArgumentTypes.get(0).getTrinoType().getLong(tupleValueBlock, 0)).isEqualTo(rowNumber);
                assertThat(tupleArgumentTypes.get(1).getTrinoType().getSlice(tupleValueBlock, 1).toStringUtf8()).isEqualTo("text-" + rowNumber);
                assertThat(tupleArgumentTypes.get(2).getTrinoType().getLong(tupleValueBlock, 2)).isEqualTo(Float.floatToRawIntBits(1.11f * rowNumber));
                long newCompletedBytes = cursor.getCompletedBytes();
                assertTrue(newCompletedBytes >= completedBytes);
                completedBytes = newCompletedBytes;
            }
        }
    }
    assertEquals(rowNumber, 2);
}
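The test leans on small local helpers such as indexColumns. One assumed shape for it, mapping each column name to its position so cursor fields can be looked up by name (illustrative, not the actual Trino test code):

// Assumed helper shape: build a name-to-position index over the column handles
// so the test can address cursor fields like "key" and "typetuple" by name.
private static Map<String, Integer> indexColumns(List<ColumnHandle> columnHandles)
{
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    int position = 0;
    for (ColumnHandle columnHandle : columnHandles) {
        index.put(((CassandraColumnHandle) columnHandle).getName(), position);
        position++;
    }
    return index.buildOrThrow();
}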