Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From the class KafkaSplitManager, the method getSplits:
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream()
                .map(KafkaSplitManager::toTopicPartition)
                .collect(toImmutableList());
        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();
        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);
        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition))
                    .partition(messagesPerSplit).stream()
                    .map(range -> new KafkaSplit(
                            kafkaTableHandle.getTopicName(),
                            kafkaTableHandle.getKeyDataFormat(),
                            kafkaTableHandle.getMessageDataFormat(),
                            keyDataSchemaContents,
                            messageDataSchemaContents,
                            partitionInfo.partition(),
                            range,
                            leader))
                    .forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    }
    catch (Exception e) {
        // Catch all exceptions because the Kafka client library is written in Scala and checked exceptions are not declared in method signatures.
        if (e instanceof TrinoException) {
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
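The per-partition split boundaries come from Range.partition(messagesPerSplit), which is assumed here to slice the half-open offset interval [begin, end) into consecutive chunks of at most messagesPerSplit offsets. A minimal stand-alone sketch of that chunking (the helper name and the long[] pair representation are illustrative, not Trino API; needs only java.util.ArrayList and java.util.List):

// Illustrative equivalent of Range.partition(messagesPerSplit): split [begin, end)
// into consecutive sub-ranges of at most messagesPerSplit offsets; an empty
// partition (begin == end) produces no chunks.
static List<long[]> chunkOffsets(long begin, long end, long messagesPerSplit)
{
    List<long[]> chunks = new ArrayList<>();
    for (long start = begin; start < end; start += messagesPerSplit) {
        chunks.add(new long[] {start, Math.min(start + messagesPerSplit, end)});
    }
    return chunks;
}

For begin = 0, end = 2500 and messagesPerSplit = 1000 this yields [0, 1000), [1000, 2000) and [2000, 2500), so each KafkaSplit reads a bounded slice of a single topic partition.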
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From the class IcebergPageSourceProvider, the method createPageSource:
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter)
{
    IcebergSplit split = (IcebergSplit) connectorSplit;
    IcebergTableHandle table = (IcebergTableHandle) connectorTable;
    List<IcebergColumnHandle> icebergColumns = columns.stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList());
    Map<Integer, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<IcebergColumnHandle> regularColumns = columns.stream()
            .map(IcebergColumnHandle.class::cast)
            .filter(column -> !partitionKeys.containsKey(column.getId()))
            .collect(toImmutableList());
    TupleDomain<IcebergColumnHandle> effectivePredicate = table.getUnenforcedPredicate()
            .intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast))
            .simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
    HdfsContext hdfsContext = new HdfsContext(session);
    ReaderPageSource dataPageSource = createDataPageSource(session, hdfsContext, new Path(split.getPath()), split.getStart(), split.getLength(), split.getFileSize(), split.getFileFormat(), regularColumns, effectivePredicate, table.getNameMappingJson().map(NameMappingParser::fromJson));
    Optional<ReaderProjectionsAdapter> projectionsAdapter = dataPageSource.getReaderColumns().map(readerColumns ->
            new ReaderProjectionsAdapter(regularColumns, readerColumns, column -> ((IcebergColumnHandle) column).getType(), IcebergPageSourceProvider::applyProjection));
    return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource.get(), projectionsAdapter);
}
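The effective predicate above is the table's unenforced predicate intersected with whatever the dynamic filter has collected so far, re-keyed to IcebergColumnHandle. A toy sketch to make the intersect/transformKeys semantics concrete, using String keys in place of column handles (column names and values are purely illustrative; assumes io.trino.spi.predicate.Domain and TupleDomain, io.trino.spi.type.BigintType.BIGINT, and Guava's ImmutableMap):

// Predicate already enumerated for the table scan: nationkey = 1.
TupleDomain<String> tablePredicate = TupleDomain.withColumnDomains(ImmutableMap.of("nationkey", Domain.singleValue(BIGINT, 1L)));
// What the dynamic filter has collected so far, e.g. from a join build side: regionkey = 2.
TupleDomain<String> dynamicPredicate = TupleDomain.withColumnDomains(ImmutableMap.of("regionkey", Domain.singleValue(BIGINT, 2L)));
// The intersection constrains both columns at once (a logical AND); intersecting with
// TupleDomain.all() would leave the table predicate unchanged.
TupleDomain<String> effective = tablePredicate.intersect(dynamicPredicate);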
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From the class TestIcebergSplitSource, the method testIncompleteDynamicFilterTimeout:
@Test(timeOut = 30_000)
public void testIncompleteDynamicFilterTimeout()
        throws Exception
{
    long startMillis = System.currentTimeMillis();
    SchemaTableName schemaTableName = new SchemaTableName("tpch", "nation");
    IcebergTableHandle tableHandle = new IcebergTableHandle(schemaTableName.getSchemaName(), schemaTableName.getTableName(), TableType.DATA, Optional.empty(), TupleDomain.all(), TupleDomain.all(), ImmutableSet.of(), Optional.empty());
    Table nationTable = catalog.loadTable(SESSION, schemaTableName);
    IcebergSplitSource splitSource = new IcebergSplitSource(
            tableHandle,
            nationTable.newScan(),
            Optional.empty(),
            new DynamicFilter() {
                @Override
                public Set<ColumnHandle> getColumnsCovered()
                {
                    return ImmutableSet.of();
                }

                @Override
                public CompletableFuture<?> isBlocked()
                {
                    // Block effectively forever, so the split source has to rely on its own timeout.
                    return CompletableFuture.runAsync(() -> {
                        try {
                            TimeUnit.HOURS.sleep(1);
                        }
                        catch (InterruptedException e) {
                            throw new IllegalStateException(e);
                        }
                    });
                }

                @Override
                public boolean isComplete()
                {
                    return false;
                }

                @Override
                public boolean isAwaitable()
                {
                    return true;
                }

                @Override
                public TupleDomain<ColumnHandle> getCurrentPredicate()
                {
                    return TupleDomain.all();
                }
            },
            new Duration(2, SECONDS),
            alwaysTrue(),
            new TestingTypeManager(),
            false);
    ImmutableList.Builder<IcebergSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        splitSource.getNextBatch(null, 100).get().getSplits().stream()
                .map(IcebergSplit.class::cast)
                .forEach(splits::add);
    }
    assertThat(splits.build().size()).isGreaterThan(0);
    assertTrue(splitSource.isFinished());
    assertThat(System.currentTimeMillis() - startMillis)
            .as("IcebergSplitSource failed to wait for dynamicFilteringWaitTimeout")
            .isGreaterThanOrEqualTo(2000);
}
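The anonymous DynamicFilter above is deliberately never complete and blocks for an hour, which forces the split source to fall back on its two-second dynamicFilteringWaitTimeout. For contrast, a sketch of a trivially complete filter against the same five methods shown above (the SPI also provides a DynamicFilter.EMPTY constant with essentially this behavior):

// Sketch of a trivially complete dynamic filter: not awaitable, already done,
// and constraining nothing.
DynamicFilter completedFilter = new DynamicFilter() {
    @Override
    public Set<ColumnHandle> getColumnsCovered()
    {
        return ImmutableSet.of();
    }

    @Override
    public CompletableFuture<?> isBlocked()
    {
        // Already completed, so callers never block on this filter.
        return CompletableFuture.completedFuture(null);
    }

    @Override
    public boolean isComplete()
    {
        return true;
    }

    @Override
    public boolean isAwaitable()
    {
        return false;
    }

    @Override
    public TupleDomain<ColumnHandle> getCurrentPredicate()
    {
        return TupleDomain.all();
    }
};

Passing such a filter would exercise the opposite path, where splits are produced immediately without waiting on the timeout.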
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From the class PhoenixSplitManager, the method getSplits:
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns()
                .map(columnSet -> columnSet.stream().map(JdbcColumnHandle.class::cast).collect(toList()))
                .orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream()
                .map(PhoenixInputSplit.class::cast)
                .map(split -> new PhoenixSplit(getSplitAddresses(split), SerializedPhoenixInputSplit.serialize(split)))
                .collect(toImmutableList());
        return new FixedSplitSource(splits);
    }
    catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
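The method receives the dynamic filter but, as shown, never consults it. A connector that did want to react to it could short-circuit when the collected predicate already rules out every row; a minimal sketch of that pattern (not part of PhoenixSplitManager), which would sit at the top of such a getSplits implementation:

// Sketch only: if dynamic filtering has already narrowed the predicate to
// "no rows can match", nothing needs to be scanned, so return an empty split
// source without contacting Phoenix at all.
if (dynamicFilter.getCurrentPredicate().isNone()) {
    return new FixedSplitSource(ImmutableList.of());
}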
Use of io.trino.spi.connector.DynamicFilter in project trino by trinodb.
From the class RaptorPageSourceProvider, the method createPageSource:
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, DynamicFilter dynamicFilter)
{
    RaptorSplit raptorSplit = (RaptorSplit) split;
    RaptorTableHandle raptorTable = (RaptorTableHandle) table;
    OptionalInt bucketNumber = raptorSplit.getBucketNumber();
    TupleDomain<RaptorColumnHandle> predicate = raptorTable.getConstraint();
    OrcReaderOptions options = new OrcReaderOptions()
            .withMaxMergeDistance(getReaderMaxMergeDistance(session))
            .withMaxBufferSize(getReaderMaxReadSize(session))
            .withStreamBufferSize(getReaderStreamBufferSize(session))
            .withTinyStripeThreshold(getReaderTinyStripeThreshold(session))
            .withLazyReadSmallRanges(isReaderLazyReadSmallRanges(session));
    OptionalLong transactionId = raptorSplit.getTransactionId();
    if (raptorSplit.getShardUuids().size() == 1) {
        UUID shardUuid = raptorSplit.getShardUuids().iterator().next();
        return createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId);
    }
    Iterator<ConnectorPageSource> iterator = raptorSplit.getShardUuids().stream()
            .map(shardUuid -> createPageSource(shardUuid, bucketNumber, columns, predicate, options, transactionId))
            .iterator();
    return new ConcatPageSource(iterator);
}