Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
Class AbstractTestHiveFileSystem, method createTable.
private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat)
        throws Exception
{
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("id", BIGINT))
            .build();

    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT)
            .row(1L)
            .row(3L)
            .row(2L)
            .build();

    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);

        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());

        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();

        // Hack to work around the metastore not being configured for S3 or other FS.
        // The metastore tries to validate the location when creating the table, which fails
        // without explicit configuration for file system. We work around that by using a
        // dummy location when creating the table and update it here to the correct location.
        metastoreClient.updateTableLocation(
                database,
                tableName.getTableName(),
                locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
    }

    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);

        // verify the data
        metadata.beginQuery(session);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }

        metadata.cleanupQuery(session);
    }
}
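The verification step above relies on a getAllSplits helper from the test superclass to drain the ConnectorSplitSource. A minimal sketch of what such a helper looks like, assuming the two-argument getNextBatch(partitionHandle, maxSize) SPI used in the Delta Lake example further down this page, statically imported NOT_PARTITIONED and getFutureValue, and an arbitrary batch size of 1000:

private static List<ConnectorSplit> getAllSplits(ConnectorSplitSource splitSource)
{
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    // keep requesting batches until the source reports that it is exhausted
    while (!splitSource.isFinished()) {
        splits.addAll(getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, 1000)).getSplits());
    }
    return splits.build();
}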
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
Class AbstractTestHive, method testGetPartitionSplitsBatch.
@Test
public void testGetPartitionSplitsBatch()
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);

        // expect one split per partition of the partitioned test table
        assertEquals(getSplitCount(splitSource), tablePartitionFormatPartitions.size());
    }
}
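getSplitCount is another superclass helper; unlike getAllSplits it only counts the splits rather than collecting them. A plausible sketch under the same assumptions as the getAllSplits sketch above:

private static int getSplitCount(ConnectorSplitSource splitSource)
{
    int splitCount = 0;
    // drain the source batch by batch, accumulating only the batch sizes
    while (!splitSource.isFinished()) {
        splitCount += getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, 1000)).getSplits().size();
    }
    return splitCount;
}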
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
Class AbstractTestHive, method testGetPartitionSplitsBatchUnpartitioned.
@Test
public void testGetPartitionSplitsBatchUnpartitioned()
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);

        // an unpartitioned table should produce a single split
        assertEquals(getSplitCount(splitSource), 1);
    }
}
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
Class TestDeltaLakeSplitManager, method getSplits.
private List<DeltaLakeSplit> getSplits(DeltaLakeSplitManager splitManager, DeltaLakeConfig deltaLakeConfig)
        throws ExecutionException, InterruptedException
{
    ConnectorSplitSource splitSource = splitManager.getSplits(
            new HiveTransactionHandle(false),
            testingConnectorSessionWithConfig(deltaLakeConfig),
            tableHandle,
            ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING,
            DynamicFilter.EMPTY,
            Constraint.alwaysTrue());
    ImmutableList.Builder<DeltaLakeSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        // drain the source in batches of up to 10 splits, downcasting each one to DeltaLakeSplit
        List<ConnectorSplit> nextBatch = splitSource.getNextBatch(NOT_PARTITIONED, 10).get().getSplits();
        splits.addAll(nextBatch.stream()
                .map(split -> (DeltaLakeSplit) split)
                .collect(Collectors.toList()));
    }
    return splits.build();
}
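A test calling this helper typically builds a DeltaLakeConfig that forces a particular splitting behaviour, passes it in together with a split manager set up for that config, and then asserts on the returned splits. The snippet below only illustrates that pattern: the DeltaLakeConfig setter names are assumptions that may not match the actual config class, and the assertion is a placeholder.

// Illustrative usage only; the setter names on DeltaLakeConfig are assumed, not verified.
DeltaLakeConfig config = new DeltaLakeConfig()
        .setMaxInitialSplits(5)                    // assumed setter
        .setMaxSplitSize(DataSize.ofBytes(5_000)); // assumed setter
List<DeltaLakeSplit> splits = getSplits(splitManager, config);
// placeholder assertion; a real test would compare against the expected split boundaries
assertFalse(splits.isEmpty());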
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
Class KafkaSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream()
                .map(KafkaSplitManager::toTopicPartition)
                .collect(toImmutableList());

        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);

        // narrow the partitions and offset ranges according to the pushed-down filters
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();

        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);

        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());

            // split the partition's offset range into chunks of at most messagesPerSplit messages
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition))
                    .partition(messagesPerSplit)
                    .stream()
                    .map(range -> new KafkaSplit(
                            kafkaTableHandle.getTopicName(),
                            kafkaTableHandle.getKeyDataFormat(),
                            kafkaTableHandle.getMessageDataFormat(),
                            keyDataSchemaContents,
                            messageDataSchemaContents,
                            partitionInfo.partition(),
                            range,
                            leader))
                    .forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    }
    catch (Exception e) {
        // Catch all exceptions because Kafka library is written in scala and checked exceptions are not declared in method signature.
        if (e instanceof TrinoException) {
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
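The toTopicPartition method referenced above is a small adapter from the Kafka client's PartitionInfo to TopicPartition. A likely shape for it, using only the standard Kafka client API (topic() and partition() accessors on PartitionInfo):

private static TopicPartition toTopicPartition(PartitionInfo partitionInfo)
{
    // a TopicPartition only needs the topic name and the partition id
    return new TopicPartition(partitionInfo.topic(), partitionInfo.partition());
}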