Search in sources:

Example 1 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

The class AbstractTestHiveFileSystem, method createTable.

private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 or other file systems.
        // The metastore tries to validate the location when creating the table, which fails
        // without explicit file system configuration. We work around that by using a dummy
        // location when creating the table and then updating it here to the correct location.
        metastoreClient.updateTableLocation(
                database,
                tableName.getTableName(),
                locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false)
                        .getTargetPath()
                        .toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        metadata.beginQuery(session);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
        metadata.cleanupQuery(session);
    }
}
Also used: ColumnHandle(io.trino.spi.connector.ColumnHandle) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) AbstractTestHive.filterNonHiddenColumnMetadata(io.trino.plugin.hive.AbstractTestHive.filterNonHiddenColumnMetadata) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Transaction(io.trino.plugin.hive.AbstractTestHive.Transaction) HiveTransaction(io.trino.plugin.hive.AbstractTestHive.HiveTransaction) Slice(io.airlift.slice.Slice) ConnectorSession(io.trino.spi.connector.ConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata)
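
The getTableHandle, getSplits, and getAllSplits calls above are test helpers that this page does not reproduce. A minimal sketch of how getAllSplits might drain a ConnectorSplitSource, assuming the standard isFinished/getNextBatch contract that Example 4 below also relies on (the batch size of 1000 and the static NOT_PARTITIONED import are assumptions, not the actual helper):

import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

private static List<ConnectorSplit> getAllSplits(ConnectorSplitSource splitSource)
        throws ExecutionException, InterruptedException {
    // Drain the source batch by batch until the connector reports completion.
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        splits.addAll(splitSource.getNextBatch(NOT_PARTITIONED, 1000).get().getSplits());
    }
    return splits.build();
}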

Example 2 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

The class AbstractTestHive, method testGetPartitionSplitsBatch.

@Test
public void testGetPartitionSplitsBatch() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        assertEquals(getSplitCount(splitSource), tablePartitionFormatPartitions.size());
    }
}
Also used: ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)
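
getSplitCount is another helper that is not shown here; under the same assumptions as the sketch after Example 1 (including the NOT_PARTITIONED static import), it presumably drains the source and counts the splits, roughly like this. The same sketch also serves the unpartitioned variant in Example 3:

private static int getSplitCount(ConnectorSplitSource splitSource)
        throws ExecutionException, InterruptedException {
    // Count splits batch by batch instead of materializing them all.
    int splitCount = 0;
    while (!splitSource.isFinished()) {
        splitCount += splitSource.getNextBatch(NOT_PARTITIONED, 1000).get().getSplits().size();
    }
    return splitCount;
}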

Example 3 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

The class AbstractTestHive, method testGetPartitionSplitsBatchUnpartitioned.

@Test
public void testGetPartitionSplitsBatchUnpartitioned() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableUnpartitioned);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        assertEquals(getSplitCount(splitSource), 1);
    }
}
Also used: ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 4 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

The class TestDeltaLakeSplitManager, method getSplits.

private List<DeltaLakeSplit> getSplits(DeltaLakeSplitManager splitManager, DeltaLakeConfig deltaLakeConfig) throws ExecutionException, InterruptedException {
    ConnectorSplitSource splitSource = splitManager.getSplits(
            new HiveTransactionHandle(false),
            testingConnectorSessionWithConfig(deltaLakeConfig),
            tableHandle,
            ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING,
            DynamicFilter.EMPTY,
            Constraint.alwaysTrue());
    ImmutableList.Builder<DeltaLakeSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        List<ConnectorSplit> nextBatch = splitSource.getNextBatch(NOT_PARTITIONED, 10).get().getSplits();
        splits.addAll(nextBatch.stream().map(split -> (DeltaLakeSplit) split).collect(Collectors.toList()));
    }
    return splits.build();
}
Also used: ImmutableList(com.google.common.collect.ImmutableList) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) HiveTransactionHandle(io.trino.plugin.hive.HiveTransactionHandle) ConnectorSplit(io.trino.spi.connector.ConnectorSplit)
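
One detail worth noting: ConnectorSplitSource extends Closeable, so outside of a short-lived test the draining loop above would normally sit in a try-with-resources block. A sketch reusing the same arguments as the test method:

try (ConnectorSplitSource splitSource = splitManager.getSplits(
        new HiveTransactionHandle(false),
        testingConnectorSessionWithConfig(deltaLakeConfig),
        tableHandle,
        ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING,
        DynamicFilter.EMPTY,
        Constraint.alwaysTrue())) {
    // drain splitSource exactly as in the while loop above;
    // close() is then guaranteed even if a batch fails
}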

Example 5 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

The class KafkaSplitManager, method getSplits.

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    KafkaTableHandle kafkaTableHandle = (KafkaTableHandle) table;
    try (KafkaConsumer<byte[], byte[]> kafkaConsumer = consumerFactory.create(session)) {
        List<PartitionInfo> partitionInfos = kafkaConsumer.partitionsFor(kafkaTableHandle.getTopicName());
        List<TopicPartition> topicPartitions = partitionInfos.stream().map(KafkaSplitManager::toTopicPartition).collect(toImmutableList());
        Map<TopicPartition, Long> partitionBeginOffsets = kafkaConsumer.beginningOffsets(topicPartitions);
        Map<TopicPartition, Long> partitionEndOffsets = kafkaConsumer.endOffsets(topicPartitions);
        KafkaFilteringResult kafkaFilteringResult = kafkaFilterManager.getKafkaFilterResult(session, kafkaTableHandle, partitionInfos, partitionBeginOffsets, partitionEndOffsets);
        partitionInfos = kafkaFilteringResult.getPartitionInfos();
        partitionBeginOffsets = kafkaFilteringResult.getPartitionBeginOffsets();
        partitionEndOffsets = kafkaFilteringResult.getPartitionEndOffsets();
        ImmutableList.Builder<KafkaSplit> splits = ImmutableList.builder();
        Optional<String> keyDataSchemaContents = contentSchemaReader.readKeyContentSchema(kafkaTableHandle);
        Optional<String> messageDataSchemaContents = contentSchemaReader.readValueContentSchema(kafkaTableHandle);
        for (PartitionInfo partitionInfo : partitionInfos) {
            TopicPartition topicPartition = toTopicPartition(partitionInfo);
            HostAddress leader = HostAddress.fromParts(partitionInfo.leader().host(), partitionInfo.leader().port());
            new Range(partitionBeginOffsets.get(topicPartition), partitionEndOffsets.get(topicPartition))
                    .partition(messagesPerSplit)
                    .stream()
                    .map(range -> new KafkaSplit(
                            kafkaTableHandle.getTopicName(),
                            kafkaTableHandle.getKeyDataFormat(),
                            kafkaTableHandle.getMessageDataFormat(),
                            keyDataSchemaContents,
                            messageDataSchemaContents,
                            partitionInfo.partition(),
                            range,
                            leader))
                    .forEach(splits::add);
        }
        return new FixedSplitSource(splits.build());
    } catch (Exception e) {
        // Catch all exceptions because the Kafka library is written in Scala and does not
        // declare checked exceptions in its method signatures.
        if (e instanceof TrinoException) {
            throw e;
        }
        throw new TrinoException(KAFKA_SPLIT_ERROR, format("Cannot list splits for table '%s' reading topic '%s'", kafkaTableHandle.getTableName(), kafkaTableHandle.getTopicName()), e);
    }
}
Also used: TopicPartition(org.apache.kafka.common.TopicPartition) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) String.format(java.lang.String.format) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Inject(javax.inject.Inject) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ContentSchemaReader(io.trino.plugin.kafka.schema.ContentSchemaReader) Optional(java.util.Optional) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) HostAddress(io.trino.spi.HostAddress) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)
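
The toTopicPartition method referenced in the stream pipeline is a private helper that the excerpt does not include; given the Kafka client's PartitionInfo API, it is presumably a one-liner along these lines:

private static TopicPartition toTopicPartition(PartitionInfo partitionInfo) {
    // Convert the Kafka client's partition metadata into the TopicPartition key
    // used with beginningOffsets/endOffsets above.
    return new TopicPartition(partitionInfo.topic(), partitionInfo.partition());
}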

Aggregations

ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource): 26 usages
Test (org.testng.annotations.Test): 14 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 12 usages
ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 12 usages
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 10 usages
ImmutableList (com.google.common.collect.ImmutableList): 6 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 6 usages
ConnectorSplitManager (io.trino.spi.connector.ConnectorSplitManager): 6 usages
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 6 usages
TrinoException (io.trino.spi.TrinoException): 5 usages
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 5 usages
DynamicFilter (io.trino.spi.connector.DynamicFilter): 5 usages
FixedSplitSource (io.trino.spi.connector.FixedSplitSource): 5 usages
List (java.util.List): 5 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 5 usages
Optional (java.util.Optional): 5 usages
Inject (javax.inject.Inject): 5 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 4 usages
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 4 usages
URI (java.net.URI): 4 usages