Example 1 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class SystemSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    SystemTableHandle table = (SystemTableHandle) tableHandle;
    TupleDomain<ColumnHandle> constraint = table.getConstraint();
    SystemTable systemTable = tables.getSystemTable(session, table.getSchemaTableName()).orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()));
    Distribution tableDistributionMode = systemTable.getDistribution();
    if (tableDistributionMode == SINGLE_COORDINATOR) {
        // A single-coordinator table produces exactly one split, pinned to the current (coordinator) node
        HostAddress address = nodeManager.getCurrentNode().getHostAndPort();
        ConnectorSplit split = new SystemSplit(address, constraint);
        return new FixedSplitSource(ImmutableList.of(split));
    }
    // Otherwise, emit one split per coordinator (ALL_COORDINATORS) or per active node (ALL_NODES)
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    ImmutableSet.Builder<InternalNode> nodes = ImmutableSet.builder();
    if (tableDistributionMode == ALL_COORDINATORS) {
        nodes.addAll(nodeManager.getCoordinators());
    } else if (tableDistributionMode == ALL_NODES) {
        nodes.addAll(nodeManager.getNodes(ACTIVE));
    }
    Set<InternalNode> nodeSet = nodes.build();
    for (InternalNode node : nodeSet) {
        splits.add(new SystemSplit(node.getHostAndPort(), constraint));
    }
    return new FixedSplitSource(splits.build());
}
Also used: ColumnHandle (io.trino.spi.connector.ColumnHandle), ImmutableList (com.google.common.collect.ImmutableList), HostAddress (io.trino.spi.HostAddress), TableNotFoundException (io.trino.spi.connector.TableNotFoundException), ImmutableSet (com.google.common.collect.ImmutableSet), FixedSplitSource (io.trino.spi.connector.FixedSplitSource), Distribution (io.trino.spi.connector.SystemTable.Distribution), SystemTable (io.trino.spi.connector.SystemTable), InternalNode (io.trino.metadata.InternalNode), ConnectorSplit (io.trino.spi.connector.ConnectorSplit)
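
The SystemSplit class constructed above is not shown on this page. For orientation, a minimal ConnectorSplit implementation needs little more than the addresses it should run on and whether it may be scheduled remotely. The sketch below is illustrative only: the class and field names are hypothetical, and the real SystemSplit also carries the TupleDomain constraint plus Jackson annotations for serialization.

import com.google.common.collect.ImmutableList;
import io.trino.spi.HostAddress;
import io.trino.spi.connector.ConnectorSplit;

import java.util.List;

import static java.util.Objects.requireNonNull;

public class SingleNodeSplit
        implements ConnectorSplit
{
    private final HostAddress address;

    public SingleNodeSplit(HostAddress address)
    {
        this.address = requireNonNull(address, "address is null");
    }

    @Override
    public boolean isRemotelyAccessible()
    {
        // false pins the split to the addresses below, e.g. the coordinator for system tables
        return false;
    }

    @Override
    public List<HostAddress> getAddresses()
    {
        return ImmutableList.of(address);
    }

    @Override
    public Object getInfo()
    {
        // surfaced for debugging, e.g. in the web UI
        return address.toString();
    }
}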

Example 2 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class ExampleSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle connectorTableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    ExampleTableHandle tableHandle = (ExampleTableHandle) connectorTableHandle;
    ExampleTable table = exampleClient.getTable(tableHandle.getSchemaName(), tableHandle.getTableName());
    // This can happen if the table is removed during a query
    if (table == null) {
        throw new TableNotFoundException(tableHandle.toSchemaTableName());
    }
    List<ConnectorSplit> splits = new ArrayList<>();
    for (URI uri : table.getSources()) {
        splits.add(new ExampleSplit(uri.toString()));
    }
    // Shuffle so the sources are not handed out in the same order on every query
    Collections.shuffle(splits);
    return new FixedSplitSource(splits);
}
Also used: TableNotFoundException (io.trino.spi.connector.TableNotFoundException), FixedSplitSource (io.trino.spi.connector.FixedSplitSource), ArrayList (java.util.ArrayList), ConnectorSplit (io.trino.spi.connector.ConnectorSplit), URI (java.net.URI)
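
By contrast with SystemSplit, a split for data reachable over the network can declare itself remotely accessible, so the engine may schedule it on any worker. A sketch in the spirit of ExampleSplit (names illustrative; the real class also has JSON bindings):

import com.google.common.collect.ImmutableList;
import io.trino.spi.HostAddress;
import io.trino.spi.connector.ConnectorSplit;

import java.util.List;

import static java.util.Objects.requireNonNull;

public class UriSplit
        implements ConnectorSplit
{
    private final String uri;

    public UriSplit(String uri)
    {
        this.uri = requireNonNull(uri, "uri is null");
    }

    @Override
    public boolean isRemotelyAccessible()
    {
        // any worker can fetch the URI, so no node pinning is required
        return true;
    }

    @Override
    public List<HostAddress> getAddresses()
    {
        // no locality preference; the engine is free to pick any node
        return ImmutableList.of();
    }

    @Override
    public Object getInfo()
    {
        return uri;
    }
}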

Example 3 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class HiveSplitSource, method getNextBatch:

@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
    boolean noMoreSplits;
    State state = stateReference.get();
    switch(state.getKind()) {
        case INITIAL:
            noMoreSplits = false;
            break;
        case NO_MORE_SPLITS:
            noMoreSplits = true;
            break;
        case FAILED:
            return failedFuture(state.getThrowable());
        case CLOSED:
            throw new IllegalStateException("HiveSplitSource is already closed");
        default:
            throw new UnsupportedOperationException();
    }
    OptionalInt bucketNumber = toBucketNumber(partitionHandle);
    ListenableFuture<List<ConnectorSplit>> future = queues.borrowBatchAsync(bucketNumber, maxSize, internalSplits -> {
        ImmutableList.Builder<InternalHiveSplit> splitsToInsertBuilder = ImmutableList.builder();
        ImmutableList.Builder<ConnectorSplit> resultBuilder = ImmutableList.builder();
        int removedEstimatedSizeInBytes = 0;
        int removedSplitCount = 0;
        for (InternalHiveSplit internalSplit : internalSplits) {
            // Perform one more dynamic filter check immediately before split is returned to the engine
            if (!internalSplit.getPartitionMatchSupplier().getAsBoolean()) {
                removedEstimatedSizeInBytes += internalSplit.getEstimatedSizeInBytes();
                removedSplitCount++;
                continue;
            }
            long maxSplitBytes = maxSplitSize.toBytes();
            if (remainingInitialSplits.get() > 0) {
                // Double-checked decrement: the outer get() keeps the counter from drifting
                // far below zero once the initial splits have been handed out
                if (remainingInitialSplits.getAndDecrement() > 0) {
                    maxSplitBytes = maxInitialSplitSize.toBytes();
                }
            }
            InternalHiveBlock block = internalSplit.currentBlock();
            long splitBytes;
            if (internalSplit.isSplittable()) {
                long remainingBlockBytes = block.getEnd() - internalSplit.getStart();
                if (remainingBlockBytes <= maxSplitBytes) {
                    splitBytes = remainingBlockBytes;
                } else if (maxSplitBytes * 2 >= remainingBlockBytes) {
                    // Second to last split in this block, generate two evenly sized splits
                    splitBytes = remainingBlockBytes / 2;
                } else {
                    splitBytes = maxSplitBytes;
                }
            } else {
                splitBytes = internalSplit.getEnd() - internalSplit.getStart();
            }
            resultBuilder.add(new HiveSplit(
                    databaseName,
                    tableName,
                    internalSplit.getPartitionName(),
                    internalSplit.getPath(),
                    internalSplit.getStart(),
                    splitBytes,
                    internalSplit.getEstimatedFileSize(),
                    internalSplit.getFileModifiedTime(),
                    internalSplit.getSchema(),
                    internalSplit.getPartitionKeys(),
                    block.getAddresses(),
                    internalSplit.getBucketNumber(),
                    internalSplit.getStatementId(),
                    internalSplit.isForceLocalScheduling(),
                    internalSplit.getTableToPartitionMapping(),
                    internalSplit.getBucketConversion(),
                    internalSplit.getBucketValidation(),
                    internalSplit.isS3SelectPushdownEnabled(),
                    internalSplit.getAcidInfo(),
                    numberOfProcessedSplits.getAndIncrement(),
                    splitWeightProvider.weightForSplitSizeInBytes(splitBytes)));
            internalSplit.increaseStart(splitBytes);
            if (internalSplit.isDone()) {
                removedEstimatedSizeInBytes += internalSplit.getEstimatedSizeInBytes();
                removedSplitCount++;
            } else {
                splitsToInsertBuilder.add(internalSplit);
            }
        }
        estimatedSplitSizeInBytes.addAndGet(-removedEstimatedSizeInBytes);
        bufferedInternalSplitCount.addAndGet(-removedSplitCount);
        List<InternalHiveSplit> splitsToInsert = splitsToInsertBuilder.build();
        List<ConnectorSplit> result = resultBuilder.build();
        return new AsyncQueue.BorrowResult<>(splitsToInsert, result);
    });
    ListenableFuture<ConnectorSplitBatch> transform = Futures.transform(future, splits -> {
        requireNonNull(splits, "splits is null");
        if (recordScannedFiles) {
            splits.forEach(split -> scannedFilePaths.add(((HiveSplit) split).getPath()));
        }
        if (noMoreSplits) {
            // Report the last batch only when this batch is empty and the queue for the
            // bucket is drained; the engine may call getNextBatch once more after that,
            // but an extra invocation likely doesn't matter.
            return new ConnectorSplitBatch(splits, splits.isEmpty() && queues.isFinished(bucketNumber));
        } else {
            return new ConnectorSplitBatch(splits, false);
        }
    }, directExecutor());
    return toCompletableFuture(transform);
}
Also used: InternalHiveBlock (io.trino.plugin.hive.InternalHiveSplit.InternalHiveBlock), ImmutableList (com.google.common.collect.ImmutableList), OptionalInt (java.util.OptionalInt), BorrowResult (io.trino.plugin.hive.util.AsyncQueue.BorrowResult), Preconditions.checkState (com.google.common.base.Preconditions.checkState), List (java.util.List), ConnectorSplit (io.trino.spi.connector.ConnectorSplit)
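
The split sizing rule buried in the middle of the loop is worth restating on its own. A splittable block is consumed maxSplitBytes at a time, except that when between one and two times maxSplitBytes remain, the remainder is cut into two even halves so the block does not end with one full split followed by a tiny tail. For example, with maxSplitBytes = 64 and 100 bytes remaining, this yields 50 + 50 rather than 64 + 36. Extracted as a standalone helper (hypothetical name, same arithmetic as above):

static long nextSplitBytes(long remainingBlockBytes, long maxSplitBytes)
{
    if (remainingBlockBytes <= maxSplitBytes) {
        // last split in the block: take everything that is left
        return remainingBlockBytes;
    }
    if (maxSplitBytes * 2 >= remainingBlockBytes) {
        // second to last split: generate two evenly sized splits
        return remainingBlockBytes / 2;
    }
    // plenty remaining: take a full-sized split
    return maxSplitBytes;
}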

Example 4 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class AbstractTestHive, method doTestBucketSortedTables:

private void doTestBucketSortedTables(SchemaTableName table) throws IOException {
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                table,
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("id", VARCHAR))
                        .add(new ColumnMetadata("value_asc", VARCHAR))
                        .add(new ColumnMetadata("value_desc", BIGINT))
                        .add(new ColumnMetadata("ds", VARCHAR))
                        .build(),
                ImmutableMap.<String, Object>builder()
                        .put(STORAGE_FORMAT_PROPERTY, RCBINARY)
                        .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds"))
                        .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id"))
                        .put(BUCKET_COUNT_PROPERTY, bucketCount)
                        .put(SORTED_BY_PROPERTY, ImmutableList.builder()
                                .add(new SortingColumn("value_asc", ASCENDING))
                                .add(new SortingColumn("value_desc", DESCENDING))
                                .build())
                        .buildOrThrow());
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        List<Type> types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }
        HdfsContext context = new HdfsContext(session);
        // verify we have enough temporary files per bucket to require multiple passes
        Path stagingPathRoot;
        if (isTemporaryStagingDirectoryEnabled(session)) {
            stagingPathRoot = new Path(getTemporaryStagingDirectoryPath(session).replace("${USER}", context.getIdentity().getUser()));
        } else {
            stagingPathRoot = getStagingPathRoot(outputHandle);
        }
        assertThat(listAllDataFiles(context, stagingPathRoot)).filteredOn(file -> file.contains(".tmp-sort.")).size().isGreaterThan(bucketCount * getHiveConfig().getMaxOpenSortFiles() * 2);
        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify there are no temporary files
        for (String file : listAllDataFiles(context, stagingPathRoot)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }
        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        // verify local sorting property
        ConnectorTableProperties properties = metadata.getTableProperties(newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true, "bucket_execution_enabled", false)), tableHandle);
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        assertEquals(properties.getLocalProperties(), ImmutableList.of(
                new SortingProperty<>(columnHandles.get(columnIndex.get("value_asc")), ASC_NULLS_FIRST),
                new SortingProperty<>(columnHandles.get(columnIndex.get("value_desc")), DESC_NULLS_LAST)));
        assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties()).isEmpty();
        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertThat(splits).hasSize(bucketCount);
        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                String lastValueAsc = null;
                long lastValueDesc = -1;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        Block blockAsc = page.getBlock(1);
                        Block blockDesc = page.getBlock(2);
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                long valueDesc = BIGINT.getLong(blockDesc, i);
                                if (lastValueDesc != -1) {
                                    assertLessThanOrEqual(valueDesc, lastValueDesc);
                                }
                                lastValueDesc = valueDesc;
                            } else {
                                lastValueDesc = -1;
                            }
                        }
                        lastValueAsc = valueAsc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
Also used: ColumnMetadata (io.trino.spi.connector.ColumnMetadata), Page (io.trino.spi.Page), ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource), ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom), ConnectorSession (io.trino.spi.connector.ConnectorSession), TestingConnectorSession (io.trino.testing.TestingConnectorSession), ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata), HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext), ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata), Path (org.apache.hadoop.fs.Path), HiveSessionProperties.getTemporaryStagingDirectoryPath (io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath), HiveColumnHandle.bucketColumnHandle (io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle), ColumnHandle (io.trino.spi.connector.ColumnHandle), SortingColumn (io.trino.plugin.hive.metastore.SortingColumn), SortingProperty (io.trino.spi.connector.SortingProperty), Constraint (io.trino.spi.connector.Constraint), ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle), MapType (io.trino.spi.type.MapType), HiveTestUtils.mapType (io.trino.plugin.hive.HiveTestUtils.mapType), VarcharType.createVarcharType (io.trino.spi.type.VarcharType.createVarcharType), HiveTestUtils.arrayType (io.trino.plugin.hive.HiveTestUtils.arrayType), HiveTestUtils.rowType (io.trino.plugin.hive.HiveTestUtils.rowType), CharType.createCharType (io.trino.spi.type.CharType.createCharType), HiveType.toHiveType (io.trino.plugin.hive.HiveType.toHiveType), DecimalType.createDecimalType (io.trino.spi.type.DecimalType.createDecimalType), CharType (io.trino.spi.type.CharType), TableType (org.apache.hadoop.hive.metastore.TableType), RowType (io.trino.spi.type.RowType), ArrayType (io.trino.spi.type.ArrayType), Type (io.trino.spi.type.Type), VarcharType.createUnboundedVarcharType (io.trino.spi.type.VarcharType.createUnboundedVarcharType), VarcharType (io.trino.spi.type.VarcharType), ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle), Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice), Slice (io.airlift.slice.Slice), Block (io.trino.spi.block.Block), ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink), MaterializedResult (io.trino.testing.MaterializedResult), ConnectorTableProperties (io.trino.spi.connector.ConnectorTableProperties), ConnectorSplit (io.trino.spi.connector.ConnectorSplit)
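
The inner read loop above uses the standard page-draining idiom: ConnectorPageSource.getNextPage may return null while the source is still producing, so both conditions are checked. A minimal sketch of that idiom on its own (hypothetical helper name):

import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSource;

static long countRows(ConnectorPageSource pageSource)
{
    long rows = 0;
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page == null) {
            // no data available right now; poll again
            continue;
        }
        rows += page.getPositionCount();
    }
    return rows;
}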

Example 5 with ConnectorSplit

Use of io.trino.spi.connector.ConnectorSplit in project trino by trinodb.

From the class AbstractTestHiveFileSystem, method createTable:

private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 or another
        // file system. The metastore tries to validate the location when creating
        // the table, which fails without explicit file-system configuration. We
        // work around that by using a dummy location when creating the table, and
        // update it here to the correct location.
        metastoreClient.updateTableLocation(database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        metadata.beginQuery(session);
        ConnectorSplitSource splitSource = getSplits(splitManager, transaction, session, tableHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
        metadata.cleanupQuery(session);
    }
}
Also used: ColumnHandle (io.trino.spi.connector.ColumnHandle), ColumnMetadata (io.trino.spi.connector.ColumnMetadata), AbstractTestHive.filterNonHiddenColumnMetadata (io.trino.plugin.hive.AbstractTestHive.filterNonHiddenColumnMetadata), ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource), ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource), ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle), ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle), Transaction (io.trino.plugin.hive.AbstractTestHive.Transaction), HiveTransaction (io.trino.plugin.hive.AbstractTestHive.HiveTransaction), Slice (io.airlift.slice.Slice), ConnectorSession (io.trino.spi.connector.ConnectorSession), ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata), MaterializedResult (io.trino.testing.MaterializedResult), ConnectorPageSink (io.trino.spi.connector.ConnectorPageSink), ConnectorSplit (io.trino.spi.connector.ConnectorSplit), ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata)
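
The getAllSplits(splitSource) helper used above is not shown on this page. A minimal sketch consistent with how these tests consume a split source, assuming this era's getNextBatch(ConnectorPartitionHandle, int) signature (the batch size of 1000 is arbitrary):

import com.google.common.collect.ImmutableList;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;

import java.util.List;

import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

static List<ConnectorSplit> getAllSplits(ConnectorSplitSource splitSource)
{
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        // block until the next batch is ready, then accumulate its splits
        splits.addAll(getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, 1000)).getSplits());
    }
    return splits.build();
}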

Aggregations

ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 44 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 21 usages
ImmutableList (com.google.common.collect.ImmutableList): 19 usages
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 18 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 17 usages
List (java.util.List): 14 usages
Test (org.testng.annotations.Test): 14 usages
ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource): 13 usages
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 13 usages
FixedSplitSource (io.trino.spi.connector.FixedSplitSource): 12 usages
ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource): 11 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 11 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 9 usages
Inject (javax.inject.Inject): 9 usages
HostAddress (io.trino.spi.HostAddress): 8 usages
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 7 usages
Constraint (io.trino.spi.connector.Constraint): 7 usages
DynamicFilter (io.trino.spi.connector.DynamicFilter): 7 usages
MaterializedResult (io.trino.testing.MaterializedResult): 7 usages
ArrayList (java.util.ArrayList): 7 usages