
Example 16 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

From the class HiveSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();
    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    // verify table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }
    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);
    // short circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }
    // get buckets from first partition (arbitrary)
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(
            session,
            metastore,
            table,
            tableName,
            partitions,
            bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());
    HiveSplitSource splitSource;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    hiveSplitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    hiveSplitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Iterables.transform(com.google.common.collect.Iterables.transform) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) HiveSessionProperties.isOptimizeSymlinkListing(io.trino.plugin.hive.HiveSessionProperties.isOptimizeSymlinkListing) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) HiveSessionProperties.isIgnoreAbsentPartitions(io.trino.plugin.hive.HiveSessionProperties.isIgnoreAbsentPartitions) HiveSessionProperties.isUseParquetColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseParquetColumnNames) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) BoundedExecutor(io.airlift.concurrent.BoundedExecutor) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) Iterables.concat(com.google.common.collect.Iterables.concat) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) ENGLISH(java.util.Locale.ENGLISH) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) TableToPartitionMapping.mapColumnsByIndex(io.trino.plugin.hive.TableToPartitionMapping.mapColumnsByIndex) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) Math.min(java.lang.Math.min) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) VersionEmbedder(io.trino.spi.VersionEmbedder) DynamicFilter(io.trino.spi.connector.DynamicFilter) MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HiveStorageFormat.getHiveStorageFormat(io.trino.plugin.hive.HiveStorageFormat.getHiveStorageFormat) SERVER_SHUTTING_DOWN(io.trino.spi.StandardErrorCode.SERVER_SHUTTING_DOWN) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) HiveCoercionPolicy.canCoerce(io.trino.plugin.hive.util.HiveCoercionPolicy.canCoerce) Partition(io.trino.plugin.hive.metastore.Partition) HiveUtil(io.trino.plugin.hive.util.HiveUtil) Nested(org.weakref.jmx.Nested) BucketSplitInfo.createBucketSplitInfo(io.trino.plugin.hive.BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) CounterStat(io.airlift.stats.CounterStat) UNPARTITIONED_ID(io.trino.plugin.hive.HivePartition.UNPARTITIONED_ID) HiveSessionProperties.isUseOrcColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames) HIVE_PARTITION_DROPPED_DURING_QUERY(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY) Inject(javax.inject.Inject) GROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) 
Managed(org.weakref.jmx.Managed) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) AbstractIterator(com.google.common.collect.AbstractIterator) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Ordering(com.google.common.collect.Ordering) HiveSessionProperties.getDynamicFilteringWaitTimeout(io.trino.plugin.hive.HiveSessionProperties.getDynamicFilteringWaitTimeout) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) Table(io.trino.plugin.hive.metastore.Table) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) SchemaTableName(io.trino.spi.connector.SchemaTableName) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) TrinoException(io.trino.spi.TrinoException)
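
The returned ConnectorSplitSource is consumed asynchronously by the engine. As a minimal sketch of that consumer side (not Trino's actual scheduler; the SplitSourceDrainer class and the batch size of 1000 are illustrative), a caller could exhaust the source like this, assuming the SPI version shown in these examples, where getNextBatch takes a ConnectorPartitionHandle:

import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import java.util.ArrayList;
import java.util.List;

final class SplitSourceDrainer
{
    private SplitSourceDrainer() {}

    // Illustrative consumer: pull batches until the source reports completion
    static List<ConnectorSplit> drain(ConnectorSplitSource splitSource)
    {
        List<ConnectorSplit> splits = new ArrayList<>();
        while (!splitSource.isFinished()) {
            // getNextBatch is asynchronous; join() blocks, which is acceptable in a sketch
            splits.addAll(splitSource.getNextBatch(NOT_PARTITIONED, 1000).join().getSplits());
        }
        splitSource.close();
        return splits;
    }
}

In the real engine the batches feed the split scheduler incrementally; draining everything eagerly, as above, is only useful for tests and illustration.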

Example 17 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

From the class JmxSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle table,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    JmxTableHandle tableHandle = (JmxTableHandle) table;
    // TODO is there a better way to get the node column?
    Optional<JmxColumnHandle> nodeColumnHandle = tableHandle.getColumnHandles().stream()
            .filter(jmxColumnHandle -> jmxColumnHandle.getColumnName().equals(NODE_COLUMN_NAME))
            .findFirst();
    checkState(nodeColumnHandle.isPresent(), "Failed to find %s column", NODE_COLUMN_NAME);
    TupleDomain<ColumnHandle> nodeFilter = tableHandle.getNodeFilter();
    List<ConnectorSplit> splits = nodeManager.getAllNodes().stream().filter(node -> {
        NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(node.getNodeIdentifier()));
        return nodeFilter.overlaps(fromFixedValues(ImmutableMap.of(nodeColumnHandle.get(), value)));
    }).map(node -> new JmxSplit(ImmutableList.of(node.getHostAndPort()))).collect(toList());
    return new FixedSplitSource(splits);
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) ImmutableMap(com.google.common.collect.ImmutableMap) NullableValue(io.trino.spi.predicate.NullableValue) NodeManager(io.trino.spi.NodeManager) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Inject(javax.inject.Inject) TupleDomain.fromFixedValues(io.trino.spi.predicate.TupleDomain.fromFixedValues) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) NODE_COLUMN_NAME(io.trino.plugin.jmx.JmxMetadata.NODE_COLUMN_NAME) DynamicFilter(io.trino.spi.connector.DynamicFilter) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) Optional(java.util.Optional) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) NullableValue(io.trino.spi.predicate.NullableValue) ConnectorSplit(io.trino.spi.connector.ConnectorSplit)
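
The interesting part of this snippet is the pruning predicate: a node is kept only when its identifier overlaps the table handle's nodeFilter. The sketch below isolates that pattern; TestingColumn is a hypothetical stand-in for JmxColumnHandle (ColumnHandle is a marker interface in the SPI), and the node names are made up:

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.predicate.TupleDomain.fromFixedValues;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;

public final class NodePruningSketch
{
    // Hypothetical column handle; ColumnHandle declares no methods
    private static final class TestingColumn implements ColumnHandle {}

    public static void main(String[] args)
    {
        ColumnHandle nodeColumn = new TestingColumn();
        // A filter that only admits the node named "node-1"
        TupleDomain<ColumnHandle> nodeFilter = fromFixedValues(
                ImmutableMap.of(nodeColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("node-1"))));

        System.out.println(matches(nodeFilter, nodeColumn, "node-1")); // true: kept
        System.out.println(matches(nodeFilter, nodeColumn, "node-2")); // false: pruned
    }

    private static boolean matches(TupleDomain<ColumnHandle> nodeFilter, ColumnHandle nodeColumn, String nodeId)
    {
        // overlaps() is true only when the candidate's single-value domain intersects the filter
        NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(nodeId));
        return nodeFilter.overlaps(fromFixedValues(ImmutableMap.of(nodeColumn, value)));
    }
}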

Example 18 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

From the class TestRaptorSplitManager, method testSanity:

@Test
public void testSanity() {
    ConnectorSplitSource splitSource = getSplits(raptorSplitManager, tableHandle);
    int splitCount = 0;
    while (!splitSource.isFinished()) {
        splitCount += getSplits(splitSource, 1000).size();
    }
    assertEquals(splitCount, 4);
}
Also used : ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) Test(org.testng.annotations.Test)
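
The getSplits(splitSource, 1000) helper is defined elsewhere in the test class. A plausible reconstruction, shown as a test-class member and hedged against the ConnectorSplitSource API used throughout these examples, is:

import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import java.util.List;

// Hypothetical reconstruction of the helper these tests call
private static List<ConnectorSplit> getSplits(ConnectorSplitSource splitSource, int maxSize)
{
    // join() blocks until the asynchronous batch completes, which is fine in a test
    return splitSource.getNextBatch(NOT_PARTITIONED, maxSize).join().getSplits();
}

Looping until isFinished(), as testSanity does, matters because a single getNextBatch call may return fewer splits than requested even when more are pending.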

Example 19 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

From the class TestRaptorSplitManager, method testNoNodes:

@Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "No nodes available to run query")
public void testNoNodes() {
    deleteShardNodes();
    RaptorSplitManager raptorSplitManagerWithBackup = new RaptorSplitManager(new CatalogName("fbraptor"), ImmutableSet::of, shardManager, true);
    ConnectorSplitSource splitSource = getSplits(raptorSplitManagerWithBackup, tableHandle);
    getSplits(splitSource, 1000);
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) CatalogName(io.trino.plugin.base.CatalogName) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) RaptorSplitManager(io.trino.plugin.raptor.legacy.RaptorSplitManager) Test(org.testng.annotations.Test)

Example 20 with ConnectorSplitSource

Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.

From the class TestRaptorSplitManager, method testAssignRandomNodeWhenBackupAvailable:

@Test
public void testAssignRandomNodeWhenBackupAvailable() throws URISyntaxException {
    TestingNodeManager nodeManager = new TestingNodeManager();
    CatalogName connectorId = new CatalogName("raptor");
    NodeSupplier nodeSupplier = nodeManager::getWorkerNodes;
    InternalNode node = new InternalNode(UUID.randomUUID().toString(), new URI("http://127.0.0.1/"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(node);
    RaptorSplitManager raptorSplitManagerWithBackup = new RaptorSplitManager(connectorId, nodeSupplier, shardManager, true);
    deleteShardNodes();
    ConnectorSplitSource partitionSplit = getSplits(raptorSplitManagerWithBackup, tableHandle);
    List<ConnectorSplit> batch = getSplits(partitionSplit, 1);
    assertEquals(getOnlyElement(getOnlyElement(batch).getAddresses()), node.getHostAndPort());
}
Also used : TestingNodeManager(io.trino.testing.TestingNodeManager) CatalogName(io.trino.plugin.base.CatalogName) InternalNode(io.trino.metadata.InternalNode) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) URI(java.net.URI) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) NodeSupplier(io.trino.plugin.raptor.legacy.NodeSupplier) RaptorSplitManager(io.trino.plugin.raptor.legacy.RaptorSplitManager) Test(org.testng.annotations.Test)
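
Taken together, testNoNodes and testAssignRandomNodeWhenBackupAvailable pin down the split manager's fallback behavior when no node holds a shard: in testNoNodes the node supplier (ImmutableSet::of) yields no live nodes, so even with backup enabled the query fails; in the second test one live node exists, so the split is assigned to it and restored from backup. A hedged sketch of that decision follows; this is not Raptor's actual code, and BackupNodePolicy and chooseNode are illustrative names:

import java.util.List;
import java.util.Optional;
import java.util.concurrent.ThreadLocalRandom;

final class BackupNodePolicy
{
    private BackupNodePolicy() {}

    // Illustrative fallback: prefer a node that already holds the shard,
    // otherwise a random live node when restoring from backup is possible
    static <T> Optional<T> chooseNode(List<T> nodesWithShard, List<T> liveNodes, boolean backupAvailable)
    {
        if (!nodesWithShard.isEmpty()) {
            return Optional.of(nodesWithShard.get(0));
        }
        if (backupAvailable && !liveNodes.isEmpty()) {
            return Optional.of(liveNodes.get(ThreadLocalRandom.current().nextInt(liveNodes.size())));
        }
        // no candidate: the real split manager raises "No nodes available to run query"
        return Optional.empty();
    }
}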

Aggregations

ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource): 26 usages
Test (org.testng.annotations.Test): 14 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 12 usages
ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 12 usages
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 10 usages
ImmutableList (com.google.common.collect.ImmutableList): 6 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 6 usages
ConnectorSplitManager (io.trino.spi.connector.ConnectorSplitManager): 6 usages
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 6 usages
TrinoException (io.trino.spi.TrinoException): 5 usages
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata): 5 usages
DynamicFilter (io.trino.spi.connector.DynamicFilter): 5 usages
FixedSplitSource (io.trino.spi.connector.FixedSplitSource): 5 usages
List (java.util.List): 5 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 5 usages
Optional (java.util.Optional): 5 usages
Inject (javax.inject.Inject): 5 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 4 usages
TestingConnectorSession (io.trino.testing.TestingConnectorSession): 4 usages
URI (java.net.URI): 4 usages