Example 11 with FixedSplitSource

Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.

From the class AccumuloSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    AccumuloTableHandle handle = (AccumuloTableHandle) tableHandle;
    String schemaName = handle.getSchema();
    String tableName = handle.getTable();
    String rowIdName = handle.getRowId();
    // Get non-row ID column constraints
    List<AccumuloColumnConstraint> constraints = getColumnConstraints(rowIdName, handle.getConstraint());
    // Get the row domain column range
    Optional<Domain> rDom = getRangeDomain(rowIdName, handle.getConstraint());
    // Call out to our client to retrieve all tablet split metadata using the row ID domain and the secondary index
    List<TabletSplitMetadata> tabletSplits = client.getTabletSplits(session, schemaName, tableName, rDom, constraints, handle.getSerializerInstance());
    // Pack the tablet split metadata into a connector split
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    for (TabletSplitMetadata splitMetadata : tabletSplits) {
        AccumuloSplit split = new AccumuloSplit(
                splitMetadata.getRanges().stream()
                        .map(SerializedRange::serialize)
                        .collect(Collectors.toList()),
                splitMetadata.getHostPort());
        cSplits.add(split);
    }
    return new FixedSplitSource(cSplits.build());
}
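
All the examples in this section share one shape: the connector computes its complete split list eagerly, then wraps it in a FixedSplitSource, which hands the finished list to the engine in batches. A minimal sketch of that shape with the Accumulo-specific metadata stripped out (the class and method names below are illustrative, not part of any connector):

import java.util.List;

import com.google.common.collect.ImmutableList;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.FixedSplitSource;

public class EagerSplitSourceSketch
{
    // Hypothetical helper: wrap an already-materialized split list.
    // FixedSplitSource is immutable; no splits can be added after construction.
    public static ConnectorSplitSource toSplitSource(List<ConnectorSplit> precomputedSplits)
    {
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        splits.addAll(precomputedSplits);
        return new FixedSplitSource(splits.build());
    }
}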

Example 12 with FixedSplitSource

Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.

From the class HiveSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();
    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    // verify table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }
    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);
    // short circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }
    // get buckets from first partition (arbitrary)
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }
    // sort partitions
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());
    HiveSplitSource splitSource;
    switch(splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    hiveSplitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    hiveSplitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
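
Two details are worth isolating from the Hive version: the empty-partition short circuit avoids starting a background loader at all, and the two-argument FixedSplitSource constructor (splits plus table-execute split info, both empty here) is chosen when the handle asks for scanned files to be recorded. A condensed sketch of just that branch, assuming only the two constructor overloads already visible above:

import com.google.common.collect.ImmutableList;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.FixedSplitSource;

// Sketch: the empty short-circuit branch of a getSplits implementation
static ConnectorSplitSource emptySplitSource(boolean recordScannedFiles)
{
    if (recordScannedFiles) {
        // Two-argument overload: no splits, and an empty table-execute info list
        return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
    }
    return new FixedSplitSource(ImmutableList.of());
}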

Example 13 with FixedSplitSource

Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.

From the class JmxSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    JmxTableHandle tableHandle = (JmxTableHandle) table;
    // TODO is there a better way to get the node column?
    Optional<JmxColumnHandle> nodeColumnHandle = tableHandle.getColumnHandles().stream()
            .filter(jmxColumnHandle -> jmxColumnHandle.getColumnName().equals(NODE_COLUMN_NAME))
            .findFirst();
    checkState(nodeColumnHandle.isPresent(), "Failed to find %s column", NODE_COLUMN_NAME);
    TupleDomain<ColumnHandle> nodeFilter = tableHandle.getNodeFilter();
    List<ConnectorSplit> splits = nodeManager.getAllNodes().stream()
            .filter(node -> {
                NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(node.getNodeIdentifier()));
                return nodeFilter.overlaps(fromFixedValues(ImmutableMap.of(nodeColumnHandle.get(), value)));
            })
            .map(node -> new JmxSplit(ImmutableList.of(node.getHostAndPort())))
            .collect(toList());
    return new FixedSplitSource(splits);
}
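
The core of the JMX version is the per-node predicate: each node identifier is wrapped as a single fixed value over the node column and tested against the table's TupleDomain filter with overlaps. The same check in isolation, as a hedged sketch (nodeColumn stands in for the resolved node column handle; every API used here already appears in the snippet above):

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;

// Sketch: true when the node filter admits a node with this identifier
static boolean nodeMatches(TupleDomain<ColumnHandle> nodeFilter, ColumnHandle nodeColumn, String nodeIdentifier)
{
    NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(nodeIdentifier));
    // overlaps() checks the fixed value against the filter's domain for this column
    return nodeFilter.overlaps(TupleDomain.fromFixedValues(ImmutableMap.of(nodeColumn, value)));
}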

Example 14 with FixedSplitSource

Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.

From the class PhoenixSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns()
                .map(columnSet -> columnSet.stream()
                        .map(JdbcColumnHandle.class::cast)
                        .collect(toList()))
                .orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream()
                .map(PhoenixInputSplit.class::cast)
                .map(split -> new PhoenixSplit(getSplitAddresses(split), SerializedPhoenixInputSplit.serialize(split)))
                .collect(toImmutableList());
        return new FixedSplitSource(splits);
    } catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
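
However the splits are produced, a FixedSplitSource is drained like any other ConnectorSplitSource: batches are pulled until isFinished() reports true. A hedged consumer sketch against the SPI version these examples target, where getNextBatch still takes a partition handle (NOT_PARTITIONED is the usual handle outside grouped scheduling):

import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import java.util.ArrayList;
import java.util.List;

import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorSplitSource.ConnectorSplitBatch;

// Sketch: drain a split source the way the engine would, 100 splits per batch
static List<ConnectorSplit> drain(ConnectorSplitSource source) throws Exception
{
    List<ConnectorSplit> all = new ArrayList<>();
    while (!source.isFinished()) {
        // FixedSplitSource completes this future immediately; other sources may wait
        ConnectorSplitBatch batch = source.getNextBatch(NOT_PARTITIONED, 100).get();
        all.addAll(batch.getSplits());
    }
    source.close();
    return all;
}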

Example 15 with FixedSplitSource

Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.

From the class TpcdsSplitManager, method getSplits:

@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    Set<Node> nodes = nodeManager.getRequiredWorkerNodes();
    checkState(!nodes.isEmpty(), "No TPCDS nodes available");
    int totalParts = nodes.size() * splitsPerNode;
    int partNumber = 0;
    // Split the data into totalParts pieces, spreading them evenly across the available nodes
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    for (Node node : nodes) {
        for (int i = 0; i < splitsPerNode; i++) {
            splits.add(new TpcdsSplit(partNumber, totalParts, ImmutableList.of(node.getHostAndPort()), noSexism));
            partNumber++;
        }
    }
    return new FixedSplitSource(splits.build());
}
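
The numbering here is a simple block scheme: with N nodes and splitsPerNode splits each, totalParts = N * splitsPerNode and every split receives a distinct partNumber in [0, totalParts), with each node assigned a consecutive block of splitsPerNode part numbers. For example, 3 nodes with splitsPerNode = 2 gives totalParts = 6; node 0 gets parts 0 and 1, node 1 gets 2 and 3, node 2 gets 4 and 5. The same loop without the connector types, as an illustrative sketch:

import java.util.ArrayList;
import java.util.List;

// Sketch: (partNumber, totalParts, nodeIndex) assignments, a consecutive block per node
record PartAssignment(int partNumber, int totalParts, int nodeIndex) {}

static List<PartAssignment> assignParts(int nodeCount, int splitsPerNode)
{
    int totalParts = nodeCount * splitsPerNode;
    List<PartAssignment> parts = new ArrayList<>();
    int partNumber = 0;
    for (int node = 0; node < nodeCount; node++) {
        for (int i = 0; i < splitsPerNode; i++) {
            parts.add(new PartAssignment(partNumber, totalParts, node));
            partNumber++;
        }
    }
    return parts;
}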

Aggregations

FixedSplitSource (io.trino.spi.connector.FixedSplitSource): 16
ConnectorSplit (io.trino.spi.connector.ConnectorSplit): 12
ImmutableList (com.google.common.collect.ImmutableList): 11
HostAddress (io.trino.spi.HostAddress): 6
ConnectorSession (io.trino.spi.connector.ConnectorSession): 5
ConnectorSplitManager (io.trino.spi.connector.ConnectorSplitManager): 5
ConnectorSplitSource (io.trino.spi.connector.ConnectorSplitSource): 5
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle): 5
ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle): 5
DynamicFilter (io.trino.spi.connector.DynamicFilter): 5
ArrayList (java.util.ArrayList): 5
List (java.util.List): 5
Objects.requireNonNull (java.util.Objects.requireNonNull): 5
Optional (java.util.Optional): 5
Inject (javax.inject.Inject): 5
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 4
TrinoException (io.trino.spi.TrinoException): 4
Lists (com.google.common.collect.Lists): 3
Node (io.trino.spi.Node): 3
ColumnHandle (io.trino.spi.connector.ColumnHandle): 3