Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.
From the class AccumuloSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transactionHandle,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    AccumuloTableHandle handle = (AccumuloTableHandle) tableHandle;
    String schemaName = handle.getSchema();
    String tableName = handle.getTable();
    String rowIdName = handle.getRowId();

    // Get non-row ID column constraints
    List<AccumuloColumnConstraint> constraints = getColumnConstraints(rowIdName, handle.getConstraint());

    // Get the range domain for the row ID column
    Optional<Domain> rDom = getRangeDomain(rowIdName, handle.getConstraint());

    // Call out to our client to retrieve all tablet split metadata, using the row ID domain and the secondary index
    List<TabletSplitMetadata> tabletSplits = client.getTabletSplits(session, schemaName, tableName, rDom, constraints, handle.getSerializerInstance());

    // Pack each tablet's split metadata into a connector split
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    for (TabletSplitMetadata splitMetadata : tabletSplits) {
        AccumuloSplit split = new AccumuloSplit(
                splitMetadata.getRanges().stream()
                        .map(SerializedRange::serialize)
                        .collect(Collectors.toList()),
                splitMetadata.getHostPort());
        cSplits.add(split);
    }
    return new FixedSplitSource(cSplits.build());
}
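All of the usages on this page follow the same shape: compute the complete split list up front, then wrap it in a FixedSplitSource, which simply serves batches from that fixed list until it is drained. Below is a minimal sketch of the pattern, assuming the same SPI vintage as these examples (where ConnectorSplit declares isRemotelyAccessible, getAddresses, and getInfo); ExampleSplit is a hypothetical class for illustration, not part of Trino.

import com.google.common.collect.ImmutableList;
import io.trino.spi.HostAddress;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.FixedSplitSource;

import java.util.List;

// Hypothetical minimal split: any worker may run it, no placement preference
public class ExampleSplit
        implements ConnectorSplit
{
    @Override
    public boolean isRemotelyAccessible()
    {
        return true;
    }

    @Override
    public List<HostAddress> getAddresses()
    {
        return ImmutableList.of();
    }

    @Override
    public Object getInfo()
    {
        return this;
    }
}

// Usage: the entire split list is materialized before the source is returned
// ConnectorSplitSource source = new FixedSplitSource(ImmutableList.of(new ExampleSplit()));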
Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.
From the class HiveSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();

    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    // verify the table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }

    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);

    // short-circuit if there are no partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }

    // get the bucket filter and bucket handle
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();

    // validate bucketed execution
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }

    // sort partitions in reverse natural order of partition ID
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);

    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));

    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());
    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());

    HiveSplitSource splitSource;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, highMemorySplitSourceCounter, hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }

    hiveSplitLoader.start(splitSource);
    return splitSource;
}
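Note that Hive only returns a FixedSplitSource on the empty-partition short circuit; otherwise splits are produced asynchronously by the background loader. The partition sort above uses Guava's Ordering to order partitions by ID in reverse, so lexicographically larger partition IDs (typically the newest date partitions) are processed first. A minimal standalone sketch of that idiom, using a hypothetical Partition record in place of HivePartition:

import com.google.common.collect.Ordering;

import java.util.List;

public class OrderingSketch
{
    // Hypothetical stand-in for HivePartition
    record Partition(String partitionId) {}

    public static void main(String[] args)
    {
        List<Partition> partitions = List.of(
                new Partition("ds=2024-01-01"),
                new Partition("ds=2024-01-03"),
                new Partition("ds=2024-01-02"));

        // Sort by a derived key, reversed; sortedCopy returns a new list
        // and leaves the input untouched
        List<Partition> newestFirst = Ordering.natural()
                .onResultOf(Partition::partitionId)
                .reverse()
                .sortedCopy(partitions);

        // Prints ds=2024-01-03, ds=2024-01-02, ds=2024-01-01
        newestFirst.forEach(p -> System.out.println(p.partitionId()));
    }
}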
Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.
From the class JmxSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle table,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    JmxTableHandle tableHandle = (JmxTableHandle) table;

    // TODO is there a better way to get the node column?
    Optional<JmxColumnHandle> nodeColumnHandle = tableHandle.getColumnHandles().stream()
            .filter(jmxColumnHandle -> jmxColumnHandle.getColumnName().equals(NODE_COLUMN_NAME))
            .findFirst();
    checkState(nodeColumnHandle.isPresent(), "Failed to find %s column", NODE_COLUMN_NAME);

    TupleDomain<ColumnHandle> nodeFilter = tableHandle.getNodeFilter();
    List<ConnectorSplit> splits = nodeManager.getAllNodes().stream()
            .filter(node -> {
                NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(node.getNodeIdentifier()));
                return nodeFilter.overlaps(fromFixedValues(ImmutableMap.of(nodeColumnHandle.get(), value)));
            })
            .map(node -> new JmxSplit(ImmutableList.of(node.getHostAndPort())))
            .collect(toList());
    return new FixedSplitSource(splits);
}
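Here the node filter prunes workers before any splits are created: each node identifier is wrapped as a single fixed value and tested for overlap with the table handle's node predicate, and only matching nodes receive a JmxSplit. A minimal sketch of that overlap check, using a hypothetical nodeColumn handle (ColumnHandle is a marker interface, so an anonymous instance suffices for illustration):

import com.google.common.collect.ImmutableMap;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;

public class NodeFilterSketch
{
    public static void main(String[] args)
    {
        // Hypothetical handle for the varchar "node" column
        ColumnHandle nodeColumn = new ColumnHandle() {};

        // Filter that admits only the node named "worker-1"
        TupleDomain<ColumnHandle> nodeFilter = TupleDomain.withColumnDomains(
                ImmutableMap.of(nodeColumn, Domain.singleValue(createUnboundedVarcharType(), utf8Slice("worker-1"))));

        // A candidate node identifier is wrapped as a fixed value and tested for overlap
        NullableValue candidate = NullableValue.of(createUnboundedVarcharType(), utf8Slice("worker-2"));
        boolean keep = nodeFilter.overlaps(TupleDomain.fromFixedValues(ImmutableMap.of(nodeColumn, candidate)));

        System.out.println(keep); // false: worker-2 is pruned and receives no JmxSplit
    }
}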
Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.
From the class PhoenixSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle table,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    JdbcTableHandle tableHandle = (JdbcTableHandle) table;
    try (Connection connection = phoenixClient.getConnection(session)) {
        List<JdbcColumnHandle> columns = tableHandle.getColumns()
                .map(columnSet -> columnSet.stream().map(JdbcColumnHandle.class::cast).collect(toList()))
                .orElseGet(() -> phoenixClient.getColumns(session, tableHandle));
        PhoenixPreparedStatement inputQuery = (PhoenixPreparedStatement) phoenixClient.prepareStatement(session, connection, tableHandle, columns, Optional.empty());
        int maxScansPerSplit = session.getProperty(PhoenixSessionProperties.MAX_SCANS_PER_SPLIT, Integer.class);
        List<ConnectorSplit> splits = getSplits(inputQuery, maxScansPerSplit).stream()
                .map(PhoenixInputSplit.class::cast)
                .map(split -> new PhoenixSplit(getSplitAddresses(split), SerializedPhoenixInputSplit.serialize(split)))
                .collect(toImmutableList());
        return new FixedSplitSource(splits);
    }
    catch (IOException | SQLException e) {
        throw new TrinoException(PHOENIX_SPLIT_ERROR, "Couldn't get Phoenix splits", e);
    }
}
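PhoenixInputSplit is a Hadoop Writable rather than a plain value object, which is presumably why it is wrapped in a SerializedPhoenixInputSplit before being embedded in the PhoenixSplit that the engine ships to workers. A hypothetical sketch of what such a wrapper has to do (serializeWritable is illustrative, not the actual helper; it relies only on the Writable contract):

import org.apache.hadoop.io.Writable;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Base64;

public final class WritableCodec
{
    private WritableCodec() {}

    // Flatten a Hadoop Writable to a transport-friendly string by delegating
    // to its own write(DataOutput) contract and Base64-encoding the bytes
    public static String serializeWritable(Writable writable)
            throws IOException
    {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        writable.write(new DataOutputStream(bytes));
        return Base64.getEncoder().encodeToString(bytes.toByteArray());
    }
}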
Use of io.trino.spi.connector.FixedSplitSource in project trino by trinodb.
From the class TpcdsSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    Set<Node> nodes = nodeManager.getRequiredWorkerNodes();
    checkState(!nodes.isEmpty(), "No TPCDS nodes available");

    int totalParts = nodes.size() * splitsPerNode;
    int partNumber = 0;

    // Partition the data into parts, assigning splitsPerNode parts to each available node
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    for (Node node : nodes) {
        for (int i = 0; i < splitsPerNode; i++) {
            splits.add(new TpcdsSplit(partNumber, totalParts, ImmutableList.of(node.getHostAndPort()), noSexism));
            partNumber++;
        }
    }
    return new FixedSplitSource(splits.build());
}
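The part numbering is plain arithmetic: with N workers and splitsPerNode splits each, totalParts = N * splitsPerNode, and partNumber runs from 0 to totalParts - 1, so each split generates a disjoint 1/totalParts slice of every table on one specific worker. A standalone sketch of the enumeration with hypothetical worker names:

import java.util.List;

public class PartNumbering
{
    public static void main(String[] args)
    {
        int splitsPerNode = 2;
        List<String> workers = List.of("worker-1", "worker-2", "worker-3");

        int totalParts = workers.size() * splitsPerNode; // 6
        int partNumber = 0;
        for (String worker : workers) {
            for (int i = 0; i < splitsPerNode; i++) {
                // Each part is pinned to exactly one worker, and every part
                // number in [0, totalParts) is used exactly once
                System.out.printf("part %d of %d -> %s%n", partNumber, totalParts, worker);
                partNumber++;
            }
        }
    }
}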