Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
From the HiveSplitManager class, method getSplits:
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        DynamicFilter dynamicFilter)
{
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();

    // get table metadata
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    // verify the table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }

    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveTable);

    // short circuit if there are no partitions
    if (partitions.isEmpty()) {
        if (hiveTable.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of());
    }

    // get the bucket filter from the table handle
    Optional<HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();

    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }

    // sort partitions newest-first by partition id
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);

    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(
            session,
            metastore,
            table,
            tableName,
            partitions,
            bucketHandle.map(HiveBucketHandle::toTableBucketProperty));

    // Only one thread per partition is usable when a table is not transactional
    int concurrency = isTransactionalTable(table.getParameters()) ? splitLoaderConcurrency : min(splitLoaderConcurrency, partitions.size());

    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveTable.getTransaction(),
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveTable.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveTable)
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveTable.getMaxScannedFileSize());

    HiveSplitSource splitSource;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            splitSource = HiveSplitSource.allAtOnce(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    hiveSplitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            splitSource = HiveSplitSource.bucketed(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    hiveSplitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveTable.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);
    return splitSource;
}
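The partition-sorting line above relies on Guava's Ordering to order partitions newest-first by partition id. A minimal standalone sketch of the same idiom, using made-up partition-id strings in place of HivePartition objects:

import java.util.List;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;

public class OrderingDemo
{
    public static void main(String[] args)
    {
        List<String> partitionIds = ImmutableList.of("ds=2021-01-01", "ds=2021-01-03", "ds=2021-01-02");
        // Same shape as Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse():
        // extract a sort key, order naturally, then reverse for newest-first
        List<String> newestFirst = Ordering.natural()
                .onResultOf((String id) -> id)
                .reverse()
                .sortedCopy(partitionIds);
        System.out.println(newestFirst); // [ds=2021-01-03, ds=2021-01-02, ds=2021-01-01]
    }
}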
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
From the JmxSplitManager class, method getSplits:
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter)
{
    JmxTableHandle tableHandle = (JmxTableHandle) table;

    // TODO is there a better way to get the node column?
    Optional<JmxColumnHandle> nodeColumnHandle = tableHandle.getColumnHandles().stream()
            .filter(jmxColumnHandle -> jmxColumnHandle.getColumnName().equals(NODE_COLUMN_NAME))
            .findFirst();
    checkState(nodeColumnHandle.isPresent(), "Failed to find %s column", NODE_COLUMN_NAME);

    TupleDomain<ColumnHandle> nodeFilter = tableHandle.getNodeFilter();
    List<ConnectorSplit> splits = nodeManager.getAllNodes().stream()
            .filter(node -> {
                NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(node.getNodeIdentifier()));
                return nodeFilter.overlaps(fromFixedValues(ImmutableMap.of(nodeColumnHandle.get(), value)));
            })
            .map(node -> new JmxSplit(ImmutableList.of(node.getHostAndPort())))
            .collect(toList());
    return new FixedSplitSource(splits);
}
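The node filter above is a TupleDomain, and each candidate node is tested against it with overlaps(). A minimal sketch of that check, using a plain String key in place of a real ColumnHandle (the key name and node identifiers here are made up):

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;

public class TupleDomainOverlapDemo
{
    public static void main(String[] args)
    {
        // Filter that only admits the node identifier "worker-1" (hypothetical value)
        TupleDomain<String> nodeFilter = TupleDomain.fromFixedValues(
                ImmutableMap.of("node", NullableValue.of(createUnboundedVarcharType(), utf8Slice("worker-1"))));

        // Each candidate node is wrapped the same way and tested with overlaps(),
        // mirroring the filter lambda in JmxSplitManager.getSplits above
        TupleDomain<String> candidate = TupleDomain.fromFixedValues(
                ImmutableMap.of("node", NullableValue.of(createUnboundedVarcharType(), utf8Slice("worker-2"))));

        System.out.println(nodeFilter.overlaps(candidate)); // false: the fixed values differ
    }
}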
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
From the TestRaptorSplitManager class, method testSanity:
@Test
public void testSanity()
{
    ConnectorSplitSource splitSource = getSplits(raptorSplitManager, tableHandle);
    int splitCount = 0;
    while (!splitSource.isFinished()) {
        splitCount += getSplits(splitSource, 1000).size();
    }
    assertEquals(splitCount, 4);
}
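The getSplits(splitSource, 1000) helper is not shown in this test class. A plausible sketch, assuming the ConnectorSplitSource SPI of this Trino version, where getNextBatch takes a partition handle and a batch size and returns a future of a batch:

import static io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

import java.util.List;

import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;

final class SplitSourceTestUtil
{
    private SplitSourceTestUtil() {}

    // Hypothetical stand-in for the tests' getSplits(splitSource, maxSize) helper:
    // fetch one batch of up to maxSize splits, blocking until it is ready
    static List<ConnectorSplit> getSplits(ConnectorSplitSource source, int maxSize)
    {
        return source.getNextBatch(NOT_PARTITIONED, maxSize).join().getSplits();
    }
}

Called in a loop until isFinished() returns true, as in testSanity above, this drains the source completely.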
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
From the TestRaptorSplitManager class, method testNoNodes:
@Test(expectedExceptions = TrinoException.class, expectedExceptionsMessageRegExp = "No nodes available to run query")
public void testNoNodes()
{
    deleteShardNodes();
    RaptorSplitManager raptorSplitManagerWithBackup = new RaptorSplitManager(new CatalogName("fbraptor"), ImmutableSet::of, shardManager, true);
    ConnectorSplitSource splitSource = getSplits(raptorSplitManagerWithBackup, tableHandle);
    getSplits(splitSource, 1000);
}
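For comparison, the same expectation can be phrased without TestNG's expectedExceptions attribute, for example with AssertJ; a sketch reusing the fixtures above:

import static org.assertj.core.api.Assertions.assertThatThrownBy;

assertThatThrownBy(() -> getSplits(splitSource, 1000))
        .isInstanceOf(TrinoException.class)
        .hasMessage("No nodes available to run query");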
Use of io.trino.spi.connector.ConnectorSplitSource in project trino by trinodb.
From the TestRaptorSplitManager class, method testAssignRandomNodeWhenBackupAvailable:
@Test
public void testAssignRandomNodeWhenBackupAvailable()
        throws URISyntaxException
{
    TestingNodeManager nodeManager = new TestingNodeManager();
    CatalogName connectorId = new CatalogName("raptor");
    NodeSupplier nodeSupplier = nodeManager::getWorkerNodes;
    InternalNode node = new InternalNode(UUID.randomUUID().toString(), new URI("http://127.0.0.1/"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(node);
    RaptorSplitManager raptorSplitManagerWithBackup = new RaptorSplitManager(connectorId, nodeSupplier, shardManager, true);
    deleteShardNodes();
    ConnectorSplitSource partitionSplit = getSplits(raptorSplitManagerWithBackup, tableHandle);
    List<ConnectorSplit> batch = getSplits(partitionSplit, 1);
    assertEquals(getOnlyElement(getOnlyElement(batch).getAddresses()), node.getHostAndPort());
}
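The manager-level getSplits(manager, tableHandle) helper used throughout these tests is likewise not shown. A plausible sketch, where SESSION is an assumed test-session fixture and the transaction handle is a no-op placeholder:

import static io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING;

import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.DynamicFilter;

// Hypothetical helper mirroring the signature shown in the examples above;
// SESSION is an assumed fixture (e.g. a testing connector session)
private static ConnectorSplitSource getSplits(ConnectorSplitManager splitManager, ConnectorTableHandle table)
{
    ConnectorTransactionHandle transaction = new ConnectorTransactionHandle() {};
    return splitManager.getSplits(transaction, SESSION, table, UNGROUPED_SCHEDULING, DynamicFilter.EMPTY);
}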