Use of io.trino.metadata.InternalNode in project trino by trinodb.
Example from the class SystemSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    SystemTableHandle table = (SystemTableHandle) tableHandle;
    TupleDomain<ColumnHandle> constraint = table.getConstraint();
    SystemTable systemTable = tables.getSystemTable(session, table.getSchemaTableName())
            .orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()));
    Distribution tableDistributionMode = systemTable.getDistribution();
    if (tableDistributionMode == SINGLE_COORDINATOR) {
        HostAddress address = nodeManager.getCurrentNode().getHostAndPort();
        ConnectorSplit split = new SystemSplit(address, constraint);
        return new FixedSplitSource(ImmutableList.of(split));
    }
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    ImmutableSet.Builder<InternalNode> nodes = ImmutableSet.builder();
    if (tableDistributionMode == ALL_COORDINATORS) {
        nodes.addAll(nodeManager.getCoordinators());
    } else if (tableDistributionMode == ALL_NODES) {
        nodes.addAll(nodeManager.getNodes(ACTIVE));
    }
    Set<InternalNode> nodeSet = nodes.build();
    for (InternalNode node : nodeSet) {
        splits.add(new SystemSplit(node.getHostAndPort(), constraint));
    }
    return new FixedSplitSource(splits.build());
}
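The same one-split-per-node fan-out can be expressed from a connector's side through the SPI NodeManager rather than the engine-internal InternalNodeManager used above. The following is a minimal illustrative sketch only; ExampleSplitManager and ExampleSplit are hypothetical names (ExampleSplit's implementation is not shown), while NodeManager, Node and FixedSplitSource are real SPI classes.

// Illustrative sketch only: one split per worker node, using SPI types.
// ExampleSplitManager and ExampleSplit are hypothetical, not part of Trino.
import com.google.common.collect.ImmutableList;
import io.trino.spi.Node;
import io.trino.spi.NodeManager;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.FixedSplitSource;

public class ExampleSplitManager {
    private final NodeManager nodeManager;

    public ExampleSplitManager(NodeManager nodeManager) {
        this.nodeManager = nodeManager;
    }

    public ConnectorSplitSource createSplits() {
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        for (Node node : nodeManager.getWorkerNodes()) {
            // ExampleSplit would carry the target node address, much like SystemSplit above
            splits.add(new ExampleSplit(node.getHostAndPort()));
        }
        return new FixedSplitSource(splits.build());
    }
}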
Use of io.trino.metadata.InternalNode in project trino by trinodb.
Example from the class ClusterStatsResource, method getClusterStats.
@ResourceSecurity(WEB_UI)
@GET
@Produces(MediaType.APPLICATION_JSON)
public ClusterStats getClusterStats() {
    long runningQueries = 0;
    long blockedQueries = 0;
    long queuedQueries = 0;
    long activeNodes = nodeManager.getNodes(NodeState.ACTIVE).stream()
            .filter(node -> isIncludeCoordinator || !node.isCoordinator())
            .count();
    long activeCoordinators = nodeManager.getNodes(NodeState.ACTIVE).stream()
            .filter(InternalNode::isCoordinator)
            .count();
    long totalAvailableProcessors = clusterMemoryManager.getTotalAvailableProcessors();
    long runningDrivers = 0;
    double memoryReservation = 0;
    long totalInputRows = dispatchManager.getStats().getConsumedInputRows().getTotalCount();
    long totalInputBytes = dispatchManager.getStats().getConsumedInputBytes().getTotalCount();
    long totalCpuTimeSecs = dispatchManager.getStats().getConsumedCpuTimeSecs().getTotalCount();
    for (BasicQueryInfo query : dispatchManager.getQueries()) {
        if (query.getState() == QueryState.QUEUED) {
            queuedQueries++;
        } else if (query.getState() == QueryState.RUNNING) {
            if (query.getQueryStats().isFullyBlocked()) {
                blockedQueries++;
            } else {
                runningQueries++;
            }
        }
        if (!query.getState().isDone()) {
            totalInputBytes += query.getQueryStats().getRawInputDataSize().toBytes();
            totalInputRows += query.getQueryStats().getRawInputPositions();
            totalCpuTimeSecs += query.getQueryStats().getTotalCpuTime().getValue(SECONDS);
            memoryReservation += query.getQueryStats().getUserMemoryReservation().toBytes();
            runningDrivers += query.getQueryStats().getRunningDrivers();
        }
    }
    return new ClusterStats(runningQueries, blockedQueries, queuedQueries, activeCoordinators, activeNodes, runningDrivers, totalAvailableProcessors, memoryReservation, totalInputRows, totalInputBytes, totalCpuTimeSecs);
}
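The loop above classifies each query exactly once: QUEUED queries count as queued, RUNNING queries count as blocked or running depending on isFullyBlocked(), and every non-done query contributes to the rolling totals. A minimal stream-based sketch of just the classification step, using only calls that appear in the method above and assuming queries is the list returned by dispatchManager.getQueries():

// Illustrative alternative to the counting loop (classification only, no running totals).
List<BasicQueryInfo> queries = dispatchManager.getQueries();
long queued = queries.stream()
        .filter(query -> query.getState() == QueryState.QUEUED)
        .count();
long blocked = queries.stream()
        .filter(query -> query.getState() == QueryState.RUNNING)
        .filter(query -> query.getQueryStats().isFullyBlocked())
        .count();
long running = queries.stream()
        .filter(query -> query.getState() == QueryState.RUNNING)
        .filter(query -> !query.getQueryStats().isFullyBlocked())
        .count();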
Use of io.trino.metadata.InternalNode in project trino by trinodb.
Example from the class FixedSourcePartitionedScheduler, method schedule.
@Override
public ScheduleResult schedule() {
    // schedule a task on every node in the distribution
    List<RemoteTask> newTasks = ImmutableList.of();
    if (scheduledTasks.isEmpty()) {
        ImmutableList.Builder<RemoteTask> newTasksBuilder = ImmutableList.builder();
        for (InternalNode node : nodes) {
            Optional<RemoteTask> task = stageExecution.scheduleTask(node, partitionIdAllocator.getNextId(), ImmutableMultimap.of(), ImmutableMultimap.of());
            if (task.isPresent()) {
                scheduledTasks.put(node, task.get());
                newTasksBuilder.add(task.get());
            }
        }
        newTasks = newTasksBuilder.build();
    }
    boolean allBlocked = true;
    List<ListenableFuture<Void>> blocked = new ArrayList<>();
    BlockedReason blockedReason = BlockedReason.NO_ACTIVE_DRIVER_GROUP;
    if (groupedLifespanScheduler.isPresent()) {
        // Start new driver groups on the first scheduler if necessary,
        // i.e. when previous ones have finished execution (not finished scheduling).
        //
        // Invoke schedule method to get a new SettableFuture every time.
        // Reusing previously returned SettableFuture could lead to the ListenableFuture retaining too many listeners.
        blocked.add(groupedLifespanScheduler.get().schedule(sourceSchedulers.get(0)));
    }
    int splitsScheduled = 0;
    Iterator<SourceScheduler> schedulerIterator = sourceSchedulers.iterator();
    List<Lifespan> driverGroupsToStart = ImmutableList.of();
    boolean shouldInvokeNoMoreDriverGroups = false;
    while (schedulerIterator.hasNext()) {
        SourceScheduler sourceScheduler = schedulerIterator.next();
        for (Lifespan lifespan : driverGroupsToStart) {
            sourceScheduler.startLifespan(lifespan, partitionHandleFor(lifespan));
        }
        if (shouldInvokeNoMoreDriverGroups) {
            sourceScheduler.noMoreLifespans();
        }
        ScheduleResult schedule = sourceScheduler.schedule();
        splitsScheduled += schedule.getSplitsScheduled();
        if (schedule.getBlockedReason().isPresent()) {
            blocked.add(schedule.getBlocked());
            blockedReason = blockedReason.combineWith(schedule.getBlockedReason().get());
        } else {
            verify(schedule.getBlocked().isDone(), "blockedReason not provided when scheduler is blocked");
            allBlocked = false;
        }
        driverGroupsToStart = sourceScheduler.drainCompletedLifespans();
        if (schedule.isFinished()) {
            stageExecution.schedulingComplete(sourceScheduler.getPlanNodeId());
            schedulerIterator.remove();
            sourceScheduler.close();
            shouldInvokeNoMoreDriverGroups = true;
        } else {
            shouldInvokeNoMoreDriverGroups = false;
        }
    }
    if (allBlocked) {
        return new ScheduleResult(sourceSchedulers.isEmpty(), newTasks, whenAnyComplete(blocked), blockedReason, splitsScheduled);
    } else {
        return new ScheduleResult(sourceSchedulers.isEmpty(), newTasks, splitsScheduled);
    }
}
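Finished source schedulers are removed from sourceSchedulers during iteration, which is why the method walks an explicit Iterator instead of a for-each loop. A stripped-down sketch of that remove-while-iterating pattern, using the same types that appear above:

// Illustrative sketch of the remove-while-iterating pattern: finished schedulers
// are closed and dropped in the same pass, avoiding ConcurrentModificationException.
Iterator<SourceScheduler> iterator = sourceSchedulers.iterator();
while (iterator.hasNext()) {
    SourceScheduler scheduler = iterator.next();
    ScheduleResult result = scheduler.schedule();
    if (result.isFinished()) {
        iterator.remove();
        scheduler.close();
    }
}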
Use of io.trino.metadata.InternalNode in project trino by trinodb.
Example from the class ScaledWriterScheduler, method scheduleTasks.
private List<RemoteTask> scheduleTasks(int count) {
    if (count == 0) {
        return ImmutableList.of();
    }
    List<InternalNode> nodes = nodeSelector.selectRandomNodes(count, scheduledNodes);
    checkCondition(!scheduledNodes.isEmpty() || !nodes.isEmpty(), NO_NODES_AVAILABLE, "No nodes available to run query");
    ImmutableList.Builder<RemoteTask> tasks = ImmutableList.builder();
    for (InternalNode node : nodes) {
        Optional<RemoteTask> remoteTask = stage.scheduleTask(node, scheduledNodes.size(), ImmutableMultimap.of(), ImmutableMultimap.of());
        remoteTask.ifPresent(task -> {
            tasks.add(task);
            scheduledNodes.add(node);
        });
    }
    return tasks.build();
}
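Passing scheduledNodes as the exclusion set to selectRandomNodes(count, excludedNodes) is what lets the writer set grow incrementally: each call can only return nodes that have not yet been scheduled. A hypothetical helper illustrating that exclusion contract over plain collections (pickRandomNodes is not a Trino method):

// Hypothetical helper, not part of Trino: return up to 'count' candidates
// that are not already in the exclusion set, in random order.
static List<InternalNode> pickRandomNodes(List<InternalNode> candidates, Set<InternalNode> excluded, int count) {
    List<InternalNode> shuffled = new ArrayList<>(candidates);
    Collections.shuffle(shuffled);
    return shuffled.stream()
            .filter(node -> !excluded.contains(node))
            .limit(count)
            .collect(ImmutableList.toImmutableList());
}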
Use of io.trino.metadata.InternalNode in project trino by trinodb.
Example from the class TopologyAwareNodeSelector, method computeAssignments.
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    NodeMap nodeMap = this.nodeMap.get().get();
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    int[] topologicCounters = new int[topologicalSplitCounters.size()];
    Set<NetworkLocation> filledLocations = new HashSet<>();
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    for (Split split : splits) {
        SplitWeight splitWeight = split.getSplitWeight();
        if (!split.isRemotelyAccessible()) {
            List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getNodesByHost().keys());
                throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = bestNodeSplitCount(splitWeight, candidateNodes.iterator(), minCandidates, maxPendingSplitsWeightPerTask, assignmentStats);
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
            continue;
        }
        InternalNode chosenNode = null;
        int depth = topologicalSplitCounters.size() - 1;
        int chosenDepth = 0;
        Set<NetworkLocation> locations = new HashSet<>();
        for (HostAddress host : split.getAddresses()) {
            locations.add(networkTopology.locate(host));
        }
        if (locations.isEmpty()) {
            // Add the root location
            locations.add(ROOT_LOCATION);
            depth = 0;
        }
        // Try each address at progressively shallower network locations
        for (int i = depth; i >= 0 && chosenNode == null; i--) {
            for (NetworkLocation location : locations) {
                // For example, locations which couldn't be located will be at the "root" location
                if (location.getSegments().size() < i) {
                    continue;
                }
                location = location.subLocation(0, i);
                if (filledLocations.contains(location)) {
                    continue;
                }
                Set<InternalNode> nodes = nodeMap.getWorkersByNetworkPath().get(location);
                chosenNode = bestNodeSplitCount(splitWeight, new ResettableRandomizedIterator<>(nodes), minCandidates, calculateMaxPendingSplitsWeightPerTask(i, depth), assignmentStats);
                if (chosenNode != null) {
                    chosenDepth = i;
                    break;
                }
                filledLocations.add(location);
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, splitWeight);
            topologicCounters[chosenDepth]++;
        } else {
            splitWaitingForAnyNode = true;
        }
    }
    for (int i = 0; i < topologicCounters.length; i++) {
        if (topologicCounters[i] > 0) {
            topologicalSplitCounters.get(i).update(topologicCounters[i]);
        }
    }
    ListenableFuture<Void> blocked;
    long maxPendingForWildcardNetworkAffinity = calculateMaxPendingSplitsWeightPerTask(0, topologicalSplitCounters.size() - 1);
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    } else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingForWildcardNetworkAffinity));
    }
    return new SplitPlacementResult(blocked, assignment);
}
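The inner loop widens the candidate pool by truncating each split's network location one segment at a time, from the most specific prefix down to the root. A small standalone illustration of that prefix widening, using plain strings rather than the real NetworkLocation type, and with example segment values that are purely hypothetical:

// Illustrative only: how subLocation(0, i)-style prefixes widen the search.
// Assumed segment hierarchy: region -> rack -> machine.
List<String> segments = List.of("region1", "rack7", "machine42");
for (int i = segments.size(); i >= 0; i--) {
    // i == 3: region1/rack7/machine42, i == 2: region1/rack7, i == 1: region1, i == 0: root
    List<String> prefix = segments.subList(0, i);
    System.out.println(String.join("/", prefix));
}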