Use of io.trino.sql.planner.plan.PlanFragmentId in project trino by trinodb.
From the class FaultTolerantStageScheduler, method schedule().
public synchronized void schedule()
        throws Exception
{
    if (failure != null) {
        propagateIfPossible(failure, Exception.class);
        throw new RuntimeException(failure);
    }

    if (closed) {
        return;
    }

    if (isFinished()) {
        return;
    }

    if (!blocked.isDone()) {
        return;
    }

    if (taskSource == null) {
        Map<PlanFragmentId, ListenableFuture<List<ExchangeSourceHandle>>> sourceHandles = sourceExchanges.entrySet().stream()
                .collect(toImmutableMap(Map.Entry::getKey, entry -> toListenableFuture(entry.getValue().getSourceHandles())));

        List<ListenableFuture<List<ExchangeSourceHandle>>> blockedFutures = sourceHandles.values().stream()
                .filter(future -> !future.isDone())
                .collect(toImmutableList());
        if (!blockedFutures.isEmpty()) {
            blocked = asVoid(allAsList(blockedFutures));
            return;
        }

        Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSources = sourceHandles.entrySet().stream()
                .collect(flatteningToImmutableListMultimap(Map.Entry::getKey, entry -> getFutureValue(entry.getValue()).stream()));

        taskSource = taskSourceFactory.create(
                session,
                stage.getFragment(),
                sourceExchanges,
                exchangeSources,
                stage::recordGetSplitTime,
                sourceBucketToPartitionMap,
                sourceBucketNodeMap);
    }

    while (!queuedPartitions.isEmpty() || !taskSource.isFinished()) {
        while (queuedPartitions.isEmpty() && !taskSource.isFinished()) {
            List<TaskDescriptor> tasks = taskSource.getMoreTasks();
            for (TaskDescriptor task : tasks) {
                queuedPartitions.add(task.getPartitionId());
                allPartitions.add(task.getPartitionId());
                taskDescriptorStorage.put(stage.getStageId(), task);
                sinkExchange.ifPresent(exchange -> {
                    ExchangeSinkHandle exchangeSinkHandle = exchange.addSink(task.getPartitionId());
                    partitionToExchangeSinkHandleMap.put(task.getPartitionId(), exchangeSinkHandle);
                });
            }
            if (taskSource.isFinished()) {
                sinkExchange.ifPresent(Exchange::noMoreSinks);
            }
        }

        if (queuedPartitions.isEmpty()) {
            break;
        }

        int partition = queuedPartitions.peek();
        Optional<TaskDescriptor> taskDescriptorOptional = taskDescriptorStorage.get(stage.getStageId(), partition);
        if (taskDescriptorOptional.isEmpty()) {
            // query has been terminated
            return;
        }
        TaskDescriptor taskDescriptor = taskDescriptorOptional.get();

        MemoryRequirements memoryRequirements = partitionMemoryRequirements.computeIfAbsent(
                partition,
                ignored -> partitionMemoryEstimator.getInitialMemoryRequirements(session, taskDescriptor.getNodeRequirements().getMemory()));

        if (nodeLease == null) {
            NodeRequirements nodeRequirements = taskDescriptor.getNodeRequirements();
            nodeRequirements = nodeRequirements.withMemory(memoryRequirements.getRequiredMemory());
            nodeLease = nodeAllocator.acquire(nodeRequirements);
        }
        if (!nodeLease.getNode().isDone()) {
            blocked = asVoid(nodeLease.getNode());
            return;
        }
        NodeInfo node = getFutureValue(nodeLease.getNode());

        queuedPartitions.poll();

        Multimap<PlanNodeId, Split> tableScanSplits = taskDescriptor.getSplits();
        Multimap<PlanNodeId, Split> remoteSplits = createRemoteSplits(taskDescriptor.getExchangeSourceHandles());

        Multimap<PlanNodeId, Split> taskSplits = ImmutableListMultimap.<PlanNodeId, Split>builder()
                .putAll(tableScanSplits)
                .putAll(remoteSplits)
                .build();

        int attemptId = getNextAttemptIdForPartition(partition);

        OutputBuffers outputBuffers;
        Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle;
        if (sinkExchange.isPresent()) {
            ExchangeSinkHandle sinkHandle = partitionToExchangeSinkHandleMap.get(partition);
            exchangeSinkInstanceHandle = Optional.of(sinkExchange.get().instantiateSink(sinkHandle, attemptId));
            outputBuffers = createSpoolingExchangeOutputBuffers(exchangeSinkInstanceHandle.get());
        }
        else {
            exchangeSinkInstanceHandle = Optional.empty();
            // stage will be consumed by the coordinator using direct exchange
            outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED)
                    .withBuffer(new OutputBuffers.OutputBufferId(0), 0)
                    .withNoMoreBufferIds();
        }

        Set<PlanNodeId> allSourcePlanNodeIds = ImmutableSet.<PlanNodeId>builder()
                .addAll(stage.getFragment().getPartitionedSources())
                .addAll(stage.getFragment().getRemoteSourceNodes().stream()
                        .map(RemoteSourceNode::getId)
                        .iterator())
                .build();

        RemoteTask task = stage.createTask(
                node.getNode(),
                partition,
                attemptId,
                sinkBucketToPartitionMap,
                outputBuffers,
                taskSplits,
                allSourcePlanNodeIds.stream()
                        .collect(toImmutableListMultimap(Function.identity(), planNodeId -> Lifespan.taskWide())),
                allSourcePlanNodeIds)
                .orElseThrow(() -> new VerifyException("stage execution is expected to be active"));

        partitionToRemoteTaskMap.put(partition, task);
        runningTasks.put(task.getTaskId(), task);
        runningNodes.put(task.getTaskId(), nodeLease);
        nodeLease = null;

        if (taskFinishedFuture == null) {
            taskFinishedFuture = SettableFuture.create();
        }

        taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);

        task.addStateChangeListener(taskStatus -> updateTaskStatus(taskStatus, exchangeSinkInstanceHandle));
        task.start();
    }

    if (taskFinishedFuture != null && !taskFinishedFuture.isDone()) {
        blocked = taskFinishedFuture;
    }
}
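The method above never blocks the calling thread: whenever an input is not ready (the source handles, a node lease, or a running task), it records a future in the blocked field and returns, and the caller re-invokes schedule() once that future completes. Below is a minimal, self-contained sketch of that pattern using Guava futures directly; the class and field names are illustrative assumptions, not Trino code, and the transform call stands in for the asVoid(allAsList(...)) helper used above.

import static com.google.common.util.concurrent.MoreExecutors.directExecutor;

import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import java.util.List;
import java.util.stream.Collectors;

class BlockedFutureSketch
{
    // completed future: nothing to wait for initially
    private ListenableFuture<Void> blocked = Futures.immediateFuture(null);

    synchronized void schedule(List<ListenableFuture<String>> sourceHandleFutures)
    {
        if (!blocked.isDone()) {
            // still waiting on the previous obstacle; the caller retries once it completes
            return;
        }
        List<ListenableFuture<String>> pending = sourceHandleFutures.stream()
                .filter(future -> !future.isDone())
                .collect(Collectors.toList());
        if (!pending.isEmpty()) {
            // stand-in for asVoid(allAsList(...)): wait for every pending handle at once
            blocked = Futures.transform(Futures.allAsList(pending), ignored -> null, directExecutor());
            return;
        }
        // all inputs are ready: create the task source and launch tasks, as in schedule() above
    }
}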
Use of io.trino.sql.planner.plan.PlanFragmentId in project trino by trinodb.
From the class PipelinedStageExecution, method updateSourceTasksOutputBuffers().
private synchronized void updateSourceTasksOutputBuffers(Consumer<OutputBufferManager> updater)
{
    for (PlanFragmentId sourceFragment : exchangeSources.keySet()) {
        OutputBufferManager outputBufferManager = outputBufferManagers.get(sourceFragment);
        updater.accept(outputBufferManager);
        for (RemoteTask sourceTask : sourceTasks.get(sourceFragment)) {
            sourceTask.setOutputBuffers(outputBufferManager.getOutputBuffers());
        }
    }
}
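The snippet relies on a simple broadcast pattern: a Consumer mutates the per-fragment OutputBufferManager, and the resulting buffer state is then pushed to every task of that fragment. The following rough, self-contained sketch shows the same pattern with simplified stand-in types (not Trino classes).

import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

class OutputBufferBroadcastSketch
{
    // simplified stand-ins for OutputBufferManager and RemoteTask
    interface BufferManager
    {
        void noMoreBuffers();

        String currentBuffers();
    }

    interface Task
    {
        void setOutputBuffers(String buffers);
    }

    private final Map<String, BufferManager> managersByFragment;
    private final Map<String, List<Task>> tasksByFragment;

    OutputBufferBroadcastSketch(Map<String, BufferManager> managers, Map<String, List<Task>> tasks)
    {
        this.managersByFragment = managers;
        this.tasksByFragment = tasks;
    }

    // apply the update to each fragment's manager, then push the new state to its tasks,
    // e.g. sketch.updateSourceTasks(BufferManager::noMoreBuffers)
    synchronized void updateSourceTasks(Consumer<BufferManager> updater)
    {
        for (Map.Entry<String, BufferManager> entry : managersByFragment.entrySet()) {
            updater.accept(entry.getValue());
            for (Task task : tasksByFragment.getOrDefault(entry.getKey(), List.of())) {
                task.setOutputBuffers(entry.getValue().currentBuffers());
            }
        }
    }
}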
Use of io.trino.sql.planner.plan.PlanFragmentId in project trino by trinodb.
From the class AllAtOnceExecutionSchedule, method getPreferredScheduleOrder().
@VisibleForTesting
static List<PlanFragmentId> getPreferredScheduleOrder(Collection<PlanFragment> fragments)
{
    // determine output fragment
    Set<PlanFragmentId> remoteSources = fragments.stream()
            .map(PlanFragment::getRemoteSourceNodes)
            .flatMap(Collection::stream)
            .map(RemoteSourceNode::getSourceFragmentIds)
            .flatMap(Collection::stream)
            .collect(toImmutableSet());

    Set<PlanFragment> rootFragments = fragments.stream()
            .filter(fragment -> !remoteSources.contains(fragment.getId()))
            .collect(toImmutableSet());

    Visitor visitor = new Visitor(fragments);
    rootFragments.forEach(fragment -> visitor.processFragment(fragment.getId()));

    return visitor.getSchedulerOrder();
}
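The first two statements compute the root (output) fragments by set difference: any fragment that appears as a remote source of some other fragment cannot be a root. A toy, self-contained version of that step, using plain strings in place of PlanFragmentId, looks like this.

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

class RootFragmentSketch
{
    public static void main(String[] args)
    {
        // fragment id -> ids of the fragments it reads from over remote exchanges
        Map<String, List<String>> remoteSourcesByFragment = Map.of(
                "0", List.of("1", "2"),
                "1", List.of("3"),
                "2", List.of(),
                "3", List.of());

        // every fragment that is consumed by some other fragment
        Set<String> referenced = remoteSourcesByFragment.values().stream()
                .flatMap(List::stream)
                .collect(Collectors.toSet());

        // roots are the fragments nobody consumes; they feed the query output
        Set<String> roots = remoteSourcesByFragment.keySet().stream()
                .filter(fragment -> !referenced.contains(fragment))
                .collect(Collectors.toSet());

        System.out.println(roots); // prints [0]
    }
}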
Use of io.trino.sql.planner.plan.PlanFragmentId in project trino by trinodb.
From the class LegacyPhasedExecutionSchedule, method extractPhases().
@VisibleForTesting
static List<Set<PlanFragmentId>> extractPhases(Collection<PlanFragment> fragments)
{
    // Build a graph where the plan fragments are vertexes and the edges represent
    // a before -> after relationship. For example, a join hash build has an edge
    // to the join probe.
    DirectedGraph<PlanFragmentId, DefaultEdge> graph = new DefaultDirectedGraph<>(DefaultEdge.class);
    fragments.forEach(fragment -> graph.addVertex(fragment.getId()));

    Visitor visitor = new Visitor(fragments, graph);
    for (PlanFragment fragment : fragments) {
        visitor.processFragment(fragment.getId());
    }

    // Computes all the strongly connected components of the directed graph.
    // These are the "phases" which hold the set of fragments that must be started
    // at the same time to avoid deadlock.
    List<Set<PlanFragmentId>> components = new StrongConnectivityInspector<>(graph).stronglyConnectedSets();

    Map<PlanFragmentId, Set<PlanFragmentId>> componentMembership = new HashMap<>();
    for (Set<PlanFragmentId> component : components) {
        for (PlanFragmentId planFragmentId : component) {
            componentMembership.put(planFragmentId, component);
        }
    }

    // build graph of components (phases)
    DirectedGraph<Set<PlanFragmentId>, DefaultEdge> componentGraph = new DefaultDirectedGraph<>(DefaultEdge.class);
    components.forEach(componentGraph::addVertex);
    for (DefaultEdge edge : graph.edgeSet()) {
        PlanFragmentId source = graph.getEdgeSource(edge);
        PlanFragmentId target = graph.getEdgeTarget(edge);

        Set<PlanFragmentId> from = componentMembership.get(source);
        Set<PlanFragmentId> to = componentMembership.get(target);
        if (!from.equals(to)) {
            // the topological order iterator below doesn't include vertices that have self-edges, so don't add them
            componentGraph.addEdge(from, to);
        }
    }

    List<Set<PlanFragmentId>> schedulePhases = ImmutableList.copyOf(new TopologicalOrderIterator<>(componentGraph));
    return schedulePhases;
}
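The core technique is a graph condensation: fragments that form a strongly connected component must start together, so each component becomes one phase, and the phases are then ordered topologically. The hedged sketch below demonstrates only the component computation on a toy graph; the package names assume the older JGraphT release used in the snippet above (newer JGraphT versions renamed StrongConnectivityInspector and dropped the DirectedGraph interface).

import java.util.List;
import java.util.Set;

import org.jgrapht.DirectedGraph;
import org.jgrapht.alg.StrongConnectivityInspector;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.jgrapht.graph.DefaultEdge;

class PhaseExtractionSketch
{
    public static void main(String[] args)
    {
        DirectedGraph<String, DefaultEdge> graph = new DefaultDirectedGraph<>(DefaultEdge.class);
        List.of("a", "b", "c").forEach(graph::addVertex);
        graph.addEdge("a", "b"); // a before b
        graph.addEdge("b", "a"); // ...and b before a: a and b must start together
        graph.addEdge("b", "c");

        // {a, b} form one strongly connected component (one phase), {c} another;
        // extractPhases() then condenses the components into a graph and orders it topologically
        List<Set<String>> phases = new StrongConnectivityInspector<>(graph).stronglyConnectedSets();
        System.out.println(phases);
    }
}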
Use of io.trino.sql.planner.plan.PlanFragmentId in project trino by trinodb.
From the class StageTaskSourceFactory, method getExchangeForHandleMap().
private static IdentityHashMap<ExchangeSourceHandle, Exchange> getExchangeForHandleMap(
        Map<PlanFragmentId, Exchange> sourceExchanges,
        Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSourceHandles)
{
    IdentityHashMap<ExchangeSourceHandle, Exchange> exchangeForHandle = new IdentityHashMap<>();
    for (Map.Entry<PlanFragmentId, ExchangeSourceHandle> entry : exchangeSourceHandles.entries()) {
        PlanFragmentId fragmentId = entry.getKey();
        ExchangeSourceHandle handle = entry.getValue();
        Exchange exchange = sourceExchanges.get(fragmentId);
        requireNonNull(exchange, "Exchange not found for fragment " + fragmentId);
        exchangeForHandle.put(handle, exchange);
    }
    return exchangeForHandle;
}
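Note the use of IdentityHashMap: handles are presumably keyed by instance identity rather than by equals(), so two handles that happen to compare equal still map to their own exchange. The short stand-alone example below, with a simplified stand-in Handle type (not a Trino class), shows the difference.

import java.util.IdentityHashMap;
import java.util.Map;

class IdentityMappingSketch
{
    record Handle(int partition) {} // value-based equals/hashCode from the record

    public static void main(String[] args)
    {
        Handle first = new Handle(0);
        Handle second = new Handle(0); // equal to first, but a distinct instance

        Map<Handle, String> byIdentity = new IdentityHashMap<>();
        byIdentity.put(first, "exchange-A");
        byIdentity.put(second, "exchange-B");

        System.out.println(byIdentity.size());      // 2: both instances are kept
        System.out.println(byIdentity.get(first));  // exchange-A
        System.out.println(byIdentity.get(second)); // exchange-B
    }
}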