Use of io.trino.execution.buffer.OutputBuffers in project trino by trinodb.
Example: the schedule() method of the FaultTolerantStageScheduler class.
public synchronized void schedule()
        throws Exception
{
    // rethrow a failure recorded asynchronously by task callbacks
    if (failure != null) {
        propagateIfPossible(failure, Exception.class);
        throw new RuntimeException(failure);
    }
    if (closed) {
        return;
    }
    if (isFinished()) {
        return;
    }
    if (!blocked.isDone()) {
        return;
    }
    if (taskSource == null) {
        // lazily create the task source once all source exchange handles are available
        Map<PlanFragmentId, ListenableFuture<List<ExchangeSourceHandle>>> sourceHandles = sourceExchanges.entrySet().stream()
                .collect(toImmutableMap(Map.Entry::getKey, entry -> toListenableFuture(entry.getValue().getSourceHandles())));
        List<ListenableFuture<List<ExchangeSourceHandle>>> blockedFutures = sourceHandles.values().stream()
                .filter(future -> !future.isDone())
                .collect(toImmutableList());
        if (!blockedFutures.isEmpty()) {
            blocked = asVoid(allAsList(blockedFutures));
            return;
        }
        Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSources = sourceHandles.entrySet().stream()
                .collect(flatteningToImmutableListMultimap(Map.Entry::getKey, entry -> getFutureValue(entry.getValue()).stream()));
        taskSource = taskSourceFactory.create(
                session,
                stage.getFragment(),
                sourceExchanges,
                exchangeSources,
                stage::recordGetSplitTime,
                sourceBucketToPartitionMap,
                sourceBucketNodeMap);
    }
    while (!queuedPartitions.isEmpty() || !taskSource.isFinished()) {
        // pull more task descriptors until there is a partition to schedule
        while (queuedPartitions.isEmpty() && !taskSource.isFinished()) {
            List<TaskDescriptor> tasks = taskSource.getMoreTasks();
            for (TaskDescriptor task : tasks) {
                queuedPartitions.add(task.getPartitionId());
                allPartitions.add(task.getPartitionId());
                taskDescriptorStorage.put(stage.getStageId(), task);
                sinkExchange.ifPresent(exchange -> {
                    ExchangeSinkHandle exchangeSinkHandle = exchange.addSink(task.getPartitionId());
                    partitionToExchangeSinkHandleMap.put(task.getPartitionId(), exchangeSinkHandle);
                });
            }
            if (taskSource.isFinished()) {
                sinkExchange.ifPresent(Exchange::noMoreSinks);
            }
        }
        if (queuedPartitions.isEmpty()) {
            break;
        }
        int partition = queuedPartitions.peek();
        Optional<TaskDescriptor> taskDescriptorOptional = taskDescriptorStorage.get(stage.getStageId(), partition);
        if (taskDescriptorOptional.isEmpty()) {
            // query has been terminated
            return;
        }
        TaskDescriptor taskDescriptor = taskDescriptorOptional.get();
        MemoryRequirements memoryRequirements = partitionMemoryRequirements.computeIfAbsent(
                partition,
                ignored -> partitionMemoryEstimator.getInitialMemoryRequirements(session, taskDescriptor.getNodeRequirements().getMemory()));
        // acquire a node lease sized to the partition's memory requirements
        if (nodeLease == null) {
            NodeRequirements nodeRequirements = taskDescriptor.getNodeRequirements();
            nodeRequirements = nodeRequirements.withMemory(memoryRequirements.getRequiredMemory());
            nodeLease = nodeAllocator.acquire(nodeRequirements);
        }
        if (!nodeLease.getNode().isDone()) {
            blocked = asVoid(nodeLease.getNode());
            return;
        }
        NodeInfo node = getFutureValue(nodeLease.getNode());
        queuedPartitions.poll();
        Multimap<PlanNodeId, Split> tableScanSplits = taskDescriptor.getSplits();
        Multimap<PlanNodeId, Split> remoteSplits = createRemoteSplits(taskDescriptor.getExchangeSourceHandles());
        Multimap<PlanNodeId, Split> taskSplits = ImmutableListMultimap.<PlanNodeId, Split>builder()
                .putAll(tableScanSplits)
                .putAll(remoteSplits)
                .build();
        int attemptId = getNextAttemptIdForPartition(partition);
        OutputBuffers outputBuffers;
        Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle;
        if (sinkExchange.isPresent()) {
            // task writes to a spooling exchange sink
            ExchangeSinkHandle sinkHandle = partitionToExchangeSinkHandleMap.get(partition);
            exchangeSinkInstanceHandle = Optional.of(sinkExchange.get().instantiateSink(sinkHandle, attemptId));
            outputBuffers = createSpoolingExchangeOutputBuffers(exchangeSinkInstanceHandle.get());
        }
        else {
            exchangeSinkInstanceHandle = Optional.empty();
            // stage will be consumed by the coordinator using direct exchange
            outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED)
                    .withBuffer(new OutputBuffers.OutputBufferId(0), 0)
                    .withNoMoreBufferIds();
        }
        Set<PlanNodeId> allSourcePlanNodeIds = ImmutableSet.<PlanNodeId>builder()
                .addAll(stage.getFragment().getPartitionedSources())
                .addAll(stage.getFragment().getRemoteSourceNodes().stream()
                        .map(RemoteSourceNode::getId)
                        .iterator())
                .build();
        RemoteTask task = stage.createTask(
                node.getNode(),
                partition,
                attemptId,
                sinkBucketToPartitionMap,
                outputBuffers,
                taskSplits,
                allSourcePlanNodeIds.stream().collect(toImmutableListMultimap(Function.identity(), planNodeId -> Lifespan.taskWide())),
                allSourcePlanNodeIds)
                .orElseThrow(() -> new VerifyException("stage execution is expected to be active"));
        partitionToRemoteTaskMap.put(partition, task);
        runningTasks.put(task.getTaskId(), task);
        runningNodes.put(task.getTaskId(), nodeLease);
        nodeLease = null;
        if (taskFinishedFuture == null) {
            taskFinishedFuture = SettableFuture.create();
        }
        taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);
        task.addStateChangeListener(taskStatus -> updateTaskStatus(taskStatus, exchangeSinkInstanceHandle));
        task.start();
    }
    // block until at least one running task finishes before scheduling more
    if (taskFinishedFuture != null && !taskFinishedFuture.isDone()) {
        blocked = taskFinishedFuture;
    }
}
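The schedule() method builds OutputBuffers in two shapes: a spooling-exchange descriptor when the stage writes to an external exchange, and a single coordinator-consumed buffer otherwise. The second shape is easy to reproduce in isolation. The following standalone sketch mirrors the fluent construction from the else branch; it is a minimal illustration, and the getBuffers() and isNoMoreBufferIds() accessors are assumptions about the OutputBuffers API at the version these snippets come from.

import static io.trino.execution.buffer.OutputBuffers.BufferType.PARTITIONED;
import static io.trino.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers;

import io.trino.execution.buffer.OutputBuffers;
import io.trino.execution.buffer.OutputBuffers.OutputBufferId;

public class CoordinatorOutputBuffersSketch
{
    public static void main(String[] args)
    {
        // same construction as the direct-exchange branch of schedule():
        // one buffer with id 0 mapped to partition 0, then seal the buffer set
        OutputBuffers buffers = createInitialEmptyOutputBuffers(PARTITIONED)
                .withBuffer(new OutputBufferId(0), 0)
                .withNoMoreBufferIds();
        System.out.println(buffers.getBuffers());        // a single entry: buffer id 0 -> partition 0
        System.out.println(buffers.isNoMoreBufferIds()); // true: no further buffers may be added
    }
}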
Use of io.trino.execution.buffer.OutputBuffers in project trino by trinodb.
Example: the testBufferCloseOnFinish() method of the TestSqlTask class.
@Test
public void testBufferCloseOnFinish()
        throws Exception
{
    SqlTask sqlTask = createInitialTask();
    OutputBuffers outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED)
            .withBuffer(OUT, 0)
            .withNoMoreBufferIds();
    updateTask(sqlTask, EMPTY_SPLIT_ASSIGNMENTS, outputBuffers);
    ListenableFuture<BufferResult> bufferResult = sqlTask.getTaskResults(OUT, 0, DataSize.of(1, MEGABYTE));
    assertFalse(bufferResult.isDone());
    // close the sources (no splits will ever be added)
    updateTask(sqlTask, ImmutableList.of(new SplitAssignment(TABLE_SCAN_NODE_ID, ImmutableSet.of(), true)), outputBuffers);
    // finish the task by destroying its results
    sqlTask.destroyTaskResults(OUT);
    // buffer will be closed by the cancel event (wait for the event to fire)
    bufferResult.get(1, SECONDS);
    // verify the buffer is closed
    bufferResult = sqlTask.getTaskResults(OUT, 0, DataSize.of(1, MEGABYTE));
    assertTrue(bufferResult.isDone());
    assertTrue(bufferResult.get().isBufferComplete());
}
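The final three lines express the invariant under test: once destroyTaskResults has run, any further getTaskResults call completes immediately with a buffer-complete result. Generalized into a hypothetical helper (not part of the test class, shown only to make the polling pattern explicit; it uses only calls that appear in the test above):

// Hypothetical helper: poll the task until the named buffer reports completion.
private static void awaitBufferComplete(SqlTask sqlTask, OutputBufferId bufferId)
        throws Exception
{
    while (true) {
        BufferResult result = sqlTask.getTaskResults(bufferId, 0, DataSize.of(1, MEGABYTE))
                .get(1, SECONDS); // same 1-second wait as the test
        if (result.isBufferComplete()) {
            return; // buffer drained and closed
        }
        // sequence id stays at 0: this check only peeks, it acknowledges nothing
    }
}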
Use of io.trino.execution.buffer.OutputBuffers in project trino by trinodb.
Example: the test() method of the TestBroadcastOutputBufferManager class.
@Test
public void test()
{
    BroadcastOutputBufferManager hashOutputBufferManager = new BroadcastOutputBufferManager();
    assertEquals(hashOutputBufferManager.getOutputBuffers(), createInitialEmptyOutputBuffers(BROADCAST));

    hashOutputBufferManager.addOutputBuffer(new OutputBufferId(0));
    OutputBuffers expectedOutputBuffers = createInitialEmptyOutputBuffers(BROADCAST)
            .withBuffer(new OutputBufferId(0), BROADCAST_PARTITION_ID);
    assertEquals(hashOutputBufferManager.getOutputBuffers(), expectedOutputBuffers);

    hashOutputBufferManager.addOutputBuffer(new OutputBufferId(1));
    hashOutputBufferManager.addOutputBuffer(new OutputBufferId(2));
    expectedOutputBuffers = expectedOutputBuffers.withBuffer(new OutputBufferId(1), BROADCAST_PARTITION_ID);
    expectedOutputBuffers = expectedOutputBuffers.withBuffer(new OutputBufferId(2), BROADCAST_PARTITION_ID);
    assertEquals(hashOutputBufferManager.getOutputBuffers(), expectedOutputBuffers);

    // set no more buffers
    hashOutputBufferManager.addOutputBuffer(new OutputBufferId(3));
    hashOutputBufferManager.noMoreBuffers();
    expectedOutputBuffers = expectedOutputBuffers.withBuffer(new OutputBufferId(3), BROADCAST_PARTITION_ID);
    expectedOutputBuffers = expectedOutputBuffers.withNoMoreBufferIds();
    assertEquals(hashOutputBufferManager.getOutputBuffers(), expectedOutputBuffers);

    // try to add another buffer, which should not result in an error
    // and output buffers should not change
    hashOutputBufferManager.addOutputBuffer(new OutputBufferId(5));
    assertEquals(hashOutputBufferManager.getOutputBuffers(), expectedOutputBuffers);
    // try to set no more buffers again, which should not result in an error
    // and output buffers should not change
    hashOutputBufferManager.noMoreBuffers();
    assertEquals(hashOutputBufferManager.getOutputBuffers(), expectedOutputBuffers);
}
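Note how the test reassigns expectedOutputBuffers after every withBuffer call: OutputBuffers is an immutable value object, so each with* method returns a new instance and leaves the receiver untouched. A minimal standalone sketch of that property, using only calls that appear in the test (equals is relied on by the test's assertEquals, so comparing instances is safe):

import static io.trino.execution.buffer.OutputBuffers.BROADCAST_PARTITION_ID;
import static io.trino.execution.buffer.OutputBuffers.BufferType.BROADCAST;
import static io.trino.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers;

import io.trino.execution.buffer.OutputBuffers;
import io.trino.execution.buffer.OutputBuffers.OutputBufferId;

public class OutputBuffersImmutabilitySketch
{
    public static void main(String[] args)
    {
        OutputBuffers empty = createInitialEmptyOutputBuffers(BROADCAST);
        OutputBuffers withOne = empty.withBuffer(new OutputBufferId(0), BROADCAST_PARTITION_ID);
        // withBuffer returned a new instance; the original descriptor is unchanged
        System.out.println(empty.equals(withOne)); // false
    }
}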
Use of io.trino.execution.buffer.OutputBuffers in project trino by trinodb.
Example: the scheduleTask() method of the PipelinedStageExecution class.
@Override
public synchronized Optional<RemoteTask> scheduleTask(
        InternalNode node,
        int partition,
        Multimap<PlanNodeId, Split> initialSplits,
        Multimap<PlanNodeId, Lifespan> noMoreSplitsForLifespan)
{
    if (stateMachine.getState().isDone()) {
        return Optional.empty();
    }
    checkArgument(!tasks.containsKey(partition), "A task for partition %s already exists", partition);
    OutputBuffers outputBuffers = outputBufferManagers.get(stage.getFragment().getId()).getOutputBuffers();
    Optional<RemoteTask> optionalTask = stage.createTask(
            node,
            partition,
            attempt,
            bucketToPartition,
            outputBuffers,
            initialSplits,
            ImmutableMultimap.of(),
            ImmutableSet.of());
    if (optionalTask.isEmpty()) {
        return Optional.empty();
    }
    RemoteTask task = optionalTask.get();
    tasks.put(partition, task);
    // add exchange splits pointing at the upstream tasks that are still producing output
    ImmutableMultimap.Builder<PlanNodeId, Split> exchangeSplits = ImmutableMultimap.builder();
    sourceTasks.forEach((fragmentId, sourceTask) -> {
        TaskStatus status = sourceTask.getTaskStatus();
        if (status.getState() != TaskState.FINISHED) {
            PlanNodeId planNodeId = exchangeSources.get(fragmentId).getId();
            exchangeSplits.put(planNodeId, createExchangeSplit(sourceTask, task));
        }
    });
    allTasks.add(task.getTaskId());
    task.addSplits(exchangeSplits.build());
    noMoreSplitsForLifespan.forEach(task::noMoreSplits);
    completeSources.forEach(task::noMoreSplits);
    task.addStateChangeListener(this::updateTaskStatus);
    task.addStateChangeListener(this::updateCompletedDriverGroups);
    task.start();
    taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);
    // register this task's buffer with the upstream stages' output buffer managers
    OutputBufferId outputBufferId = new OutputBufferId(task.getTaskId().getPartitionId());
    updateSourceTasksOutputBuffers(outputBufferManager -> outputBufferManager.addOutputBuffer(outputBufferId));
    return Optional.of(task);
}
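The closing lines fan the new task's OutputBufferId out to the upstream stages so that their already-running tasks add a sink buffer for this consumer. The snippet does not show updateSourceTasksOutputBuffers itself; the following is a hypothetical sketch of the pattern, where the field layout and the RemoteTask.setOutputBuffers call are assumptions based on the surrounding code, not the verbatim implementation:

// Hypothetical sketch: apply one update to the OutputBufferManager of each
// upstream fragment, then push the resulting descriptor to that fragment's tasks.
private synchronized void updateSourceTasksOutputBuffers(Consumer<OutputBufferManager> update)
{
    for (PlanFragmentId fragmentId : exchangeSources.keySet()) { // assumed: keyed by source fragment
        OutputBufferManager manager = outputBufferManagers.get(fragmentId);
        update.accept(manager);
        OutputBuffers updated = manager.getOutputBuffers();
        for (RemoteTask sourceTask : sourceTasks.get(fragmentId)) { // sourceTasks as in the snippet above
            sourceTask.setOutputBuffers(updated); // assumed RemoteTask method
        }
    }
}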