Use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.
Class FaultTolerantStageScheduler, method schedule.
/**
 * Advances scheduling of this stage as far as possible without waiting.
 *
 * <p>The method returns early whenever it cannot make progress. In the two waiting cases
 * (source handles not yet available, node not yet allocated) it stores the future it is
 * waiting for in {@code blocked}; a later invocation returns immediately while
 * {@code blocked} is not done. State that is not declared locally ({@code taskSource},
 * {@code nodeLease}, {@code queuedPartitions}, ...) is kept between invocations, so the
 * method resumes from where the previous call stopped.
 *
 * <p>Steps performed, in order:
 * <ol>
 * <li>rethrow a previously recorded {@code failure};</li>
 * <li>do nothing if the scheduler is closed, finished, or still blocked;</li>
 * <li>create the task source once every source exchange has produced its source handles;</li>
 * <li>for each queued partition: obtain a node lease, build splits and output buffers,
 * create the remote task, register it and start it;</li>
 * <li>if at least one task has been created and {@code taskFinishedFuture} is not done,
 * block on it.</li>
 * </ol>
 *
 * @throws Exception the recorded {@code failure} (propagated directly when
 * {@code propagateIfPossible} can do so, otherwise wrapped in a {@link RuntimeException}),
 * or any exception raised by the calls made while scheduling
 */
public synchronized void schedule() throws Exception {
// Surface a previously recorded failure before doing any work.
if (failure != null) {
propagateIfPossible(failure, Exception.class);
// Reached only when propagateIfPossible did not throw.
throw new RuntimeException(failure);
}
if (closed) {
return;
}
if (isFinished()) {
return;
}
// Still waiting on the future set by an earlier invocation.
if (!blocked.isDone()) {
return;
}
// Lazily create the task source; this requires the source handles of every source exchange.
if (taskSource == null) {
Map<PlanFragmentId, ListenableFuture<List<ExchangeSourceHandle>>> sourceHandles = sourceExchanges.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> toListenableFuture(entry.getValue().getSourceHandles())));
List<ListenableFuture<List<ExchangeSourceHandle>>> blockedFutures = sourceHandles.values().stream().filter(future -> !future.isDone()).collect(toImmutableList());
// Some handles are not available yet: block on all pending futures and retry on a later call.
if (!blockedFutures.isEmpty()) {
blocked = asVoid(allAsList(blockedFutures));
return;
}
// Every future is done at this point, so reading the values does not wait.
Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSources = sourceHandles.entrySet().stream().collect(flatteningToImmutableListMultimap(Map.Entry::getKey, entry -> getFutureValue(entry.getValue()).stream()));
taskSource = taskSourceFactory.create(session, stage.getFragment(), sourceExchanges, exchangeSources, stage::recordGetSplitTime, sourceBucketToPartitionMap, sourceBucketNodeMap);
}
// Keep going while there is a queued partition or the task source may still produce tasks.
while (!queuedPartitions.isEmpty() || !taskSource.isFinished()) {
// Refill the queue from the task source only when it has run dry.
while (queuedPartitions.isEmpty() && !taskSource.isFinished()) {
List<TaskDescriptor> tasks = taskSource.getMoreTasks();
for (TaskDescriptor task : tasks) {
queuedPartitions.add(task.getPartitionId());
allPartitions.add(task.getPartitionId());
taskDescriptorStorage.put(stage.getStageId(), task);
// When the stage writes to a sink exchange, register one sink per partition and remember its handle.
sinkExchange.ifPresent(exchange -> {
ExchangeSinkHandle exchangeSinkHandle = exchange.addSink(task.getPartitionId());
partitionToExchangeSinkHandleMap.put(task.getPartitionId(), exchangeSinkHandle);
});
}
// Once the task source reports it is finished, tell the sink exchange that no further sinks will be added.
if (taskSource.isFinished()) {
sinkExchange.ifPresent(Exchange::noMoreSinks);
}
}
// The task source finished without leaving anything queued.
if (queuedPartitions.isEmpty()) {
break;
}
// Only peek here: the partition is removed from the queue after a node has been obtained.
int partition = queuedPartitions.peek();
Optional<TaskDescriptor> taskDescriptorOptional = taskDescriptorStorage.get(stage.getStageId(), partition);
if (taskDescriptorOptional.isEmpty()) {
// query has been terminated
return;
}
TaskDescriptor taskDescriptor = taskDescriptorOptional.get();
// Memory requirements are computed on first use for a partition and reused afterwards.
MemoryRequirements memoryRequirements = partitionMemoryRequirements.computeIfAbsent(partition, ignored -> partitionMemoryEstimator.getInitialMemoryRequirements(session, taskDescriptor.getNodeRequirements().getMemory()));
// Request a node only if no lease is pending from an earlier invocation.
if (nodeLease == null) {
NodeRequirements nodeRequirements = taskDescriptor.getNodeRequirements();
nodeRequirements = nodeRequirements.withMemory(memoryRequirements.getRequiredMemory());
nodeLease = nodeAllocator.acquire(nodeRequirements);
}
// Node not assigned yet: block on the lease and keep it in nodeLease for the next call.
if (!nodeLease.getNode().isDone()) {
blocked = asVoid(nodeLease.getNode());
return;
}
NodeInfo node = getFutureValue(nodeLease.getNode());
queuedPartitions.poll();
// The task input is the descriptor's own splits plus remote splits built from its exchange source handles.
Multimap<PlanNodeId, Split> tableScanSplits = taskDescriptor.getSplits();
Multimap<PlanNodeId, Split> remoteSplits = createRemoteSplits(taskDescriptor.getExchangeSourceHandles());
Multimap<PlanNodeId, Split> taskSplits = ImmutableListMultimap.<PlanNodeId, Split>builder().putAll(tableScanSplits).putAll(remoteSplits).build();
int attemptId = getNextAttemptIdForPartition(partition);
OutputBuffers outputBuffers;
Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle;
if (sinkExchange.isPresent()) {
// A sink instance is created per (partition sink, attempt) pair and backs the task's output buffers.
ExchangeSinkHandle sinkHandle = partitionToExchangeSinkHandleMap.get(partition);
exchangeSinkInstanceHandle = Optional.of(sinkExchange.get().instantiateSink(sinkHandle, attemptId));
outputBuffers = createSpoolingExchangeOutputBuffers(exchangeSinkInstanceHandle.get());
} else {
exchangeSinkInstanceHandle = Optional.empty();
// stage will be consumed by the coordinator using direct exchange
outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED).withBuffer(new OutputBuffers.OutputBufferId(0), 0).withNoMoreBufferIds();
}
// All source plan nodes of the fragment: partitioned sources plus remote source nodes.
Set<PlanNodeId> allSourcePlanNodeIds = ImmutableSet.<PlanNodeId>builder().addAll(stage.getFragment().getPartitionedSources()).addAll(stage.getFragment().getRemoteSourceNodes().stream().map(RemoteSourceNode::getId).iterator()).build();
RemoteTask task = stage.createTask(node.getNode(), partition, attemptId, sinkBucketToPartitionMap, outputBuffers, taskSplits, allSourcePlanNodeIds.stream().collect(toImmutableListMultimap(Function.identity(), planNodeId -> Lifespan.taskWide())), allSourcePlanNodeIds).orElseThrow(() -> new VerifyException("stage execution is expected to be active"));
partitionToRemoteTaskMap.put(partition, task);
runningTasks.put(task.getTaskId(), task);
// The lease is now tracked under the task id; clear the field so the next partition requests a new one.
runningNodes.put(task.getTaskId(), nodeLease);
nodeLease = null;
if (taskFinishedFuture == null) {
taskFinishedFuture = SettableFuture.create();
}
taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);
// The state listener is registered before the task is started.
task.addStateChangeListener(taskStatus -> updateTaskStatus(taskStatus, exchangeSinkInstanceHandle));
task.start();
}
// Nothing more to schedule right now: block on taskFinishedFuture if one exists and is not done.
if (taskFinishedFuture != null && !taskFinishedFuture.isDone()) {
blocked = taskFinishedFuture;
}
}
Use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.
Class LazyOutputBuffer, method setOutputBuffers.
/**
 * Applies a new output buffer configuration, creating the concrete delegate buffer on the
 * first call.
 *
 * <p>If no delegate exists yet, one is created under the instance lock according to
 * {@code newOutputBuffers.getType()}. Buffer destroy requests and reads that were recorded
 * before the delegate existed are snapshotted under the lock and replayed against the
 * delegate after the lock has been released. If the state machine is already in a terminal
 * state and no delegate exists, the call is ignored.
 *
 * @param newOutputBuffers the output buffer configuration to apply
 * @throws IllegalArgumentException if the buffer type is not recognized, or the type is
 * {@code SPOOL} and no exchange sink instance handle is present
 */
@Override
public void setOutputBuffers(OutputBuffers newOutputBuffers) {
    Set<OutputBufferId> destroyedBuffers = ImmutableSet.of();
    List<PendingRead> pendingReads = ImmutableList.of();
    // Unsynchronized read first; the lock is taken only when the delegate has not been created yet.
    OutputBuffer outputBuffer = delegate;
    if (outputBuffer == null) {
        synchronized (this) {
            // Re-read under the lock: another thread may have created the delegate in the meantime.
            outputBuffer = delegate;
            if (outputBuffer == null) {
                // ignore set output if buffer was already destroyed or failed
                if (stateMachine.getState().isTerminal()) {
                    return;
                }
                switch (newOutputBuffers.getType()) {
                    case PARTITIONED:
                        outputBuffer = new PartitionedOutputBuffer(taskInstanceId, stateMachine, newOutputBuffers, maxBufferSize, memoryContextSupplier, executor);
                        break;
                    case BROADCAST:
                        outputBuffer = new BroadcastOutputBuffer(taskInstanceId, stateMachine, maxBroadcastBufferSize, memoryContextSupplier, executor, notifyStatusChanged);
                        break;
                    case ARBITRARY:
                        outputBuffer = new ArbitraryOutputBuffer(taskInstanceId, stateMachine, maxBufferSize, memoryContextSupplier, executor);
                        break;
                    case SPOOL:
                        // The message names the buffer type handled by this case (it previously said EXTERNAL).
                        ExchangeSinkInstanceHandle exchangeSinkInstanceHandle = newOutputBuffers.getExchangeSinkInstanceHandle()
                                .orElseThrow(() -> new IllegalArgumentException("exchange sink handle is expected to be present for buffer type SPOOL"));
                        ExchangeManager exchangeManager = exchangeManagerRegistry.getExchangeManager();
                        // NOTE(review): the meaning of the boolean argument is not visible from here; it is passed through unchanged.
                        ExchangeSink exchangeSink = exchangeManager.createSink(exchangeSinkInstanceHandle, false);
                        outputBuffer = new SpoolingExchangeOutputBuffer(stateMachine, newOutputBuffers, exchangeSink, memoryContextSupplier);
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected output buffer type: " + newOutputBuffers.getType());
                }
                // Snapshot and clear the pending aborts and reads while holding the lock;
                // they are replayed against the delegate after the lock is released.
                destroyedBuffers = ImmutableSet.copyOf(this.destroyedBuffers);
                this.destroyedBuffers.clear();
                pendingReads = ImmutableList.copyOf(this.pendingReads);
                this.pendingReads.clear();
                // Must be assigned last to avoid a race condition with unsynchronized readers
                delegate = outputBuffer;
            }
        }
    }
    outputBuffer.setOutputBuffers(newOutputBuffers);
    // process pending aborts and reads outside of synchronized lock
    destroyedBuffers.forEach(outputBuffer::destroy);
    for (PendingRead pendingRead : pendingReads) {
        pendingRead.process(outputBuffer);
    }
}
Use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.
Class AbstractTestExchangeManager, method testLargePages.
/**
 * Writes the {@code SMALL_PAGE}, {@code MEDIUM_PAGE}, {@code LARGE_PAGE} and {@code MAX_PAGE}
 * payloads through three sinks, spread over partitions 0, 1 and 2, and checks that each
 * partition reads back exactly one copy of each of the four payloads.
 *
 * <p>The exchange is closed in a {@code finally} block so that a failed write, read or
 * assertion does not leave it open.
 */
@Test
public void testLargePages() throws Exception {
    // NOTE(review): the trailing 3 presumably is the number of output partitions - it matches the hasSize(3) check below.
    Exchange exchange = exchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
    try {
        ExchangeSinkHandle sinkHandle0 = exchange.addSink(0);
        ExchangeSinkHandle sinkHandle1 = exchange.addSink(1);
        ExchangeSinkHandle sinkHandle2 = exchange.addSink(2);
        exchange.noMoreSinks();

        // Sink 0 writes nothing to partition 2.
        ExchangeSinkInstanceHandle sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 0);
        writeData(
                sinkInstanceHandle,
                new ImmutableListMultimap.Builder<Integer, String>()
                        .putAll(0, ImmutableList.of(SMALL_PAGE))
                        .putAll(1, ImmutableList.of(MAX_PAGE, MEDIUM_PAGE))
                        .putAll(2, ImmutableList.of())
                        .build(),
                true);
        exchange.sinkFinished(sinkInstanceHandle);

        sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 0);
        writeData(
                sinkInstanceHandle,
                new ImmutableListMultimap.Builder<Integer, String>()
                        .putAll(0, ImmutableList.of(MEDIUM_PAGE))
                        .putAll(1, ImmutableList.of(LARGE_PAGE))
                        .putAll(2, ImmutableList.of(SMALL_PAGE))
                        .build(),
                true);
        exchange.sinkFinished(sinkInstanceHandle);

        sinkInstanceHandle = exchange.instantiateSink(sinkHandle2, 0);
        writeData(
                sinkInstanceHandle,
                new ImmutableListMultimap.Builder<Integer, String>()
                        .putAll(0, ImmutableList.of(LARGE_PAGE, MAX_PAGE))
                        .putAll(1, ImmutableList.of(SMALL_PAGE))
                        .putAll(2, ImmutableList.of(MAX_PAGE, LARGE_PAGE, MEDIUM_PAGE))
                        .build(),
                true);
        exchange.sinkFinished(sinkInstanceHandle);

        List<ExchangeSourceHandle> partitionHandles = exchange.getSourceHandles().get();
        assertThat(partitionHandles).hasSize(3);

        Map<Integer, ExchangeSourceHandle> partitions = partitionHandles.stream()
                .collect(toImmutableMap(ExchangeSourceHandle::getPartitionId, Function.identity()));

        // Across the three sinks, every partition received each of the four payloads once.
        assertThat(readData(partitions.get(0))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
        assertThat(readData(partitions.get(1))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
        assertThat(readData(partitions.get(2))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
    }
    finally {
        // Release the exchange even when a write, read or assertion above fails.
        exchange.close();
    }
}
Use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.
Class AbstractTestExchangeManager, method testHappyPath.
/**
 * Exercises an exchange with three sinks and several attempts per sink.
 *
 * <p>Sinks 0 and 1 each get two attempts that write identical data with the last
 * {@code writeData} argument set to {@code true}, plus a third attempt written with
 * {@code false}. Sink 2 gets a single attempt (attempt id 2) written with {@code true}.
 * The assertions require that each of the two partitions contains every record written
 * with {@code true} exactly once, and none of the records written with {@code false}.
 *
 * <p>The exchange is closed in a {@code finally} block so that a failed write, read or
 * assertion does not leave it open.
 */
@Test
public void testHappyPath() throws Exception {
    // NOTE(review): the trailing 2 presumably is the number of output partitions - it matches the hasSize(2) check below.
    Exchange exchange = exchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 2);
    try {
        ExchangeSinkHandle sinkHandle0 = exchange.addSink(0);
        ExchangeSinkHandle sinkHandle1 = exchange.addSink(1);
        ExchangeSinkHandle sinkHandle2 = exchange.addSink(2);
        exchange.noMoreSinks();

        // Sink 0: attempts 0 and 1 write the same records; attempt 2 is written with the flag set to false.
        // NOTE(review): the boolean presumably selects finish (true) versus abort (false) of the sink - confirm in writeData.
        ExchangeSinkInstanceHandle sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 0);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "0-0-0", 1, "0-1-0", 0, "0-0-1", 1, "0-1-1"), true);
        exchange.sinkFinished(sinkInstanceHandle);
        sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 1);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "0-0-0", 1, "0-1-0", 0, "0-0-1", 1, "0-1-1"), true);
        exchange.sinkFinished(sinkInstanceHandle);
        sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 2);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "failed", 1, "another failed"), false);
        exchange.sinkFinished(sinkInstanceHandle);

        // Sink 1: same pattern as sink 0.
        sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 0);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "1-0-0", 1, "1-1-0", 0, "1-0-1", 1, "1-1-1"), true);
        exchange.sinkFinished(sinkInstanceHandle);
        sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 1);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "1-0-0", 1, "1-1-0", 0, "1-0-1", 1, "1-1-1"), true);
        exchange.sinkFinished(sinkInstanceHandle);
        sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 2);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "more failed", 1, "another failed"), false);
        exchange.sinkFinished(sinkInstanceHandle);

        // Sink 2: a single attempt, using attempt id 2.
        sinkInstanceHandle = exchange.instantiateSink(sinkHandle2, 2);
        writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "2-0-0", 1, "2-1-0"), true);
        exchange.sinkFinished(sinkInstanceHandle);

        List<ExchangeSourceHandle> partitionHandles = exchange.getSourceHandles().get();
        assertThat(partitionHandles).hasSize(2);

        Map<Integer, ExchangeSourceHandle> partitions = partitionHandles.stream()
                .collect(toImmutableMap(ExchangeSourceHandle::getPartitionId, Function.identity()));

        // Records repeated across attempts appear once; records written with false do not appear.
        assertThat(readData(partitions.get(0))).containsExactlyInAnyOrder("0-0-0", "0-0-1", "1-0-0", "1-0-1", "2-0-0");
        assertThat(readData(partitions.get(1))).containsExactlyInAnyOrder("0-1-0", "0-1-1", "1-1-0", "1-1-1", "2-1-0");
    }
    finally {
        // Release the exchange even when a write, read or assertion above fails.
        exchange.close();
    }
}
Aggregations