use of io.trino.spi.exchange.Exchange in project trino by trinodb.
the class FaultTolerantStageScheduler method schedule.
public synchronized void schedule() throws Exception {
if (failure != null) {
propagateIfPossible(failure, Exception.class);
throw new RuntimeException(failure);
}
if (closed) {
return;
}
if (isFinished()) {
return;
}
if (!blocked.isDone()) {
return;
}
if (taskSource == null) {
Map<PlanFragmentId, ListenableFuture<List<ExchangeSourceHandle>>> sourceHandles = sourceExchanges.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> toListenableFuture(entry.getValue().getSourceHandles())));
List<ListenableFuture<List<ExchangeSourceHandle>>> blockedFutures = sourceHandles.values().stream().filter(future -> !future.isDone()).collect(toImmutableList());
if (!blockedFutures.isEmpty()) {
blocked = asVoid(allAsList(blockedFutures));
return;
}
Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSources = sourceHandles.entrySet().stream().collect(flatteningToImmutableListMultimap(Map.Entry::getKey, entry -> getFutureValue(entry.getValue()).stream()));
taskSource = taskSourceFactory.create(session, stage.getFragment(), sourceExchanges, exchangeSources, stage::recordGetSplitTime, sourceBucketToPartitionMap, sourceBucketNodeMap);
}
while (!queuedPartitions.isEmpty() || !taskSource.isFinished()) {
while (queuedPartitions.isEmpty() && !taskSource.isFinished()) {
List<TaskDescriptor> tasks = taskSource.getMoreTasks();
for (TaskDescriptor task : tasks) {
queuedPartitions.add(task.getPartitionId());
allPartitions.add(task.getPartitionId());
taskDescriptorStorage.put(stage.getStageId(), task);
sinkExchange.ifPresent(exchange -> {
ExchangeSinkHandle exchangeSinkHandle = exchange.addSink(task.getPartitionId());
partitionToExchangeSinkHandleMap.put(task.getPartitionId(), exchangeSinkHandle);
});
}
if (taskSource.isFinished()) {
sinkExchange.ifPresent(Exchange::noMoreSinks);
}
}
if (queuedPartitions.isEmpty()) {
break;
}
int partition = queuedPartitions.peek();
Optional<TaskDescriptor> taskDescriptorOptional = taskDescriptorStorage.get(stage.getStageId(), partition);
if (taskDescriptorOptional.isEmpty()) {
// query has been terminated
return;
}
TaskDescriptor taskDescriptor = taskDescriptorOptional.get();
MemoryRequirements memoryRequirements = partitionMemoryRequirements.computeIfAbsent(partition, ignored -> partitionMemoryEstimator.getInitialMemoryRequirements(session, taskDescriptor.getNodeRequirements().getMemory()));
if (nodeLease == null) {
NodeRequirements nodeRequirements = taskDescriptor.getNodeRequirements();
nodeRequirements = nodeRequirements.withMemory(memoryRequirements.getRequiredMemory());
nodeLease = nodeAllocator.acquire(nodeRequirements);
}
if (!nodeLease.getNode().isDone()) {
blocked = asVoid(nodeLease.getNode());
return;
}
NodeInfo node = getFutureValue(nodeLease.getNode());
queuedPartitions.poll();
Multimap<PlanNodeId, Split> tableScanSplits = taskDescriptor.getSplits();
Multimap<PlanNodeId, Split> remoteSplits = createRemoteSplits(taskDescriptor.getExchangeSourceHandles());
Multimap<PlanNodeId, Split> taskSplits = ImmutableListMultimap.<PlanNodeId, Split>builder().putAll(tableScanSplits).putAll(remoteSplits).build();
int attemptId = getNextAttemptIdForPartition(partition);
OutputBuffers outputBuffers;
Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle;
if (sinkExchange.isPresent()) {
ExchangeSinkHandle sinkHandle = partitionToExchangeSinkHandleMap.get(partition);
exchangeSinkInstanceHandle = Optional.of(sinkExchange.get().instantiateSink(sinkHandle, attemptId));
outputBuffers = createSpoolingExchangeOutputBuffers(exchangeSinkInstanceHandle.get());
} else {
exchangeSinkInstanceHandle = Optional.empty();
// stage will be consumed by the coordinator using direct exchange
outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED).withBuffer(new OutputBuffers.OutputBufferId(0), 0).withNoMoreBufferIds();
}
Set<PlanNodeId> allSourcePlanNodeIds = ImmutableSet.<PlanNodeId>builder().addAll(stage.getFragment().getPartitionedSources()).addAll(stage.getFragment().getRemoteSourceNodes().stream().map(RemoteSourceNode::getId).iterator()).build();
RemoteTask task = stage.createTask(node.getNode(), partition, attemptId, sinkBucketToPartitionMap, outputBuffers, taskSplits, allSourcePlanNodeIds.stream().collect(toImmutableListMultimap(Function.identity(), planNodeId -> Lifespan.taskWide())), allSourcePlanNodeIds).orElseThrow(() -> new VerifyException("stage execution is expected to be active"));
partitionToRemoteTaskMap.put(partition, task);
runningTasks.put(task.getTaskId(), task);
runningNodes.put(task.getTaskId(), nodeLease);
nodeLease = null;
if (taskFinishedFuture == null) {
taskFinishedFuture = SettableFuture.create();
}
taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);
task.addStateChangeListener(taskStatus -> updateTaskStatus(taskStatus, exchangeSinkInstanceHandle));
task.start();
}
if (taskFinishedFuture != null && !taskFinishedFuture.isDone()) {
blocked = taskFinishedFuture;
}
}
use of io.trino.spi.exchange.Exchange in project trino by trinodb.
the class StageTaskSourceFactory method getExchangeForHandleMap.
private static IdentityHashMap<ExchangeSourceHandle, Exchange> getExchangeForHandleMap(Map<PlanFragmentId, Exchange> sourceExchanges, Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSourceHandles) {
IdentityHashMap<ExchangeSourceHandle, Exchange> exchangeForHandle = new IdentityHashMap<>();
for (Map.Entry<PlanFragmentId, ExchangeSourceHandle> entry : exchangeSourceHandles.entries()) {
PlanFragmentId fragmentId = entry.getKey();
ExchangeSourceHandle handle = entry.getValue();
Exchange exchange = sourceExchanges.get(fragmentId);
requireNonNull(exchange, "Exchange not found for fragment " + fragmentId);
exchangeForHandle.put(handle, exchange);
}
return exchangeForHandle;
}
use of io.trino.spi.exchange.Exchange in project trino by trinodb.
the class AbstractTestExchangeManager method testLargePages.
@Test
public void testLargePages() throws Exception {
Exchange exchange = exchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
ExchangeSinkHandle sinkHandle0 = exchange.addSink(0);
ExchangeSinkHandle sinkHandle1 = exchange.addSink(1);
ExchangeSinkHandle sinkHandle2 = exchange.addSink(2);
exchange.noMoreSinks();
ExchangeSinkInstanceHandle sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 0);
writeData(sinkInstanceHandle, new ImmutableListMultimap.Builder<Integer, String>().putAll(0, ImmutableList.of(SMALL_PAGE)).putAll(1, ImmutableList.of(MAX_PAGE, MEDIUM_PAGE)).putAll(2, ImmutableList.of()).build(), true);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 0);
writeData(sinkInstanceHandle, new ImmutableListMultimap.Builder<Integer, String>().putAll(0, ImmutableList.of(MEDIUM_PAGE)).putAll(1, ImmutableList.of(LARGE_PAGE)).putAll(2, ImmutableList.of(SMALL_PAGE)).build(), true);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle2, 0);
writeData(sinkInstanceHandle, new ImmutableListMultimap.Builder<Integer, String>().putAll(0, ImmutableList.of(LARGE_PAGE, MAX_PAGE)).putAll(1, ImmutableList.of(SMALL_PAGE)).putAll(2, ImmutableList.of(MAX_PAGE, LARGE_PAGE, MEDIUM_PAGE)).build(), true);
exchange.sinkFinished(sinkInstanceHandle);
List<ExchangeSourceHandle> partitionHandles = exchange.getSourceHandles().get();
assertThat(partitionHandles).hasSize(3);
Map<Integer, ExchangeSourceHandle> partitions = partitionHandles.stream().collect(toImmutableMap(ExchangeSourceHandle::getPartitionId, Function.identity()));
assertThat(readData(partitions.get(0))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
assertThat(readData(partitions.get(1))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
assertThat(readData(partitions.get(2))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
exchange.close();
}
use of io.trino.spi.exchange.Exchange in project trino by trinodb.
the class AbstractTestExchangeManager method testHappyPath.
@Test
public void testHappyPath() throws Exception {
Exchange exchange = exchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 2);
ExchangeSinkHandle sinkHandle0 = exchange.addSink(0);
ExchangeSinkHandle sinkHandle1 = exchange.addSink(1);
ExchangeSinkHandle sinkHandle2 = exchange.addSink(2);
exchange.noMoreSinks();
ExchangeSinkInstanceHandle sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 0);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "0-0-0", 1, "0-1-0", 0, "0-0-1", 1, "0-1-1"), true);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 1);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "0-0-0", 1, "0-1-0", 0, "0-0-1", 1, "0-1-1"), true);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle0, 2);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "failed", 1, "another failed"), false);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 0);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "1-0-0", 1, "1-1-0", 0, "1-0-1", 1, "1-1-1"), true);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 1);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "1-0-0", 1, "1-1-0", 0, "1-0-1", 1, "1-1-1"), true);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle1, 2);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "more failed", 1, "another failed"), false);
exchange.sinkFinished(sinkInstanceHandle);
sinkInstanceHandle = exchange.instantiateSink(sinkHandle2, 2);
writeData(sinkInstanceHandle, ImmutableListMultimap.of(0, "2-0-0", 1, "2-1-0"), true);
exchange.sinkFinished(sinkInstanceHandle);
List<ExchangeSourceHandle> partitionHandles = exchange.getSourceHandles().get();
assertThat(partitionHandles).hasSize(2);
Map<Integer, ExchangeSourceHandle> partitions = partitionHandles.stream().collect(toImmutableMap(ExchangeSourceHandle::getPartitionId, Function.identity()));
assertThat(readData(partitions.get(0))).containsExactlyInAnyOrder("0-0-0", "0-0-1", "1-0-0", "1-0-1", "2-0-0");
assertThat(readData(partitions.get(1))).containsExactlyInAnyOrder("0-1-0", "0-1-1", "1-1-0", "1-1-1", "2-1-0");
exchange.close();
}
use of io.trino.spi.exchange.Exchange in project trino by trinodb.
the class TestStageTaskSourceFactory method testArbitraryDistributionTaskSource.
@Test
public void testArbitraryDistributionTaskSource() {
ExchangeManager splittingExchangeManager = new TestingExchangeManager(true);
ExchangeManager nonSplittingExchangeManager = new TestingExchangeManager(false);
TaskSource taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(), ImmutableListMultimap.of(), ImmutableListMultimap.of(), DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
assertFalse(taskSource.isFinished());
List<TaskDescriptor> tasks = taskSource.getMoreTasks();
assertThat(tasks).isEmpty();
assertTrue(taskSource.isFinished());
TestingExchangeSourceHandle sourceHandle1 = new TestingExchangeSourceHandle(0, 1);
TestingExchangeSourceHandle sourceHandle2 = new TestingExchangeSourceHandle(0, 2);
TestingExchangeSourceHandle sourceHandle3 = new TestingExchangeSourceHandle(0, 3);
TestingExchangeSourceHandle sourceHandle4 = new TestingExchangeSourceHandle(0, 4);
TestingExchangeSourceHandle sourceHandle123 = new TestingExchangeSourceHandle(0, 123);
TestingExchangeSourceHandle sourceHandle321 = new TestingExchangeSourceHandle(0, 321);
Multimap<PlanNodeId, ExchangeSourceHandle> nonReplicatedSources = ImmutableListMultimap.of(PLAN_NODE_1, sourceHandle3);
Exchange exchange = splittingExchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(ImmutableMap.of(sourceHandle3, exchange)), nonReplicatedSources, ImmutableListMultimap.of(), DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
tasks = taskSource.getMoreTasks();
assertTrue(taskSource.isFinished());
assertThat(tasks).hasSize(1);
assertEquals(tasks, ImmutableList.of(new TaskDescriptor(0, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 3)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE)))));
nonReplicatedSources = ImmutableListMultimap.of(PLAN_NODE_1, sourceHandle123);
exchange = nonSplittingExchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(ImmutableMap.of(sourceHandle123, exchange)), nonReplicatedSources, ImmutableListMultimap.of(), DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
tasks = taskSource.getMoreTasks();
assertEquals(tasks, ImmutableList.of(new TaskDescriptor(0, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 123)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE)))));
nonReplicatedSources = ImmutableListMultimap.of(PLAN_NODE_1, sourceHandle123, PLAN_NODE_2, sourceHandle321);
exchange = nonSplittingExchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(ImmutableMap.of(sourceHandle123, exchange, sourceHandle321, exchange)), nonReplicatedSources, ImmutableListMultimap.of(), DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
tasks = taskSource.getMoreTasks();
assertEquals(tasks, ImmutableList.of(new TaskDescriptor(0, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 123)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(1, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 321)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE)))));
nonReplicatedSources = ImmutableListMultimap.of(PLAN_NODE_1, sourceHandle1, PLAN_NODE_1, sourceHandle2, PLAN_NODE_2, sourceHandle4);
exchange = splittingExchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(ImmutableMap.of(sourceHandle1, exchange, sourceHandle2, exchange, sourceHandle4, exchange)), nonReplicatedSources, ImmutableListMultimap.of(), DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
tasks = taskSource.getMoreTasks();
assertEquals(tasks, ImmutableList.of(new TaskDescriptor(0, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1), PLAN_NODE_1, new TestingExchangeSourceHandle(0, 2)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(1, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 3)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(2, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 1)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE)))));
nonReplicatedSources = ImmutableListMultimap.of(PLAN_NODE_1, sourceHandle1, PLAN_NODE_1, sourceHandle3, PLAN_NODE_2, sourceHandle4);
exchange = splittingExchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(ImmutableMap.of(sourceHandle1, exchange, sourceHandle3, exchange, sourceHandle4, exchange)), nonReplicatedSources, ImmutableListMultimap.of(), DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
tasks = taskSource.getMoreTasks();
assertEquals(tasks, ImmutableList.of(new TaskDescriptor(0, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(1, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 3)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(2, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 3)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(3, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_2, new TestingExchangeSourceHandle(0, 1)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE)))));
// with replicated sources
nonReplicatedSources = ImmutableListMultimap.of(PLAN_NODE_1, sourceHandle1, PLAN_NODE_1, sourceHandle2, PLAN_NODE_1, sourceHandle4);
Multimap<PlanNodeId, ExchangeSourceHandle> replicatedSources = ImmutableListMultimap.of(PLAN_NODE_2, sourceHandle321);
exchange = splittingExchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
taskSource = new ArbitraryDistributionTaskSource(new IdentityHashMap<>(ImmutableMap.of(sourceHandle1, exchange, sourceHandle2, exchange, sourceHandle4, exchange, sourceHandle321, exchange)), nonReplicatedSources, replicatedSources, DataSize.of(3, BYTE), DataSize.of(4, GIGABYTE));
tasks = taskSource.getMoreTasks();
assertEquals(tasks, ImmutableList.of(new TaskDescriptor(0, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1), PLAN_NODE_1, new TestingExchangeSourceHandle(0, 2), PLAN_NODE_2, new TestingExchangeSourceHandle(0, 321)), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(1, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 3), PLAN_NODE_2, sourceHandle321), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE))), new TaskDescriptor(2, ImmutableListMultimap.of(), ImmutableListMultimap.of(PLAN_NODE_1, new TestingExchangeSourceHandle(0, 1), PLAN_NODE_2, sourceHandle321), new NodeRequirements(Optional.empty(), ImmutableSet.of(), DataSize.of(4, GIGABYTE)))));
}
Aggregations