Search in sources:

Example 1 with ExchangeSinkInstanceHandle

use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.

The class FaultTolerantStageScheduler, method schedule.

/**
 * Runs one scheduling pass for this stage.
 *
 * <p>The pass (1) creates the task source once every source exchange has produced its
 * source handles, (2) pulls task descriptors from the task source and queues their
 * partitions, and (3) starts one remote task per queued partition for as long as a node
 * can be obtained from the node allocator.
 *
 * <p>The method never waits. When it cannot make progress it stores the future it is
 * waiting on in {@code blocked} and returns; a later call is a no-op until that future
 * is done. It is also a no-op when the scheduler is closed or already finished.
 *
 * @throws Exception the previously recorded {@code failure}, rethrown as-is when
 *         {@code propagateIfPossible} can throw it, otherwise wrapped in a
 *         {@link RuntimeException}
 */
public synchronized void schedule() throws Exception {
    // A recorded failure is surfaced on every call before anything else happens.
    if (failure != null) {
        propagateIfPossible(failure, Exception.class);
        throw new RuntimeException(failure);
    }
    if (closed) {
        return;
    }
    if (isFinished()) {
        return;
    }
    // Still waiting on whatever the previous pass blocked on.
    if (!blocked.isDone()) {
        return;
    }
    // The task source is created lazily, on the first pass where all source handles are ready.
    if (taskSource == null) {
        Map<PlanFragmentId, ListenableFuture<List<ExchangeSourceHandle>>> sourceHandles = sourceExchanges.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> toListenableFuture(entry.getValue().getSourceHandles())));
        List<ListenableFuture<List<ExchangeSourceHandle>>> blockedFutures = sourceHandles.values().stream().filter(future -> !future.isDone()).collect(toImmutableList());
        if (!blockedFutures.isEmpty()) {
            // Block until every outstanding source handle future has completed.
            blocked = asVoid(allAsList(blockedFutures));
            return;
        }
        // All futures are done at this point, so getFutureValue does not wait.
        Multimap<PlanFragmentId, ExchangeSourceHandle> exchangeSources = sourceHandles.entrySet().stream().collect(flatteningToImmutableListMultimap(Map.Entry::getKey, entry -> getFutureValue(entry.getValue()).stream()));
        taskSource = taskSourceFactory.create(session, stage.getFragment(), sourceExchanges, exchangeSources, stage::recordGetSplitTime, sourceBucketToPartitionMap, sourceBucketNodeMap);
    }
    // Keep going while there is a queued partition or the task source may still produce tasks.
    while (!queuedPartitions.isEmpty() || !taskSource.isFinished()) {
        // Refill the queue only when it has run dry.
        while (queuedPartitions.isEmpty() && !taskSource.isFinished()) {
            List<TaskDescriptor> tasks = taskSource.getMoreTasks();
            for (TaskDescriptor task : tasks) {
                queuedPartitions.add(task.getPartitionId());
                allPartitions.add(task.getPartitionId());
                taskDescriptorStorage.put(stage.getStageId(), task);
                // Register one sink per partition and remember its handle for task creation below.
                sinkExchange.ifPresent(exchange -> {
                    ExchangeSinkHandle exchangeSinkHandle = exchange.addSink(task.getPartitionId());
                    partitionToExchangeSinkHandleMap.put(task.getPartitionId(), exchangeSinkHandle);
                });
            }
            // Once the task source reports finished, no further sinks will be added.
            if (taskSource.isFinished()) {
                sinkExchange.ifPresent(Exchange::noMoreSinks);
            }
        }
        // The task source finished without leaving anything queued.
        if (queuedPartitions.isEmpty()) {
            break;
        }
        // Only peek here: the partition is removed (poll below) after a node has been obtained,
        // so it stays at the head of the queue if this pass returns while blocked.
        int partition = queuedPartitions.peek();
        Optional<TaskDescriptor> taskDescriptorOptional = taskDescriptorStorage.get(stage.getStageId(), partition);
        if (taskDescriptorOptional.isEmpty()) {
            // query has been terminated
            return;
        }
        TaskDescriptor taskDescriptor = taskDescriptorOptional.get();
        // The initial estimate is computed only the first time this partition is seen; afterwards
        // whatever is stored in partitionMemoryRequirements is used.
        MemoryRequirements memoryRequirements = partitionMemoryRequirements.computeIfAbsent(partition, ignored -> partitionMemoryEstimator.getInitialMemoryRequirements(session, taskDescriptor.getNodeRequirements().getMemory()));
        // nodeLease outlives a single call: a lease requested by an earlier pass that returned
        // while blocked is reused here instead of acquiring a second one.
        if (nodeLease == null) {
            NodeRequirements nodeRequirements = taskDescriptor.getNodeRequirements();
            nodeRequirements = nodeRequirements.withMemory(memoryRequirements.getRequiredMemory());
            nodeLease = nodeAllocator.acquire(nodeRequirements);
        }
        if (!nodeLease.getNode().isDone()) {
            // No node yet: block on the lease and retry this partition on a later pass.
            blocked = asVoid(nodeLease.getNode());
            return;
        }
        NodeInfo node = getFutureValue(nodeLease.getNode());
        queuedPartitions.poll();
        // The task input is the union of the descriptor's own splits and the splits built from
        // its exchange source handles.
        Multimap<PlanNodeId, Split> tableScanSplits = taskDescriptor.getSplits();
        Multimap<PlanNodeId, Split> remoteSplits = createRemoteSplits(taskDescriptor.getExchangeSourceHandles());
        Multimap<PlanNodeId, Split> taskSplits = ImmutableListMultimap.<PlanNodeId, Split>builder().putAll(tableScanSplits).putAll(remoteSplits).build();
        int attemptId = getNextAttemptIdForPartition(partition);
        OutputBuffers outputBuffers;
        Optional<ExchangeSinkInstanceHandle> exchangeSinkInstanceHandle;
        if (sinkExchange.isPresent()) {
            // A sink instance is created per (sink handle, attempt id) pair.
            ExchangeSinkHandle sinkHandle = partitionToExchangeSinkHandleMap.get(partition);
            exchangeSinkInstanceHandle = Optional.of(sinkExchange.get().instantiateSink(sinkHandle, attemptId));
            outputBuffers = createSpoolingExchangeOutputBuffers(exchangeSinkInstanceHandle.get());
        } else {
            exchangeSinkInstanceHandle = Optional.empty();
            // stage will be consumed by the coordinator using direct exchange
            outputBuffers = createInitialEmptyOutputBuffers(PARTITIONED).withBuffer(new OutputBuffers.OutputBufferId(0), 0).withNoMoreBufferIds();
        }
        // Every source of the fragment: partitioned sources plus the ids of all remote source nodes.
        Set<PlanNodeId> allSourcePlanNodeIds = ImmutableSet.<PlanNodeId>builder().addAll(stage.getFragment().getPartitionedSources()).addAll(stage.getFragment().getRemoteSourceNodes().stream().map(RemoteSourceNode::getId).iterator()).build();
        // An empty result from createTask is treated as a broken invariant and raises VerifyException.
        RemoteTask task = stage.createTask(node.getNode(), partition, attemptId, sinkBucketToPartitionMap, outputBuffers, taskSplits, allSourcePlanNodeIds.stream().collect(toImmutableListMultimap(Function.identity(), planNodeId -> Lifespan.taskWide())), allSourcePlanNodeIds).orElseThrow(() -> new VerifyException("stage execution is expected to be active"));
        partitionToRemoteTaskMap.put(partition, task);
        runningTasks.put(task.getTaskId(), task);
        // The lease is now tracked per running task; clear the field so the next partition
        // acquires its own lease.
        runningNodes.put(task.getTaskId(), nodeLease);
        nodeLease = null;
        // NOTE(review): presumably completed from updateTaskStatus when a task finishes - confirm.
        if (taskFinishedFuture == null) {
            taskFinishedFuture = SettableFuture.create();
        }
        taskLifecycleListener.taskCreated(stage.getFragment().getId(), task);
        task.addStateChangeListener(taskStatus -> updateTaskStatus(taskStatus, exchangeSinkInstanceHandle));
        task.start();
    }
    // Nothing left to schedule in this pass: block on the task-finished future if one is pending.
    if (taskFinishedFuture != null && !taskFinishedFuture.isDone()) {
        blocked = taskFinishedFuture;
    }
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) SettableFuture(com.google.common.util.concurrent.SettableFuture) RemoteSourceNode(io.trino.sql.planner.plan.RemoteSourceNode) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Throwables.propagateIfPossible(com.google.common.base.Throwables.propagateIfPossible) ImmutableListMultimap.toImmutableListMultimap(com.google.common.collect.ImmutableListMultimap.toImmutableListMultimap) MemoryRequirements(io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Map(java.util.Map) SpoolingExchangeInput(io.trino.split.RemoteSplit.SpoolingExchangeInput) REMOTE_HOST_GONE(io.trino.spi.StandardErrorCode.REMOTE_HOST_GONE) Futures.immediateVoidFuture(com.google.common.util.concurrent.Futures.immediateVoidFuture) ImmutableSet(com.google.common.collect.ImmutableSet) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) OutputBuffers.createSpoolingExchangeOutputBuffers(io.trino.execution.buffer.OutputBuffers.createSpoolingExchangeOutputBuffers) ImmutableMap(com.google.common.collect.ImmutableMap) ExecutionFailureInfo(io.trino.execution.ExecutionFailureInfo) Futures.allAsList(com.google.common.util.concurrent.Futures.allAsList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) MoreFutures.toListenableFuture(io.airlift.concurrent.MoreFutures.toListenableFuture) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) GONE(io.trino.failuredetector.FailureDetector.State.GONE) GuardedBy(javax.annotation.concurrent.GuardedBy) TaskId(io.trino.execution.TaskId) ExchangeSinkHandle(io.trino.spi.exchange.ExchangeSinkHandle) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Split(io.trino.metadata.Split) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) Optional(java.util.Optional) 
Queue(java.util.Queue) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) OutputBuffers.createInitialEmptyOutputBuffers(io.trino.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers) ExchangeSourceHandle(io.trino.spi.exchange.ExchangeSourceHandle) Session(io.trino.Session) ImmutableListMultimap.flatteningToImmutableListMultimap(com.google.common.collect.ImmutableListMultimap.flatteningToImmutableListMultimap) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) StageId(io.trino.execution.StageId) Logger(io.airlift.log.Logger) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) ErrorCode(io.trino.spi.ErrorCode) Function(java.util.function.Function) Failures.toFailure(io.trino.util.Failures.toFailure) RemoteSplit(io.trino.split.RemoteSplit) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) USER_ERROR(io.trino.spi.ErrorType.USER_ERROR) Objects.requireNonNull(java.util.Objects.requireNonNull) TaskState(io.trino.execution.TaskState) Lifespan(io.trino.execution.Lifespan) Exchange(io.trino.spi.exchange.Exchange) VerifyException(com.google.common.base.VerifyException) SqlStage(io.trino.execution.SqlStage) FailureDetector(io.trino.failuredetector.FailureDetector) RemoteTask(io.trino.execution.RemoteTask) TaskStatus(io.trino.execution.TaskStatus) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) MoreFutures.asVoid(io.airlift.concurrent.MoreFutures.asVoid) PARTITIONED(io.trino.execution.buffer.OutputBuffers.BufferType.PARTITIONED) Futures.nonCancellationPropagating(com.google.common.util.concurrent.Futures.nonCancellationPropagating) OutputBuffers(io.trino.execution.buffer.OutputBuffers) ArrayDeque(java.util.ArrayDeque) REMOTE_CONNECTOR_ID(io.trino.operator.ExchangeOperator.REMOTE_CONNECTOR_ID) 
MemoryRequirements(io.trino.execution.scheduler.PartitionMemoryEstimator.MemoryRequirements) ExchangeSourceHandle(io.trino.spi.exchange.ExchangeSourceHandle) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) OutputBuffers.createSpoolingExchangeOutputBuffers(io.trino.execution.buffer.OutputBuffers.createSpoolingExchangeOutputBuffers) OutputBuffers.createInitialEmptyOutputBuffers(io.trino.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers) OutputBuffers(io.trino.execution.buffer.OutputBuffers) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) RemoteTask(io.trino.execution.RemoteTask) Exchange(io.trino.spi.exchange.Exchange) ExchangeSinkHandle(io.trino.spi.exchange.ExchangeSinkHandle) VerifyException(com.google.common.base.VerifyException) MoreFutures.toListenableFuture(io.airlift.concurrent.MoreFutures.toListenableFuture) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Split(io.trino.metadata.Split) RemoteSplit(io.trino.split.RemoteSplit) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap)

Example 2 with ExchangeSinkInstanceHandle

use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.

The class LazyOutputBuffer, method setOutputBuffers.

/**
 * Applies {@code newOutputBuffers} to the underlying output buffer, creating that buffer
 * first if it does not exist yet.
 *
 * <p>On the first call the concrete buffer implementation is chosen from
 * {@code newOutputBuffers.getType()}. Aborts and reads that were recorded while no
 * delegate existed are snapshotted under the lock and replayed against the new delegate
 * after the lock has been released. If the state machine is already in a terminal state
 * and no delegate exists, the call is ignored.
 *
 * @param newOutputBuffers the output buffer configuration to apply
 * @throws IllegalArgumentException if the buffer type is not handled by this method, or
 *         if the type is {@code SPOOL} and no exchange sink instance handle is present
 */
@Override
public void setOutputBuffers(OutputBuffers newOutputBuffers) {
    Set<OutputBufferId> destroyedBuffers = ImmutableSet.of();
    List<PendingRead> pendingReads = ImmutableList.of();
    // First read is done without the lock; it is re-checked under the lock below.
    OutputBuffer outputBuffer = delegate;
    if (outputBuffer == null) {
        synchronized (this) {
            outputBuffer = delegate;
            if (outputBuffer == null) {
                // ignore set output if buffer was already destroyed or failed
                if (stateMachine.getState().isTerminal()) {
                    return;
                }
                switch(newOutputBuffers.getType()) {
                    case PARTITIONED:
                        outputBuffer = new PartitionedOutputBuffer(taskInstanceId, stateMachine, newOutputBuffers, maxBufferSize, memoryContextSupplier, executor);
                        break;
                    case BROADCAST:
                        outputBuffer = new BroadcastOutputBuffer(taskInstanceId, stateMachine, maxBroadcastBufferSize, memoryContextSupplier, executor, notifyStatusChanged);
                        break;
                    case ARBITRARY:
                        outputBuffer = new ArbitraryOutputBuffer(taskInstanceId, stateMachine, maxBufferSize, memoryContextSupplier, executor);
                        break;
                    case SPOOL:
                        // The message names the buffer type handled by this branch (SPOOL).
                        ExchangeSinkInstanceHandle exchangeSinkInstanceHandle = newOutputBuffers.getExchangeSinkInstanceHandle().orElseThrow(() -> new IllegalArgumentException("exchange sink handle is expected to be present for buffer type SPOOL"));
                        ExchangeManager exchangeManager = exchangeManagerRegistry.getExchangeManager();
                        // NOTE(review): the meaning of the boolean flag is not visible here - confirm against ExchangeManager.createSink.
                        ExchangeSink exchangeSink = exchangeManager.createSink(exchangeSinkInstanceHandle, false);
                        outputBuffer = new SpoolingExchangeOutputBuffer(stateMachine, newOutputBuffers, exchangeSink, memoryContextSupplier);
                        break;
                    default:
                        throw new IllegalArgumentException("Unexpected output buffer type: " + newOutputBuffers.getType());
                }
                // Snapshot and clear the pending aborts and reads while holding the lock;
                // they are replayed against the new delegate after the lock is released.
                destroyedBuffers = ImmutableSet.copyOf(this.destroyedBuffers);
                this.destroyedBuffers.clear();
                pendingReads = ImmutableList.copyOf(this.pendingReads);
                this.pendingReads.clear();
                // Must be assigned last to avoid a race condition with unsynchronized readers
                delegate = outputBuffer;
            }
        }
    }
    outputBuffer.setOutputBuffers(newOutputBuffers);
    // process pending aborts and reads outside of synchronized lock
    destroyedBuffers.forEach(outputBuffer::destroy);
    for (PendingRead pendingRead : pendingReads) {
        pendingRead.process(outputBuffer);
    }
}
Also used : ExchangeManager(io.trino.spi.exchange.ExchangeManager) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) OutputBufferId(io.trino.execution.buffer.OutputBuffers.OutputBufferId) ExchangeSink(io.trino.spi.exchange.ExchangeSink)

Example 3 with ExchangeSinkInstanceHandle

use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.

The class AbstractTestExchangeManager, method testLargePages.

/**
 * Writes pages of four different sizes through three sinks (one attempt each) and checks
 * that each of the three resulting partitions yields exactly one page of every size.
 */
@Test
public void testLargePages() throws Exception {
    Exchange exchange = exchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 3);
    ExchangeSinkHandle firstSink = exchange.addSink(0);
    ExchangeSinkHandle secondSink = exchange.addSink(1);
    ExchangeSinkHandle thirdSink = exchange.addSink(2);
    exchange.noMoreSinks();

    // First sink: small page to partition 0, max + medium to partition 1, nothing to partition 2.
    ExchangeSinkInstanceHandle firstInstance = exchange.instantiateSink(firstSink, 0);
    ImmutableListMultimap<Integer, String> firstPages = ImmutableListMultimap.<Integer, String>builder()
            .putAll(0, ImmutableList.of(SMALL_PAGE))
            .putAll(1, ImmutableList.of(MAX_PAGE, MEDIUM_PAGE))
            .putAll(2, ImmutableList.of())
            .build();
    writeData(firstInstance, firstPages, true);
    exchange.sinkFinished(firstInstance);

    // Second sink: medium to partition 0, large to partition 1, small to partition 2.
    ExchangeSinkInstanceHandle secondInstance = exchange.instantiateSink(secondSink, 0);
    ImmutableListMultimap<Integer, String> secondPages = ImmutableListMultimap.<Integer, String>builder()
            .putAll(0, ImmutableList.of(MEDIUM_PAGE))
            .putAll(1, ImmutableList.of(LARGE_PAGE))
            .putAll(2, ImmutableList.of(SMALL_PAGE))
            .build();
    writeData(secondInstance, secondPages, true);
    exchange.sinkFinished(secondInstance);

    // Third sink: large + max to partition 0, small to partition 1, max + large + medium to partition 2.
    ExchangeSinkInstanceHandle thirdInstance = exchange.instantiateSink(thirdSink, 0);
    ImmutableListMultimap<Integer, String> thirdPages = ImmutableListMultimap.<Integer, String>builder()
            .putAll(0, ImmutableList.of(LARGE_PAGE, MAX_PAGE))
            .putAll(1, ImmutableList.of(SMALL_PAGE))
            .putAll(2, ImmutableList.of(MAX_PAGE, LARGE_PAGE, MEDIUM_PAGE))
            .build();
    writeData(thirdInstance, thirdPages, true);
    exchange.sinkFinished(thirdInstance);

    List<ExchangeSourceHandle> sourceHandles = exchange.getSourceHandles().get();
    assertThat(sourceHandles).hasSize(3);
    Map<Integer, ExchangeSourceHandle> handleByPartition = sourceHandles.stream()
            .collect(toImmutableMap(ExchangeSourceHandle::getPartitionId, Function.identity()));

    // Every partition received exactly one page of each size across the three sinks.
    for (int partitionId = 0; partitionId < 3; partitionId++) {
        assertThat(readData(handleByPartition.get(partitionId))).containsExactlyInAnyOrder(SMALL_PAGE, MEDIUM_PAGE, LARGE_PAGE, MAX_PAGE);
    }
    exchange.close();
}
Also used : Exchange(io.trino.spi.exchange.Exchange) ExchangeSinkHandle(io.trino.spi.exchange.ExchangeSinkHandle) ExchangeSourceHandle(io.trino.spi.exchange.ExchangeSourceHandle) QueryId(io.trino.spi.QueryId) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) ExchangeContext(io.trino.spi.exchange.ExchangeContext) Test(org.testng.annotations.Test)

Example 4 with ExchangeSinkInstanceHandle

use of io.trino.spi.exchange.ExchangeSinkInstanceHandle in project trino by trinodb.

The class AbstractTestExchangeManager, method testHappyPath.

/**
 * Writes data through several attempts of three sinks and checks what can be read back
 * from the two resulting partitions.
 *
 * <p>Attempts 0 and 1 of sinks 0 and 1 write the same rows; the expected output contains
 * each of those rows once. Rows written with {@code false} as the last {@code writeData}
 * argument do not appear in the expected output.
 * NOTE(review): presumably that flag controls whether the sink is finished or aborted -
 * confirm against writeData.
 */
@Test
public void testHappyPath() throws Exception {
    Exchange exchange = exchangeManager.createExchange(new ExchangeContext(new QueryId("query"), createRandomExchangeId()), 2);
    ExchangeSinkHandle firstSink = exchange.addSink(0);
    ExchangeSinkHandle secondSink = exchange.addSink(1);
    ExchangeSinkHandle thirdSink = exchange.addSink(2);
    exchange.noMoreSinks();

    // First sink, attempts 0 and 1: identical rows written twice.
    ExchangeSinkInstanceHandle firstSinkAttempt0 = exchange.instantiateSink(firstSink, 0);
    ImmutableListMultimap<Integer, String> firstSinkRows0 = ImmutableListMultimap.of(0, "0-0-0", 1, "0-1-0", 0, "0-0-1", 1, "0-1-1");
    writeData(firstSinkAttempt0, firstSinkRows0, true);
    exchange.sinkFinished(firstSinkAttempt0);

    ExchangeSinkInstanceHandle firstSinkAttempt1 = exchange.instantiateSink(firstSink, 1);
    ImmutableListMultimap<Integer, String> firstSinkRows1 = ImmutableListMultimap.of(0, "0-0-0", 1, "0-1-0", 0, "0-0-1", 1, "0-1-1");
    writeData(firstSinkAttempt1, firstSinkRows1, true);
    exchange.sinkFinished(firstSinkAttempt1);

    // First sink, attempt 2: written with the flag set to false.
    ExchangeSinkInstanceHandle firstSinkAttempt2 = exchange.instantiateSink(firstSink, 2);
    ImmutableListMultimap<Integer, String> firstSinkRows2 = ImmutableListMultimap.of(0, "failed", 1, "another failed");
    writeData(firstSinkAttempt2, firstSinkRows2, false);
    exchange.sinkFinished(firstSinkAttempt2);

    // Second sink, attempts 0 and 1: identical rows written twice.
    ExchangeSinkInstanceHandle secondSinkAttempt0 = exchange.instantiateSink(secondSink, 0);
    ImmutableListMultimap<Integer, String> secondSinkRows0 = ImmutableListMultimap.of(0, "1-0-0", 1, "1-1-0", 0, "1-0-1", 1, "1-1-1");
    writeData(secondSinkAttempt0, secondSinkRows0, true);
    exchange.sinkFinished(secondSinkAttempt0);

    ExchangeSinkInstanceHandle secondSinkAttempt1 = exchange.instantiateSink(secondSink, 1);
    ImmutableListMultimap<Integer, String> secondSinkRows1 = ImmutableListMultimap.of(0, "1-0-0", 1, "1-1-0", 0, "1-0-1", 1, "1-1-1");
    writeData(secondSinkAttempt1, secondSinkRows1, true);
    exchange.sinkFinished(secondSinkAttempt1);

    // Second sink, attempt 2: written with the flag set to false.
    ExchangeSinkInstanceHandle secondSinkAttempt2 = exchange.instantiateSink(secondSink, 2);
    ImmutableListMultimap<Integer, String> secondSinkRows2 = ImmutableListMultimap.of(0, "more failed", 1, "another failed");
    writeData(secondSinkAttempt2, secondSinkRows2, false);
    exchange.sinkFinished(secondSinkAttempt2);

    // Third sink: a single attempt, with attempt id 2.
    ExchangeSinkInstanceHandle thirdSinkAttempt2 = exchange.instantiateSink(thirdSink, 2);
    ImmutableListMultimap<Integer, String> thirdSinkRows = ImmutableListMultimap.of(0, "2-0-0", 1, "2-1-0");
    writeData(thirdSinkAttempt2, thirdSinkRows, true);
    exchange.sinkFinished(thirdSinkAttempt2);

    List<ExchangeSourceHandle> sourceHandles = exchange.getSourceHandles().get();
    assertThat(sourceHandles).hasSize(2);
    Map<Integer, ExchangeSourceHandle> handleByPartition = sourceHandles.stream()
            .collect(toImmutableMap(ExchangeSourceHandle::getPartitionId, Function.identity()));
    assertThat(readData(handleByPartition.get(0))).containsExactlyInAnyOrder("0-0-0", "0-0-1", "1-0-0", "1-0-1", "2-0-0");
    assertThat(readData(handleByPartition.get(1))).containsExactlyInAnyOrder("0-1-0", "0-1-1", "1-1-0", "1-1-1", "2-1-0");
    exchange.close();
}
Also used : Exchange(io.trino.spi.exchange.Exchange) ExchangeSinkHandle(io.trino.spi.exchange.ExchangeSinkHandle) ExchangeSourceHandle(io.trino.spi.exchange.ExchangeSourceHandle) QueryId(io.trino.spi.QueryId) ExchangeSinkInstanceHandle(io.trino.spi.exchange.ExchangeSinkInstanceHandle) ExchangeContext(io.trino.spi.exchange.ExchangeContext) Test(org.testng.annotations.Test)

Aggregations

ExchangeSinkInstanceHandle (io.trino.spi.exchange.ExchangeSinkInstanceHandle)4 Exchange (io.trino.spi.exchange.Exchange)3 ExchangeSinkHandle (io.trino.spi.exchange.ExchangeSinkHandle)3 ExchangeSourceHandle (io.trino.spi.exchange.ExchangeSourceHandle)3 ImmutableListMultimap (com.google.common.collect.ImmutableListMultimap)2 QueryId (io.trino.spi.QueryId)2 ExchangeContext (io.trino.spi.exchange.ExchangeContext)2 Test (org.testng.annotations.Test)2 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Throwables.propagateIfPossible (com.google.common.base.Throwables.propagateIfPossible)1 Verify.verify (com.google.common.base.Verify.verify)1 VerifyException (com.google.common.base.VerifyException)1 ArrayListMultimap (com.google.common.collect.ArrayListMultimap)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableListMultimap.flatteningToImmutableListMultimap (com.google.common.collect.ImmutableListMultimap.flatteningToImmutableListMultimap)1 ImmutableListMultimap.toImmutableListMultimap (com.google.common.collect.ImmutableListMultimap.toImmutableListMultimap)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1