Search in sources :

Example 21 with RemoteTask

use of io.trino.execution.RemoteTask in project trino by trinodb.

the class TestHttpRemoteTask method runTest.

/**
 * Drives a remote task against a {@code TestingTaskResource} configured with the given
 * failure scenario, waits for activity to settle, stops the task factory, and then checks
 * that the task reports a done state together with the error code expected for the scenario.
 *
 * @param failureScenario the scenario handed to the testing task resource
 * @throws UnsupportedOperationException if the scenario has no expectation defined here
 */
private void runTest(FailureScenario failureScenario) throws Exception {
    AtomicLong lastActivity = new AtomicLong(System.nanoTime());
    TestingTaskResource taskResource = new TestingTaskResource(lastActivity, failureScenario);
    HttpRemoteTaskFactory taskFactory = createHttpRemoteTaskFactory(taskResource);
    RemoteTask task = createRemoteTask(taskFactory, ImmutableSet.of());

    taskResource.setInitialTaskInfo(task.getTaskInfo());
    task.start();

    waitUntilIdle(lastActivity);
    taskFactory.stop();

    assertTrue(
            task.getTaskStatus().getState().isDone(),
            format("TaskStatus is not in a done state: %s", task.getTaskStatus()));

    // Exactly one failure is expected on the final status
    ErrorCode reportedErrorCode = getOnlyElement(task.getTaskStatus().getFailures()).getErrorCode();
    switch (failureScenario) {
        case TASK_MISMATCH:
        case TASK_MISMATCH_WHEN_VERSION_IS_HIGH:
            assertTrue(
                    task.getTaskInfo().getTaskStatus().getState().isDone(),
                    format("TaskInfo is not in a done state: %s", task.getTaskInfo()));
            assertEquals(reportedErrorCode, REMOTE_TASK_MISMATCH.toErrorCode());
            break;
        case REJECTED_EXECUTION:
            // A rejection only happens once the http client has been shut down, so the final
            // task info cannot be fetched; only the error code is checked here
            assertEquals(reportedErrorCode, REMOTE_TASK_ERROR.toErrorCode());
            break;
        default:
            throw new UnsupportedOperationException();
    }
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) HttpRemoteTaskFactory(io.trino.server.HttpRemoteTaskFactory) RemoteTask(io.trino.execution.RemoteTask) ErrorCode(io.trino.spi.ErrorCode)

Example 22 with RemoteTask

use of io.trino.execution.RemoteTask in project trino by trinodb.

the class UniformNodeSelector method computeAssignments.

/**
 * Computes a node assignment for the given splits and a future describing when the caller
 * may retry placement for splits that could not be assigned.
 *
 * <p>The work is done in up to two passes. When {@code optimizedLocalScheduling} is set, a first
 * pass tries to place remotely accessible splits that carry addresses on one of the nodes matching
 * those addresses; anything not placed there falls through to the second pass. The second pass
 * picks a node from either the exact address-matched nodes (for splits that are not remotely
 * accessible) or a randomized candidate set. Splits for which no node is chosen are left out of
 * the returned assignment and instead influence the returned blocked future.
 *
 * @param splits the splits to place
 * @param existingTasks tasks passed to the assignment statistics and to the queue-space futures
 * @return the blocked future together with the node-to-split assignment
 * @throws TrinoException with {@code NO_NODES_AVAILABLE} when a split has no candidate nodes at all
 */
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMap = this.nodeMap.get().get();
    // Tracks per-node load; updated below every time a split is assigned
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMap, existingTasks);
    ResettableRandomizedIterator<InternalNode> randomCandidates = randomizedNodes(nodeMap, includeCoordinator, ImmutableSet.of());
    // Candidate nodes of non-remotely-accessible splits that could not be placed
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    // Set once a remotely accessible split could not be placed on any candidate
    boolean splitWaitingForAnyNode = false;
    // splitsToBeRedistributed becomes true only when splits go through locality-based assignment
    boolean splitsToBeRedistributed = false;
    Set<Split> remainingSplits = new HashSet<>();
    // optimizedLocalScheduling enables prioritized assignment of splits to local nodes when splits contain locality information
    if (optimizedLocalScheduling) {
        for (Split split : splits) {
            if (split.isRemotelyAccessible() && !split.getAddresses().isEmpty()) {
                List<InternalNode> candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
                // Among address-matched nodes under both the per-node weight limit and the unacknowledged-split limit, take the one with the lowest total weight
                Optional<InternalNode> chosenNode = candidateNodes.stream().filter(ownerNode -> assignmentStats.getTotalSplitsWeight(ownerNode) < maxSplitsWeightPerNode && assignmentStats.getUnacknowledgedSplitCountForStage(ownerNode) < maxUnacknowledgedSplitsPerTask).min(comparingLong(assignmentStats::getTotalSplitsWeight));
                if (chosenNode.isPresent()) {
                    assignment.put(chosenNode.get(), split);
                    assignmentStats.addAssignedSplit(chosenNode.get(), split.getSplitWeight());
                    splitsToBeRedistributed = true;
                    continue;
                }
            }
            // Not placed by the locality pass; handled by the general pass below
            remainingSplits.add(split);
        }
    } else {
        // No locality pass: every split goes through the general pass
        remainingSplits = splits;
    }
    for (Split split : remainingSplits) {
        randomCandidates.reset();
        List<InternalNode> candidateNodes;
        if (!split.isRemotelyAccessible()) {
            // Split must run on one of the nodes matching its addresses
            candidateNodes = selectExactNodes(nodeMap, split.getAddresses(), includeCoordinator);
        } else {
            candidateNodes = selectNodes(minCandidates, randomCandidates);
        }
        if (candidateNodes.isEmpty()) {
            log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMap.getNodesByHost().keys());
            throw new TrinoException(NO_NODES_AVAILABLE, "No nodes available to run query");
        }
        // chooseNodeForSplit is defined outside this block; a null result means it selected no node
        InternalNode chosenNode = chooseNodeForSplit(assignmentStats, candidateNodes);
        if (chosenNode == null) {
            // Fallback: pick the candidate with the smallest queued weight for this stage that is still
            // under the pending-weight and unacknowledged-split limits. Because the comparison is <=,
            // a later candidate with an equal weight replaces an earlier one.
            long minWeight = Long.MAX_VALUE;
            for (InternalNode node : candidateNodes) {
                long queuedWeight = assignmentStats.getQueuedSplitsWeightForStage(node);
                if (queuedWeight <= minWeight && queuedWeight < maxPendingSplitsWeightPerTask && assignmentStats.getUnacknowledgedSplitCountForStage(node) < maxUnacknowledgedSplitsPerTask) {
                    chosenNode = node;
                    minWeight = queuedWeight;
                }
            }
        }
        if (chosenNode != null) {
            assignment.put(chosenNode, split);
            assignmentStats.addAssignedSplit(chosenNode, split.getSplitWeight());
        } else {
            // The split stays unassigned; record what the caller has to wait for
            if (split.isRemotelyAccessible()) {
                splitWaitingForAnyNode = true;
            } else // Exact node set won't matter, if a split is waiting for any node
            if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }
    ListenableFuture<Void> blocked;
    if (splitWaitingForAnyNode) {
        // Wait on the existing tasks in general rather than on a specific node set
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    } else {
        // Wait only on tasks of the recorded exact nodes
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsWeightPerTask));
    }
    if (splitsToBeRedistributed) {
        // Only invoked when the locality pass placed at least one split
        equateDistribution(assignment, assignmentStats, nodeMap, includeCoordinator);
    }
    return new SplitPlacementResult(blocked, assignment);
}
Also used : InternalNodeManager(io.trino.metadata.InternalNodeManager) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) NodeTaskMap(io.trino.execution.NodeTaskMap) Logger(io.airlift.log.Logger) Multimap(com.google.common.collect.Multimap) AtomicReference(java.util.concurrent.atomic.AtomicReference) Supplier(java.util.function.Supplier) SplitWeight(io.trino.spi.SplitWeight) InetAddress(java.net.InetAddress) HashSet(java.util.HashSet) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HashMultimap(com.google.common.collect.HashMultimap) NodeScheduler.randomizedNodes(io.trino.execution.scheduler.NodeScheduler.randomizedNodes) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NodeScheduler.selectNodes(io.trino.execution.scheduler.NodeScheduler.selectNodes) Nullable(javax.annotation.Nullable) ImmutableSet(com.google.common.collect.ImmutableSet) SplitsBalancingPolicy(io.trino.execution.scheduler.NodeSchedulerConfig.SplitsBalancingPolicy) Iterator(java.util.Iterator) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) NodeScheduler.selectDistributionNodes(io.trino.execution.scheduler.NodeScheduler.selectDistributionNodes) RemoteTask(io.trino.execution.RemoteTask) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) UnknownHostException(java.net.UnknownHostException) SetMultimap(com.google.common.collect.SetMultimap) InternalNode(io.trino.metadata.InternalNode) List(java.util.List) NodeScheduler.selectExactNodes(io.trino.execution.scheduler.NodeScheduler.selectExactNodes) Comparator.comparingLong(java.util.Comparator.comparingLong) IndexedPriorityQueue(io.trino.execution.resourcegroups.IndexedPriorityQueue) Split(io.trino.metadata.Split) Optional(java.util.Optional) 
NodeScheduler.calculateLowWatermark(io.trino.execution.scheduler.NodeScheduler.calculateLowWatermark) NO_NODES_AVAILABLE(io.trino.spi.StandardErrorCode.NO_NODES_AVAILABLE) VisibleForTesting(com.google.common.annotations.VisibleForTesting) NodeScheduler.toWhenHasSplitQueueSpaceFuture(io.trino.execution.scheduler.NodeScheduler.toWhenHasSplitQueueSpaceFuture) NodeScheduler.getAllNodes(io.trino.execution.scheduler.NodeScheduler.getAllNodes) HostAddress(io.trino.spi.HostAddress) TrinoException(io.trino.spi.TrinoException) InternalNode(io.trino.metadata.InternalNode) Split(io.trino.metadata.Split) HashSet(java.util.HashSet)

Example 23 with RemoteTask

use of io.trino.execution.RemoteTask in project trino by trinodb.

the class NodeScheduler method toWhenHasSplitQueueSpaceFuture.

/**
 * Builds a future from the split-queue-space futures of the tasks running on the given blocked nodes.
 *
 * <p>Tasks are matched to nodes by comparing {@code RemoteTask.getNodeId()} with
 * {@code InternalNode.getNodeIdentifier()}; when several tasks share a node id the last one in
 * {@code existingTasks} is used. Blocked nodes without a matching task are ignored.
 *
 * @param blockedNodes nodes to wait on
 * @param existingTasks tasks from which the per-node task is looked up
 * @param weightSpaceThreshold value forwarded to {@code RemoteTask.whenSplitQueueHasSpace}
 * @return an immediately completed future when there are no blocked nodes or none of them has a
 *         matching task; otherwise the per-task futures combined through
 *         {@code whenAnyCompleteCancelOthers} and converted with {@code asVoid}
 */
public static ListenableFuture<Void> toWhenHasSplitQueueSpaceFuture(Set<InternalNode> blockedNodes, List<RemoteTask> existingTasks, long weightSpaceThreshold) {
    if (blockedNodes.isEmpty()) {
        return immediateVoidFuture();
    }

    // Index tasks by node id; a later task overwrites an earlier one for the same node
    Map<String, RemoteTask> tasksByNodeId = new HashMap<>();
    existingTasks.forEach(existingTask -> tasksByNodeId.put(existingTask.getNodeId(), existingTask));

    List<ListenableFuture<Void>> queueSpaceFutures = blockedNodes.stream()
            .map(blockedNode -> tasksByNodeId.get(blockedNode.getNodeIdentifier()))
            .filter(Objects::nonNull)
            .map(matchedTask -> matchedTask.whenSplitQueueHasSpace(weightSpaceThreshold))
            .collect(toImmutableList());

    if (!queueSpaceFutures.isEmpty()) {
        return asVoid(whenAnyCompleteCancelOthers(queueSpaceFutures));
    }
    return immediateVoidFuture();
}
Also used : ListenableFuture(com.google.common.util.concurrent.ListenableFuture) NodeTaskMap(io.trino.execution.NodeTaskMap) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) SplitWeight(io.trino.spi.SplitWeight) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) InetAddress(java.net.InetAddress) HashSet(java.util.HashSet) CatalogName(io.trino.connector.CatalogName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HashMultimap(com.google.common.collect.HashMultimap) ImmutableList(com.google.common.collect.ImmutableList) MoreFutures.whenAnyCompleteCancelOthers(io.airlift.concurrent.MoreFutures.whenAnyCompleteCancelOthers) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) LinkedHashSet(java.util.LinkedHashSet) Futures.immediateVoidFuture(com.google.common.util.concurrent.Futures.immediateVoidFuture) Iterator(java.util.Iterator) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) RemoteTask(io.trino.execution.RemoteTask) Set(java.util.Set) UnknownHostException(java.net.UnknownHostException) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) Objects(java.util.Objects) Futures(com.google.common.util.concurrent.Futures) InternalNode(io.trino.metadata.InternalNode) List(java.util.List) Math.addExact(java.lang.Math.addExact) Split(io.trino.metadata.Split) Optional(java.util.Optional) HostAddress(io.trino.spi.HostAddress) Session(io.trino.Session) HashMap(java.util.HashMap) Objects(java.util.Objects) RemoteTask(io.trino.execution.RemoteTask) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) InternalNode(io.trino.metadata.InternalNode)

Example 24 with RemoteTask

use of io.trino.execution.RemoteTask in project trino by trinodb.

the class PipelinedStageExecution method failTask.

/**
 * Fails the task registered under the partition id of {@code taskId} and then calls
 * {@code fail} on this object with the same cause.
 *
 * @param taskId identifies the task through its partition id
 * @param failureCause the cause passed to both the task and this object
 * @throws NullPointerException with message {@code "task not found: <taskId>"} when no task is
 *         registered for that partition id
 */
@Override
public synchronized void failTask(TaskId taskId, Throwable failureCause) {
    RemoteTask failedTask = tasks.get(taskId.getPartitionId());
    requireNonNull(failedTask, () -> "task not found: " + taskId);

    failedTask.fail(failureCause);
    fail(failureCause);
}
Also used : RemoteTask(io.trino.execution.RemoteTask)

Example 25 with RemoteTask

use of io.trino.execution.RemoteTask in project trino by trinodb.

the class PipelinedStageExecution method scheduleTask.

/**
 * Creates a task for the given partition on the given node, wires it up, and starts it.
 *
 * <p>After the task is created it is registered under its partition, given exchange splits for
 * every source task whose status is not {@code FINISHED}, informed about lifespans and sources
 * with no more splits, subscribed to state changes, started, and announced to the task lifecycle
 * listener. Finally an output buffer keyed by the task's partition id is added through
 * {@code updateSourceTasksOutputBuffers}.
 *
 * @param node the node handed to {@code stage.createTask}
 * @param partition the partition the task is created for; must not already have a task
 * @param initialSplits splits handed to {@code stage.createTask}
 * @param noMoreSplitsForLifespan entries forwarded to the task's {@code noMoreSplits}
 * @return the started task, or empty when the state machine is already in a done state or
 *         {@code stage.createTask} produced no task
 * @throws IllegalArgumentException if a task already exists for {@code partition}
 */
@Override
public synchronized Optional<RemoteTask> scheduleTask(InternalNode node, int partition, Multimap<PlanNodeId, Split> initialSplits, Multimap<PlanNodeId, Lifespan> noMoreSplitsForLifespan) {
    if (stateMachine.getState().isDone()) {
        return Optional.empty();
    }
    checkArgument(!tasks.containsKey(partition), "A task for partition %s already exists", partition);

    OutputBuffers initialOutputBuffers = outputBufferManagers.get(stage.getFragment().getId()).getOutputBuffers();
    Optional<RemoteTask> createdTask = stage.createTask(node, partition, attempt, bucketToPartition, initialOutputBuffers, initialSplits, ImmutableMultimap.of(), ImmutableSet.of());
    if (createdTask.isEmpty()) {
        return Optional.empty();
    }
    RemoteTask newTask = createdTask.get();
    tasks.put(partition, newTask);

    // Collect one exchange split per source task that has not finished yet
    ImmutableMultimap.Builder<PlanNodeId, Split> exchangeSplitsBuilder = ImmutableMultimap.builder();
    sourceTasks.forEach((sourceFragmentId, upstreamTask) -> {
        TaskStatus upstreamStatus = upstreamTask.getTaskStatus();
        if (upstreamStatus.getState() == TaskState.FINISHED) {
            return;
        }
        PlanNodeId exchangeNodeId = exchangeSources.get(sourceFragmentId).getId();
        exchangeSplitsBuilder.put(exchangeNodeId, createExchangeSplit(upstreamTask, newTask));
    });

    allTasks.add(newTask.getTaskId());
    newTask.addSplits(exchangeSplitsBuilder.build());
    noMoreSplitsForLifespan.forEach(newTask::noMoreSplits);
    completeSources.forEach(newTask::noMoreSplits);

    // Listeners are attached before the task is started
    newTask.addStateChangeListener(this::updateTaskStatus);
    newTask.addStateChangeListener(this::updateCompletedDriverGroups);
    newTask.start();
    taskLifecycleListener.taskCreated(stage.getFragment().getId(), newTask);

    // Register an output buffer for the new task, keyed by its partition id
    OutputBufferId newBufferId = new OutputBufferId(newTask.getTaskId().getPartitionId());
    updateSourceTasksOutputBuffers(bufferManager -> bufferManager.addOutputBuffer(newBufferId));
    return Optional.of(newTask);
}
Also used : PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) OutputBuffers(io.trino.execution.buffer.OutputBuffers) RemoteTask(io.trino.execution.RemoteTask) OutputBufferId(io.trino.execution.buffer.OutputBuffers.OutputBufferId) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) Split(io.trino.metadata.Split) RemoteSplit(io.trino.split.RemoteSplit) TaskStatus(io.trino.execution.TaskStatus)

Aggregations

RemoteTask (io.trino.execution.RemoteTask)26 InternalNode (io.trino.metadata.InternalNode)11 Test (org.testng.annotations.Test)11 NodeTaskMap (io.trino.execution.NodeTaskMap)9 ImmutableList (com.google.common.collect.ImmutableList)7 MockRemoteTask (io.trino.execution.MockRemoteTaskFactory.MockRemoteTask)7 PartitionedSplitsInfo (io.trino.execution.PartitionedSplitsInfo)7 PipelinedStageExecution.createPipelinedStageExecution (io.trino.execution.scheduler.PipelinedStageExecution.createPipelinedStageExecution)7 SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler (io.trino.execution.scheduler.SourcePartitionedScheduler.newSourcePartitionedSchedulerAsStageScheduler)7 Split (io.trino.metadata.Split)7 PlanFragment (io.trino.sql.planner.PlanFragment)7 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)5 List (java.util.List)5 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 Duration (io.airlift.units.Duration)4 Lifespan (io.trino.execution.Lifespan)4 TaskId (io.trino.execution.TaskId)4 Optional (java.util.Optional)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3