Search in sources :

Example 71 with PlanNodeId

use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.

the class TestingOperatorContext method create.

/**
 * Builds a standalone {@link OperatorContext} for tests.
 *
 * <p>A task context, a pipeline context and a driver context are created in turn, and a
 * single operator context (operator id 1, plan node id "test") is registered on the driver
 * context and returned. Non-scheduled work uses a direct executor.
 *
 * @param scheduledExecutor executor handed to the task, pipeline and driver contexts
 * @return the operator context added to the freshly created driver context
 */
public static OperatorContext create(ScheduledExecutorService scheduledExecutor) {
    Executor directExecutor = MoreExecutors.directExecutor();
    Session testSession = TestingSession.testSessionBuilder().build();
    TaskContext task = TestingTaskContext.createTaskContext(directExecutor, scheduledExecutor, testSession);

    // The same memory tracking context instance is shared by the pipeline and the driver.
    MemoryTrackingContext memoryContext = new MemoryTrackingContext(
            newSimpleAggregatedMemoryContext(),
            newSimpleAggregatedMemoryContext(),
            newSimpleAggregatedMemoryContext());

    PipelineContext pipeline = new PipelineContext(1, task, directExecutor, scheduledExecutor, memoryContext, false, false, false);
    DriverContext driver = new DriverContext(pipeline, directExecutor, scheduledExecutor, memoryContext, Lifespan.taskWide(), Optional.empty(), 0L);

    return driver.addOperatorContext(1, new PlanNodeId("test"), "operator type");
}
Also used : PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) Executor(java.util.concurrent.Executor) TestingTaskContext(com.facebook.presto.testing.TestingTaskContext) MemoryTrackingContext(com.facebook.presto.memory.context.MemoryTrackingContext)

Example 72 with PlanNodeId

use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.

the class SqlStageExecution method scheduleTask.

/**
 * Creates a remote task for {@code taskId} on {@code node}, registers it with this stage,
 * and then starts it, or aborts it if the stage state is already done.
 *
 * <p>The task's initial splits are the supplied {@code sourceSplits} plus one remote split
 * for every entry of {@code sourceTasks} whose task state is not {@code FINISHED}.
 *
 * @param node the node the task is created on
 * @param taskId id of the new task; must not already be present in {@code allTasks}
 * @param sourceSplits splits to hand to the task at creation, keyed by plan node
 * @return the created task, after it has been started or aborted
 * @throws IllegalArgumentException if a task with {@code taskId} already exists
 * @throws IllegalStateException if the output buffers have not been set yet
 */
private synchronized RemoteTask scheduleTask(InternalNode node, TaskId taskId, Multimap<PlanNodeId, Split> sourceSplits) {
    checkArgument(!allTasks.contains(taskId), "A task with id %s already exists", taskId);

    // Seed the task with the caller's splits plus a remote split per unfinished source task.
    ImmutableMultimap.Builder<PlanNodeId, Split> splitsBuilder = ImmutableMultimap.builder();
    splitsBuilder.putAll(sourceSplits);
    sourceTasks.forEach((sourceNodeId, sourceTask) -> {
        if (sourceTask.getTaskStatus().getState() != TaskState.FINISHED) {
            splitsBuilder.put(sourceNodeId, createRemoteSplitFor(taskId, sourceTask.getRemoteTaskLocation(), sourceTask.getTaskId()));
        }
    });

    OutputBuffers currentOutputBuffers = this.outputBuffers.get();
    checkState(currentOutputBuffers != null, "Initial output buffers must be set before a task can be scheduled");

    RemoteTask newTask = remoteTaskFactory.createRemoteTask(
            session,
            taskId,
            node,
            planFragment,
            splitsBuilder.build(),
            currentOutputBuffers,
            nodeTaskMap.createTaskStatsTracker(node, taskId),
            summarizeTaskInfo,
            tableWriteInfo);

    // Tell the new task about every source already recorded as complete.
    completeSources.forEach(newTask::noMoreSplits);

    // Bookkeeping: record the task id, the per-node task set and the node/task mapping.
    allTasks.add(taskId);
    tasks.computeIfAbsent(node, ignored -> newConcurrentHashSet()).add(newTask);
    nodeTaskMap.addTask(node, newTask);

    newTask.addStateChangeListener(new StageTaskListener(taskId));
    newTask.addFinalTaskInfoListener(this::updateFinalTaskInfo);

    if (stateMachine.getState().isDone()) {
        // stage finished while we were scheduling this task
        newTask.abort();
    } else {
        newTask.start();
    }
    return newTask;
}
Also used : PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) RemoteSourceNode(com.facebook.presto.sql.planner.plan.RemoteSourceNode) GENERIC_INTERNAL_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Duration(io.airlift.units.Duration) TableWriteInfo(com.facebook.presto.execution.scheduler.TableWriteInfo) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) PlanFragment(com.facebook.presto.sql.planner.PlanFragment) HashMultimap(com.google.common.collect.HashMultimap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) URI(java.net.URI) ImmutableSet(com.google.common.collect.ImmutableSet) REMOTE_TASK_MISMATCH(com.facebook.presto.spi.StandardErrorCode.REMOTE_TASK_MISMATCH) SplitSchedulerStats(com.facebook.presto.execution.scheduler.SplitSchedulerStats) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) PAGE_TRANSPORT_TIMEOUT(com.facebook.presto.spi.StandardErrorCode.PAGE_TRANSPORT_TIMEOUT) GuardedBy(javax.annotation.concurrent.GuardedBy) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) TOO_MANY_REQUESTS_FAILED(com.facebook.presto.spi.StandardErrorCode.TOO_MANY_REQUESTS_FAILED) Entry(java.util.Map.Entry) Optional(java.util.Optional) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) OutputBuffers(com.facebook.presto.execution.buffer.OutputBuffers) ErrorCode(com.facebook.presto.spi.ErrorCode) NANOSECONDS(java.util.concurrent.TimeUnit.NANOSECONDS) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) 
SystemSessionProperties.getMaxFailedTaskPercentage(com.facebook.presto.SystemSessionProperties.getMaxFailedTaskPercentage) PrestoException(com.facebook.presto.spi.PrestoException) Multimap(com.google.common.collect.Multimap) AtomicReference(java.util.concurrent.atomic.AtomicReference) REMOTE_HOST_GONE(com.facebook.presto.spi.StandardErrorCode.REMOTE_HOST_GONE) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Objects.requireNonNull(java.util.Objects.requireNonNull) RemoteSplit(com.facebook.presto.split.RemoteSplit) PlanFragmentId(com.facebook.presto.sql.planner.plan.PlanFragmentId) GENERIC_RECOVERY_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_RECOVERY_ERROR) RemoteTransactionHandle(com.facebook.presto.metadata.RemoteTransactionHandle) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ExecutorService(java.util.concurrent.ExecutorService) PAGE_TRANSPORT_ERROR(com.facebook.presto.spi.StandardErrorCode.PAGE_TRANSPORT_ERROR) Executor(java.util.concurrent.Executor) Session(com.facebook.presto.Session) Sets.newConcurrentHashSet(com.google.common.collect.Sets.newConcurrentHashSet) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) InternalNode(com.facebook.presto.metadata.InternalNode) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) StateChangeListener(com.facebook.presto.execution.StateMachine.StateChangeListener) REMOTE_TASK_ERROR(com.facebook.presto.spi.StandardErrorCode.REMOTE_TASK_ERROR) FailureDetector(com.facebook.presto.failureDetector.FailureDetector) REMOTE_CONNECTOR_ID(com.facebook.presto.operator.ExchangeOperator.REMOTE_CONNECTOR_ID) Split(com.facebook.presto.metadata.Split) GONE(com.facebook.presto.failureDetector.FailureDetector.State.GONE) BYTE(io.airlift.units.DataSize.Unit.BYTE) OutputBuffers(com.facebook.presto.execution.buffer.OutputBuffers) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) RemoteSplit(com.facebook.presto.split.RemoteSplit) 
Split(com.facebook.presto.metadata.Split)

Example 73 with PlanNodeId

use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.

the class SqlTaskExecution method scheduleTableScanSource.

/**
 * Merges the splits carried by {@code sourceUpdate} into the pending splits and then
 * enqueues driver runners for whatever can currently be scheduled.
 *
 * <p>The outer loop repeats full passes over the active lifespans until a pass makes no
 * progress. Within a pass, each lifespan keeps advancing through its scheduling plan nodes
 * for as long as the current plan node's pending splits are in the {@code NO_MORE_SPLITS}
 * state. Once the loop ends, if the update is flagged as "no more splits", the scheduling
 * lifespan manager is told so for the update's plan node.
 *
 * @param sourceUpdate the source update whose plan node id, splits and no-more-splits
 *        markers are merged before scheduling
 */
private synchronized void scheduleTableScanSource(TaskSource sourceUpdate) {
    mergeIntoPendingSplits(sourceUpdate.getPlanNodeId(), sourceUpdate.getSplits(), sourceUpdate.getNoMoreSplitsForLifespan(), sourceUpdate.isNoMoreSplits());
    while (true) {
        // SchedulingLifespanManager tracks how far each Lifespan has been scheduled. Here is an example.
        // Let's say there are 4 source pipelines/nodes: A, B, C, and D, in scheduling order.
        // And we're processing 3 concurrent lifespans at a time. In this case, we could have
        //
        // * Lifespan 10:  A   B  [C]  D; i.e. Pipelines A and B have finished scheduling (but not necessarily finished running).
        // * Lifespan 20: [A]  B   C   D
        // * Lifespan 30:  A  [B]  C   D
        //
        // To recap, SchedulingLifespanManager records the next scheduling source node for each lifespan.
        Iterator<SchedulingLifespan> activeLifespans = schedulingLifespanManager.getActiveLifespans();
        // Set to true only when some lifespan advances to its next plan node during this pass.
        boolean madeProgress = false;
        while (activeLifespans.hasNext()) {
            SchedulingLifespan schedulingLifespan = activeLifespans.next();
            Lifespan lifespan = schedulingLifespan.getLifespan();
            // Keep scheduling plan nodes for this lifespan until one of the breaks below fires.
            // getSchedulingPlanNode returns an Optional; an empty value stops work on this lifespan for this pass.
            while (true) {
                Optional<PlanNodeId> optionalSchedulingPlanNode = schedulingLifespan.getSchedulingPlanNode();
                if (!optionalSchedulingPlanNode.isPresent()) {
                    break;
                }
                PlanNodeId schedulingPlanNode = optionalSchedulingPlanNode.get();
                DriverSplitRunnerFactory partitionedDriverRunnerFactory = driverRunnerFactoriesWithSplitLifeCycle.get(schedulingPlanNode);
                PendingSplits pendingSplits = pendingSplitsByPlanNode.get(schedulingPlanNode).getLifespan(lifespan);
                // Enqueue driver runners with driver group lifecycle for this driver life cycle, if not already enqueued.
                // Task-wide lifespans are skipped here.
                if (!lifespan.isTaskWide() && !schedulingLifespan.getAndSetDriversForDriverGroupLifeCycleScheduled()) {
                    scheduleDriversForDriverGroupLifeCycle(lifespan);
                }
                // Enqueue driver runners with split lifecycle for this plan node and driver life cycle combination.
                ImmutableList.Builder<DriverSplitRunner> runners = ImmutableList.builder();
                for (ScheduledSplit scheduledSplit : pendingSplits.removeAllSplits()) {
                    // create a new driver for the split
                    runners.add(partitionedDriverRunnerFactory.createDriverRunner(scheduledSplit, lifespan));
                }
                // NOTE(review): the meaning of the boolean argument is not visible in this excerpt — confirm against enqueueDriverSplitRunner.
                enqueueDriverSplitRunner(false, runners.build());
                // Only when the pending splits for this plan node and lifespan are in the NO_MORE_SPLITS state
                // do we finish this plan node and move on to the next one; otherwise stop here and wait for more splits.
                if (pendingSplits.getState() != NO_MORE_SPLITS) {
                    break;
                }
                partitionedDriverRunnerFactory.noMoreDriverRunner(ImmutableList.of(lifespan));
                pendingSplits.markAsCleanedUp();
                schedulingLifespan.nextPlanNode();
                madeProgress = true;
                if (schedulingLifespan.isDone()) {
                    break;
                }
            }
        }
        // A pass in which no lifespan advanced means there is nothing more to schedule right now.
        if (!madeProgress) {
            break;
        }
    }
    if (sourceUpdate.isNoMoreSplits()) {
        schedulingLifespanManager.noMoreSplits(sourceUpdate.getPlanNodeId());
    }
}
Also used : PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ImmutableList(com.google.common.collect.ImmutableList)

Example 74 with PlanNodeId

use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.

the class HttpRemoteTask method addSplits.

/*
 * Queues the given splits, grouped by source plan node, as pending splits of this task.
 *
 * Nothing happens if the task state is already done. For sources listed in
 * tableScanPlanNodeIds, the pending source split count and weight are increased by the
 * splits that were actually inserted, and task stats are refreshed. The split queue
 * space is always recomputed; an update is scheduled only if at least one source entry
 * was present in the input.
 *
 * Throws NullPointerException if splitsBySource is null, IllegalStateException if
 * noMoreSplits was already set for one of the sources, and ArithmeticException if a
 * weight sum overflows a long.
 */
@Override
public synchronized void addSplits(Multimap<PlanNodeId, Split> splitsBySource) {
    requireNonNull(splitsBySource, "splitsBySource is null");

    // a task that is already done takes no further pending splits
    if (getTaskStatus().getState().isDone()) {
        return;
    }

    boolean sawAnySource = false;
    for (Entry<PlanNodeId, Collection<Split>> sourceEntry : splitsBySource.asMap().entrySet()) {
        PlanNodeId planNodeId = sourceEntry.getKey();
        boolean tableScan = tableScanPlanNodeIds.contains(planNodeId);
        checkState(!noMoreSplits.containsKey(planNodeId), "noMoreSplits has already been set for %s", planNodeId);

        int insertedCount = 0;
        long insertedWeight = 0;
        for (Split candidate : sourceEntry.getValue()) {
            ScheduledSplit scheduled = new ScheduledSplit(nextSplitId.getAndIncrement(), planNodeId, candidate);
            // Every split is offered to pendingSplits; only table scan splits that were inserted are counted.
            boolean inserted = pendingSplits.put(planNodeId, scheduled);
            if (inserted && tableScan) {
                insertedCount++;
                insertedWeight = addExact(insertedWeight, candidate.getSplitWeight().getRawValue());
            }
        }

        if (tableScan) {
            pendingSourceSplitCount += insertedCount;
            pendingSourceSplitsWeight = addExact(pendingSourceSplitsWeight, insertedWeight);
            updateTaskStats();
        }
        sawAnySource = true;
    }

    updateSplitQueueSpace();

    if (sawAnySource) {
        this.needsUpdate.set(true);
        scheduleUpdate();
    }
}
Also used : PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) Collection(java.util.Collection) Split(com.facebook.presto.metadata.Split) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit)

Example 75 with PlanNodeId

use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.

the class TestNodeScheduler method testTopologyAwareScheduling.

/**
 * Exercises {@code NodeSelector.computeAssignments} on a {@code NodeScheduler} configured
 * with network topology "test" and three nodes.
 *
 * <p>The test proceeds in stages: it assigns splits located on a host in another rack,
 * then rack-local splits, then splits located on the worker hosts themselves, asserting
 * after each stage how many splits were left unassigned or assigned.
 *
 * <p>The TestNG {@code timeOut} bounds the run at 60 * 1000 ms; the method also contains a
 * polling loop that waits for the location cache.
 *
 * @throws Exception propagated from the helpers used, e.g. from {@code MILLISECONDS.sleep}
 */
@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling() throws Exception {
    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    // Three nodes; going by their host names, two are in rack1 and one is in rack2.
    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    List<InternalNode> nodes = nodeBuilder.build();
    nodeManager.addNode(CONNECTOR_ID, nodes);
    // contents of taskMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> taskMap = new HashMap<>();
    // The limits set here (25 splits per node, 20 pending splits per task) are the source of the literal counts used below.
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(25).setIncludeCoordinator(false).setNetworkTopology("test").setMaxPendingSplitsPerTask(20);
    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology) {

        @Override
        public NetworkLocation get(HostAddress host) {
            // Bypass the cache for workers, since we only look them up once and they would all be unresolved otherwise
            if (host.getHostText().startsWith("host")) {
                return topology.locate(host);
            } else {
                return super.get(host);
            }
        }
    };
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, nodeManager, new NodeSelectionStats(), nodeSchedulerConfig, nodeTaskMap, new Duration(5, SECONDS), new ThrowingNodeTtlFetcherManager(), new NoOpQueryManager(), new SimpleTtlNodeSelectorConfig());
    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID);
    // Fill up the nodes with non-local data
    // (25 + 11) * 3 = 108 iterations, every split pointing at the same host "data.other_rack".
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1))));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    Multimap<InternalNode, Split> assignments = nodeSelector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(taskMap.values())).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    // Create one mock table scan task per assigned node and record it in both nodeTaskMap and taskMap.
    // NOTE(review): startSplits(25) presumably marks 25 of the task's splits as running — confirm in MockRemoteTask.
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId("test", 1, 0, task);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeTaskMap.createTaskStatsTracker(node, taskId));
        remoteTask.startSplits(25);
        nodeTaskMap.addTask(node, remoteTask);
        taskMap.put(node, remoteTask);
    }
    // Continue assigning to fill up part of the queue
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = nodeSelector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);
    // Assign rack-local splits
    // 6 * 2 = 12 splits on "data.rack1" and 6 splits on "data.rack2".
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(dataHost1)));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(dataHost2)));
    }
    assignments = nodeSelector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));
    // Compute the assignments a second time to account for the fact that some splits may not have been assigned due to asynchronous
    // loading of the NetworkLocationCache
    // Poll until the cache returns something other than ROOT_LOCATION for both data hosts.
    // The 10 ms sleep runs on every iteration, including the one that observes the refreshed cache.
    boolean cacheRefreshed = false;
    while (!cacheRefreshed) {
        cacheRefreshed = true;
        if (locationCache.get(dataHost1).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        if (locationCache.get(dataHost2).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        MILLISECONDS.sleep(10);
    }
    assignments = nodeSelector.computeAssignments(unassigned, ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    assertEquals(unassigned.size(), 3);
    // Count the still-unassigned splits per rack; the rack is the first segment of the
    // topology location of the split's first preferred node.
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getPreferredNodes(new ModularHashingNodeProvider(nodeSelector.getAllNodes())).get(0)).getSegments().get(0);
        switch(rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                // any other rack name is unexpected
                fail();
        }
    }
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);
    // Assign local splits
    // One split per worker host; all three are expected to be assigned, across three distinct nodes.
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1))));
    localSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1))));
    localSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1))));
    assignments = nodeSelector.computeAssignments(localSplits.build(), ImmutableList.copyOf(taskMap.values())).getAssignments();
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}
Also used : HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) NodeSchedulerConfig(com.facebook.presto.execution.scheduler.NodeSchedulerConfig) HostAddress(com.facebook.presto.spi.HostAddress) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) NodeScheduler(com.facebook.presto.execution.scheduler.NodeScheduler) TestingTransactionHandle(com.facebook.presto.testing.TestingTransactionHandle) SimpleTtlNodeSelectorConfig(com.facebook.presto.execution.scheduler.nodeSelection.SimpleTtlNodeSelectorConfig) HashSet(java.util.HashSet) NetworkLocationCache(com.facebook.presto.execution.scheduler.NetworkLocationCache) Duration(io.airlift.units.Duration) ThrowingNodeTtlFetcherManager(com.facebook.presto.ttl.nodettlfetchermanagers.ThrowingNodeTtlFetcherManager) InMemoryNodeManager(com.facebook.presto.metadata.InMemoryNodeManager) NoOpQueryManager(com.facebook.presto.dispatcher.NoOpQueryManager) NodeSelectionStats(com.facebook.presto.execution.scheduler.nodeSelection.NodeSelectionStats) ModularHashingNodeProvider(com.facebook.presto.execution.scheduler.ModularHashingNodeProvider) InternalNode(com.facebook.presto.metadata.InternalNode) NodeSelector(com.facebook.presto.execution.scheduler.nodeSelection.NodeSelector) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) Split(com.facebook.presto.metadata.Split) Test(org.testng.annotations.Test)

Aggregations

PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId)204 Test (org.testng.annotations.Test)123 Page (com.facebook.presto.common.Page)83 MaterializedResult (com.facebook.presto.testing.MaterializedResult)52 Type (com.facebook.presto.common.type.Type)47 VariableReferenceExpression (com.facebook.presto.spi.relation.VariableReferenceExpression)43 ImmutableList (com.google.common.collect.ImmutableList)43 RowPagesBuilder (com.facebook.presto.RowPagesBuilder)39 DataSize (io.airlift.units.DataSize)39 Optional (java.util.Optional)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 JoinNode (com.facebook.presto.sql.planner.plan.JoinNode)25 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)23 VariableStatsEstimate (com.facebook.presto.cost.VariableStatsEstimate)23 Split (com.facebook.presto.metadata.Split)23 OperatorFactory (com.facebook.presto.operator.OperatorFactory)23 PlanNodeStatsEstimate (com.facebook.presto.cost.PlanNodeStatsEstimate)22 RowExpression (com.facebook.presto.spi.relation.RowExpression)21 PlanMatchPattern.values (com.facebook.presto.sql.planner.assertions.PlanMatchPattern.values)21 JOIN_DISTRIBUTION_TYPE (com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE)20