Search in sources :

Example 1 with Lifespan

use of com.facebook.presto.execution.Lifespan in project presto by prestodb.

the class FixedLifespanScheduler method schedule.

/**
 * Starts a follow-up driver group for every driver group that finished since the previous call.
 *
 * <p>The list of recently finished driver groups is snapshotted and cleared, and the
 * {@code newDriverGroupReady} future is replaced, all under this object's monitor. For each
 * finished group, the iterator registered for that group's node is consulted; when it still
 * has an entry, the next driver group id is taken from it and started on {@code scheduler}.
 *
 * @param scheduler the source scheduler on which new lifespans are started
 * @return the current value of the {@code newDriverGroupReady} field
 * @throws IllegalStateException if {@code initialScheduled} is false
 */
public SettableFuture schedule(SourceScheduler scheduler) {
    checkState(initialScheduled);

    // A fresh future is installed on every call, even if the previous one never completed:
    // handing out the same SettableFuture instance repeatedly could lead to ListenableFuture
    // retaining too many listener objects.
    List<Lifespan> finishedDriverGroups;
    synchronized (this) {
        finishedDriverGroups = ImmutableList.copyOf(this.recentlyCompletelyExecutedDriverGroups);
        this.recentlyCompletelyExecutedDriverGroups.clear();
        newDriverGroupReady = SettableFuture.create();
    }

    for (Lifespan finishedDriverGroup : finishedDriverGroups) {
        // Iterator over the driver groups mapped to the node that ran the finished group.
        IntListIterator remainingOnNode = nodeToDriverGroupsMap.get(driverGroupToNodeMap.get(finishedDriverGroup.getId()));
        if (remainingOnNode.hasNext()) {
            int nextDriverGroupId = remainingOnNode.nextInt();
            scheduler.startLifespan(Lifespan.driverGroup(nextDriverGroupId), partitionHandles.get(nextDriverGroupId));
        }
    }

    return newDriverGroupReady;
}
Also used : IntListIterator(it.unimi.dsi.fastutil.ints.IntListIterator) Lifespan(com.facebook.presto.execution.Lifespan)

Example 2 with Lifespan

use of com.facebook.presto.execution.Lifespan in project presto by prestodb.

the class SourcePartitionedScheduler method schedule.

/**
 * Runs one scheduling pass over every entry in {@code scheduleGroups}.
 *
 * <p>For each schedule group the pass (1) fetches the next split batch when the group has no
 * pending splits and is not yet in NO_MORE_SPLITS/DONE, (2) asks {@code splitPlacementPolicy}
 * to place the pending splits, (3) moves the group to DONE once it has no pending splits and
 * is in NO_MORE_SPLITS, and (4) hands the resulting assignments and no-more-splits
 * notifications to {@code assignSplits}.
 *
 * <p>After the loop the method either walks the scheduler-level {@code state} forward to
 * FINISHED and returns a finished, non-blocked result; returns an unfinished non-blocked
 * result when at least one group can make progress immediately; or returns a blocked result
 * whose future completes when any collected blocking future (or
 * {@code whenFinishedOrNewLifespanAdded}) completes.
 *
 * @return the result of this pass, carrying the newly created tasks and the number of splits assigned
 * @throws IllegalStateException if the finish condition holds while {@code state} is still INITIALIZED
 */
@Override
public synchronized ScheduleResult schedule() {
    dropListenersFromWhenFinishedOrNewLifespansAdded();
    // Totals and flags accumulated across all schedule groups in this pass.
    int overallSplitAssignmentCount = 0;
    ImmutableSet.Builder<RemoteTask> overallNewTasks = ImmutableSet.builder();
    List<ListenableFuture<?>> overallBlockedFutures = new ArrayList<>();
    boolean anyBlockedOnPlacements = false;
    boolean anyBlockedOnNextSplitBatch = false;
    boolean anyNotBlocked = false;
    for (Entry<Lifespan, ScheduleGroup> entry : scheduleGroups.entrySet()) {
        Lifespan lifespan = entry.getKey();
        ScheduleGroup scheduleGroup = entry.getValue();
        if (scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS || scheduleGroup.state == ScheduleGroupState.DONE) {
            // A group that will receive no more splits must not have a batch request in flight.
            verify(scheduleGroup.nextSplitBatchFuture == null);
        } else if (scheduleGroup.pendingSplits.isEmpty()) {
            // try to get the next batch
            if (scheduleGroup.nextSplitBatchFuture == null) {
                scheduleGroup.nextSplitBatchFuture = splitSource.getNextBatch(scheduleGroup.partitionHandle, lifespan, splitBatchSize);
                // Capture the request start time so the stage can record it when the batch succeeds.
                long start = System.nanoTime();
                addSuccessCallback(scheduleGroup.nextSplitBatchFuture, () -> stage.recordGetSplitTime(start));
            }
            if (scheduleGroup.nextSplitBatchFuture.isDone()) {
                // The batch is available: take its splits and clear the in-flight future.
                SplitBatch nextSplits = getFutureValue(scheduleGroup.nextSplitBatchFuture);
                scheduleGroup.nextSplitBatchFuture = null;
                scheduleGroup.pendingSplits = new HashSet<>(nextSplits.getSplits());
                if (nextSplits.isLastBatch()) {
                    if (scheduleGroup.state == ScheduleGroupState.INITIALIZED && scheduleGroup.pendingSplits.isEmpty()) {
                        // Add an empty split in case no splits have been produced for the source.
                        // For source operators, they never take input, but they may produce output.
                        // This is well handled by Presto execution engine.
                        // However, there are certain non-source operators that may produce output without any input,
                        // for example, 1) an AggregationOperator, 2) a HashAggregationOperator where one of the grouping sets is ().
                        // Scheduling an empty split kicks off necessary driver instantiation to make this work.
                        scheduleGroup.pendingSplits.add(new Split(splitSource.getConnectorId(), splitSource.getTransactionHandle(), new EmptySplit(splitSource.getConnectorId()), lifespan, NON_CACHEABLE));
                    }
                    scheduleGroup.state = ScheduleGroupState.NO_MORE_SPLITS;
                }
            } else {
                // Batch not ready yet: remember the future and move on to the next group.
                overallBlockedFutures.add(scheduleGroup.nextSplitBatchFuture);
                anyBlockedOnNextSplitBatch = true;
                continue;
            }
        }
        Multimap<InternalNode, Split> splitAssignment = ImmutableMultimap.of();
        if (!scheduleGroup.pendingSplits.isEmpty()) {
            if (!scheduleGroup.placementFuture.isDone()) {
                // Still waiting on the placement future stored by an earlier pass; skip this group.
                anyBlockedOnPlacements = true;
                continue;
            }
            // First time this group (and possibly the scheduler as a whole) has splits to place.
            if (scheduleGroup.state == ScheduleGroupState.INITIALIZED) {
                scheduleGroup.state = ScheduleGroupState.SPLITS_ADDED;
            }
            if (state == State.INITIALIZED) {
                state = State.SPLITS_ADDED;
            }
            // calculate placements for splits
            SplitPlacementResult splitPlacementResult = splitPlacementPolicy.computeAssignments(scheduleGroup.pendingSplits);
            splitAssignment = splitPlacementResult.getAssignments();
            // remove splits with successful placements
            // AbstractSet.removeAll performs terribly here.
            splitAssignment.values().forEach(scheduleGroup.pendingSplits::remove);
            overallSplitAssignmentCount += splitAssignment.size();
            // if not completely placed, mark scheduleGroup as blocked on placement
            if (!scheduleGroup.pendingSplits.isEmpty()) {
                scheduleGroup.placementFuture = splitPlacementResult.getBlocked();
                overallBlockedFutures.add(scheduleGroup.placementFuture);
                anyBlockedOnPlacements = true;
            }
        }
        // if no new splits will be assigned, update state and attach completion event
        Multimap<InternalNode, Lifespan> noMoreSplitsNotification = ImmutableMultimap.of();
        if (scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS) {
            scheduleGroup.state = ScheduleGroupState.DONE;
            if (!lifespan.isTaskWide()) {
                // Non-task-wide lifespans are notified on the node that the placement policy
                // reports for the lifespan's id; the cast requires a BucketedSplitPlacementPolicy.
                InternalNode node = ((BucketedSplitPlacementPolicy) splitPlacementPolicy).getNodeForBucket(lifespan.getId());
                noMoreSplitsNotification = ImmutableMultimap.of(node, lifespan);
            }
        }
        // assign the splits with successful placements
        overallNewTasks.addAll(assignSplits(splitAssignment, noMoreSplitsNotification));
        // NOTE(review): the comment below is the tail of a longer explanation; the numbered point
        // it refers to ("caused by 1") is not present in this excerpt.
        // As a result, to avoid busy loops caused by 1, we check pendingSplits.isEmpty() instead of placementFuture.isDone() here.
        if (scheduleGroup.nextSplitBatchFuture == null && scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state != ScheduleGroupState.DONE) {
            // This group has nothing in flight and nothing pending, so another pass can fetch more splits right away.
            anyNotBlocked = true;
        }
    }
    // Finish handling: entered when the scheduler is already in NO_MORE_SPLITS/FINISHED, or when
    // execution is not grouped, a lifespan has been added, no schedule groups remain, and the
    // split source reports it is finished.
    // NOTE(review): the fragment below is the tail of a longer explanation not present in this excerpt.
    // (by calling `notifyAllLifespansFinishedExecution`)
    if ((state == State.NO_MORE_SPLITS || state == State.FINISHED) || (!groupedExecution && lifespanAdded && scheduleGroups.isEmpty() && splitSource.isFinished())) {
        // The cases intentionally fall through so the state advances step by step to FINISHED.
        switch(state) {
            case INITIALIZED:
                // The finish condition holds although no split was ever added.
                // NOTE(review): the line below appears to be the tail of a longer explanation
                // that is not present in this excerpt.
                // But this shouldn't be possible. See usage of EmptySplit in this method.
                throw new IllegalStateException("At least 1 split should have been scheduled for this plan node");
            case SPLITS_ADDED:
                state = State.NO_MORE_SPLITS;
                splitSource.close();
            // fall through
            case NO_MORE_SPLITS:
                state = State.FINISHED;
                whenFinishedOrNewLifespanAdded.set(null);
            // fall through
            case FINISHED:
                return ScheduleResult.nonBlocked(true, overallNewTasks.build(), overallSplitAssignmentCount);
            default:
                throw new IllegalStateException("Unknown state");
        }
    }
    if (anyNotBlocked) {
        // At least one group can progress immediately; report "not finished, not blocked".
        return ScheduleResult.nonBlocked(false, overallNewTasks.build(), overallSplitAssignmentCount);
    }
    if (anyBlockedOnPlacements) {
        // In a broadcast join, output buffers of the tasks in build source stage have to
        // hold onto all data produced before probe side task scheduling finishes,
        // even if the data is acknowledged by all known consumers. This is because
        // new consumers may be added until the probe side task scheduling finishes.
        //
        // As a result, the following line is necessary to prevent deadlock
        // due to neither build nor probe can make any progress.
        // The build side blocks due to a full output buffer.
        // In the meantime the probe side split cannot be consumed since
        // builder side hash table construction has not finished.
        //
        // TODO: When SourcePartitionedScheduler is used as a SourceScheduler, it shouldn't need to worry about
        // task scheduling and creation -- these are done by the StageScheduler.
        overallNewTasks.addAll(finalizeTaskCreationIfNecessary());
    }
    // Derive the blocked reason from which kinds of blocking were observed during the pass.
    ScheduleResult.BlockedReason blockedReason;
    if (anyBlockedOnNextSplitBatch) {
        blockedReason = anyBlockedOnPlacements ? MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE : WAITING_FOR_SOURCE;
    } else {
        blockedReason = anyBlockedOnPlacements ? SPLIT_QUEUES_FULL : NO_ACTIVE_DRIVER_GROUP;
    }
    // Also wake up when the scheduler finishes or a new lifespan is added.
    overallBlockedFutures.add(whenFinishedOrNewLifespanAdded);
    return ScheduleResult.blocked(false, overallNewTasks.build(), nonCancellationPropagating(whenAnyComplete(overallBlockedFutures)), blockedReason, overallSplitAssignmentCount);
}
Also used : ArrayList(java.util.ArrayList) EmptySplit(com.facebook.presto.split.EmptySplit) RemoteTask(com.facebook.presto.execution.RemoteTask) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) InternalNode(com.facebook.presto.metadata.InternalNode) EmptySplit(com.facebook.presto.split.EmptySplit) Split(com.facebook.presto.metadata.Split) Lifespan(com.facebook.presto.execution.Lifespan) HashSet(java.util.HashSet) BucketedSplitPlacementPolicy(com.facebook.presto.execution.scheduler.FixedSourcePartitionedScheduler.BucketedSplitPlacementPolicy)

Example 3 with Lifespan

use of com.facebook.presto.execution.Lifespan in project presto by prestodb.

the class DynamicLifespanScheduler method onLifespanExecutionFinished.

/**
 * Records that the given driver groups have finished executing and completes the future that
 * was current while they were being recorded.
 *
 * <p>All bookkeeping happens under this object's monitor; the future is completed only after
 * the monitor has been released.
 *
 * @param newlyCompletelyExecutedDriverGroups the driver-group lifespans that just finished; none may be task-wide
 * @throws IllegalStateException if initial scheduling has not finished
 * @throws IllegalArgumentException if any of the lifespans is task-wide
 */
@Override
public void onLifespanExecutionFinished(Iterable<Lifespan> newlyCompletelyExecutedDriverGroups) {
    checkState(initialScheduled, "onLifespanExecutionFinished should only be called after initial scheduling finished");

    SettableFuture<?> futureToComplete;
    synchronized (this) {
        for (Lifespan finishedDriverGroup : newlyCompletelyExecutedDriverGroups) {
            recordFinishedDriverGroup(finishedDriverGroup);
        }
        // Snapshot the field while still holding the lock; it is completed outside the lock below.
        futureToComplete = this.newDriverGroupReady;
    }

    futureToComplete.set(null);
}

/**
 * Updates the bookkeeping for a single finished driver group: re-enqueues its task as
 * available, bumps the finished counter, and removes the group from its task's running set.
 * Called by {@code onLifespanExecutionFinished} while holding this object's monitor.
 */
private void recordFinishedDriverGroup(Lifespan finishedDriverGroup) {
    checkArgument(!finishedDriverGroup.isTaskWide());
    int finishedId = finishedDriverGroup.getId();
    availableTasks.enqueue(taskByDriverGroup[finishedId]);
    totalLifespanExecutionFinished++;
    runningDriverGroupIdsByTask[taskByDriverGroup[finishedId]].remove(finishedId);
}
Also used : Lifespan(com.facebook.presto.execution.Lifespan)

Example 4 with Lifespan

use of com.facebook.presto.execution.Lifespan in project presto by prestodb.

the class HttpRemoteTask method getSource.

/**
 * Builds the {@link TaskSource} update for the given plan node, or returns {@code null} when
 * there is nothing to report for it.
 *
 * <p>An update is produced when the node has pending splits, has pending per-lifespan
 * no-more-splits entries, or its entry in {@code noMoreSplits} is {@code TRUE}. The
 * "no more splits" flag passed to the {@link TaskSource} is true whenever {@code noMoreSplits}
 * contains any entry for the node.
 *
 * @param planNodeId the plan node whose pending source state is collected
 * @return the source update, or {@code null} if there is nothing pending for the node
 */
private synchronized TaskSource getSource(PlanNodeId planNodeId) {
    Set<ScheduledSplit> splitsForNode = pendingSplits.get(planNodeId);
    boolean noMoreSplitsStillPending = Boolean.TRUE.equals(this.noMoreSplits.get(planNodeId));
    boolean noMoreSplitsRecorded = this.noMoreSplits.containsKey(planNodeId);
    Set<Lifespan> finishedLifespans = pendingNoMoreSplitsForLifespan.get(planNodeId);

    // Nothing to send: no splits, no per-lifespan notifications, no pending no-more-splits flag.
    if (splitsForNode.isEmpty() && finishedLifespans.isEmpty() && !noMoreSplitsStillPending) {
        return null;
    }
    return new TaskSource(planNodeId, splitsForNode, finishedLifespans, noMoreSplitsRecorded);
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) Lifespan(com.facebook.presto.execution.Lifespan) TaskSource(com.facebook.presto.execution.TaskSource)

Example 5 with Lifespan

use of com.facebook.presto.execution.Lifespan in project presto by prestodb.

the class SerializedPageReference method dereferencePages.

/**
 * Dereferences every page reference in the list and reports the released pages to the listener.
 *
 * <p>Only references for which {@code dereferencePage()} returns true are counted. Counted
 * references are grouped into consecutive runs that share the same lifespan, and the listener
 * is invoked once per run with the run's page count and total retained size.
 *
 * @param serializedPageReferences the references to dereference; must not be null
 * @param onPagesReleased the listener notified once per run of released pages; must not be null
 * @throws NullPointerException if either argument is null
 */
public static void dereferencePages(List<SerializedPageReference> serializedPageReferences, PagesReleasedListener onPagesReleased) {
    requireNonNull(onPagesReleased, "onPagesReleased is null");
    requireNonNull(serializedPageReferences, "serializedPageReferences is null");
    if (serializedPageReferences.isEmpty()) {
        return;
    }

    // State of the run currently being accumulated; a null lifespan means no run is open yet.
    Lifespan runLifespan = null;
    int runPageCount = 0;
    long runReleasedBytes = 0;

    for (SerializedPageReference reference : serializedPageReferences) {
        if (!reference.dereferencePage()) {
            // This reference was not released by the call; it contributes nothing.
            continue;
        }
        boolean continuesRun = reference.lifespan.equals(runLifespan);
        if (!continuesRun) {
            // Lifespan changed: report the finished run (if any) and open a new one.
            if (runLifespan != null) {
                onPagesReleased.onPagesReleased(runLifespan, runPageCount, runReleasedBytes);
            }
            runLifespan = reference.lifespan;
            runPageCount = 0;
            runReleasedBytes = 0;
        }
        runPageCount++;
        runReleasedBytes += reference.getRetainedSizeInBytes();
    }

    // Report the final run, if at least one page was released.
    if (runLifespan != null) {
        onPagesReleased.onPagesReleased(runLifespan, runPageCount, runReleasedBytes);
    }
}
Also used : Lifespan(com.facebook.presto.execution.Lifespan)

Aggregations

Lifespan (com.facebook.presto.execution.Lifespan): 12, RemoteTask (com.facebook.presto.execution.RemoteTask): 4, Split (com.facebook.presto.metadata.Split): 3, ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 3, ArrayList (java.util.ArrayList): 3, Optional (java.util.Optional): 3, ScheduledSplit (com.facebook.presto.execution.ScheduledSplit): 2, TaskSource (com.facebook.presto.execution.TaskSource): 2, PagesReleasedListener (com.facebook.presto.execution.buffer.SerializedPageReference.PagesReleasedListener): 2, InternalNode (com.facebook.presto.metadata.InternalNode): 2, PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId): 2, EmptySplit (com.facebook.presto.split.EmptySplit): 2, Preconditions.checkState (com.google.common.base.Preconditions.checkState): 2, ImmutableSet (com.google.common.collect.ImmutableSet): 2, ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet): 2, Test (org.testng.annotations.Test): 2, Page (com.facebook.presto.common.Page): 1, BigintType (com.facebook.presto.common.type.BigintType): 1, BIGINT (com.facebook.presto.common.type.BigintType.BIGINT): 1, BufferResult.emptyResults (com.facebook.presto.execution.buffer.BufferResult.emptyResults): 1