use of com.facebook.presto.execution.Lifespan in project presto by prestodb.
the class FixedLifespanScheduler method schedule.
/**
 * Starts a follow-up driver group for each driver group that was recorded as completely
 * executed since the previous invocation, and hands back a freshly created future.
 *
 * <p>For every drained driver group, the iterator registered in {@code nodeToDriverGroupsMap}
 * (keyed by the entry {@code driverGroupToNodeMap} holds for that group's id) is consulted;
 * if it still has an element, that driver group id is started on the given scheduler.
 *
 * @param scheduler the scheduler on which new lifespans are started
 * @return the future assigned to {@code newDriverGroupReady} during this call
 * @throws IllegalStateException if {@code initialScheduled} is false
 */
public SettableFuture schedule(SourceScheduler scheduler) {
    checkState(initialScheduled);

    // A brand-new future is created on every call, even when the previous one has not completed:
    // handing out the same SettableFuture instance repeatedly could make the ListenableFuture
    // retain too many listener objects.
    List<Lifespan> finishedGroups;
    synchronized (this) {
        // Snapshot and drain the completed groups under the lock; the scheduling below runs outside it.
        finishedGroups = ImmutableList.copyOf(this.recentlyCompletelyExecutedDriverGroups);
        this.recentlyCompletelyExecutedDriverGroups.clear();
        newDriverGroupReady = SettableFuture.create();
    }

    for (Lifespan finishedGroup : finishedGroups) {
        IntListIterator remainingGroups = nodeToDriverGroupsMap.get(driverGroupToNodeMap.get(finishedGroup.getId()));
        // Nothing left to start for this iterator: move on to the next completed group.
        if (remainingGroups.hasNext()) {
            int nextGroupId = remainingGroups.nextInt();
            scheduler.startLifespan(Lifespan.driverGroup(nextGroupId), partitionHandles.get(nextGroupId));
        }
    }

    return newDriverGroupReady;
}
use of com.facebook.presto.execution.Lifespan in project presto by prestodb.
the class SourcePartitionedScheduler method schedule.
/**
 * Runs one scheduling pass over every entry in {@code scheduleGroups}.
 *
 * <p>For each schedule group the pass (1) obtains the next split batch when the group has no
 * pending splits, (2) asks the placement policy to assign the pending splits, (3) marks the
 * group DONE once it has no pending splits and no more splits are coming, and (4) hands the
 * placed splits and any no-more-splits notification to {@code assignSplits}.
 *
 * <p>After the loop the method returns one of:
 * <ul>
 * <li>a non-blocked, finished result when the scheduler-level {@code state} is already
 *     NO_MORE_SPLITS/FINISHED, or when execution is not grouped, a lifespan has been added,
 *     no schedule groups remain and the split source reports finished;</li>
 * <li>a non-blocked, unfinished result when at least one group can make progress immediately;</li>
 * <li>otherwise a blocked result whose future completes when any collected blocking future,
 *     or {@code whenFinishedOrNewLifespanAdded}, completes.</li>
 * </ul>
 *
 * @return the outcome of this pass: new tasks, number of splits assigned, and blocking information
 * @throws IllegalStateException if completion is detected while {@code state} is still INITIALIZED
 */
@Override
public synchronized ScheduleResult schedule() {
dropListenersFromWhenFinishedOrNewLifespansAdded();
// Accumulators across all schedule groups handled in this pass.
int overallSplitAssignmentCount = 0;
ImmutableSet.Builder<RemoteTask> overallNewTasks = ImmutableSet.builder();
List<ListenableFuture<?>> overallBlockedFutures = new ArrayList<>();
boolean anyBlockedOnPlacements = false;
boolean anyBlockedOnNextSplitBatch = false;
boolean anyNotBlocked = false;
for (Entry<Lifespan, ScheduleGroup> entry : scheduleGroups.entrySet()) {
Lifespan lifespan = entry.getKey();
ScheduleGroup scheduleGroup = entry.getValue();
if (scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS || scheduleGroup.state == ScheduleGroupState.DONE) {
// A group that will receive no more splits must not have an outstanding batch request.
verify(scheduleGroup.nextSplitBatchFuture == null);
} else if (scheduleGroup.pendingSplits.isEmpty()) {
// try to get the next batch
if (scheduleGroup.nextSplitBatchFuture == null) {
scheduleGroup.nextSplitBatchFuture = splitSource.getNextBatch(scheduleGroup.partitionHandle, lifespan, splitBatchSize);
// The timestamp is taken right after the request is issued; it is reported to the stage once the batch future succeeds.
long start = System.nanoTime();
addSuccessCallback(scheduleGroup.nextSplitBatchFuture, () -> stage.recordGetSplitTime(start));
}
if (scheduleGroup.nextSplitBatchFuture.isDone()) {
// Batch is available: consume it and clear the future so a later pass can request another one.
SplitBatch nextSplits = getFutureValue(scheduleGroup.nextSplitBatchFuture);
scheduleGroup.nextSplitBatchFuture = null;
scheduleGroup.pendingSplits = new HashSet<>(nextSplits.getSplits());
if (nextSplits.isLastBatch()) {
if (scheduleGroup.state == ScheduleGroupState.INITIALIZED && scheduleGroup.pendingSplits.isEmpty()) {
// Add an empty split in case no splits have been produced for the source.
// For source operators, they never take input, but they may produce output.
// This is well handled by Presto execution engine.
// However, there are certain non-source operators that may produce output without any input,
// for example, 1) an AggregationOperator, 2) a HashAggregationOperator where one of the grouping sets is ().
// Scheduling an empty split kicks off necessary driver instantiation to make this work.
scheduleGroup.pendingSplits.add(new Split(splitSource.getConnectorId(), splitSource.getTransactionHandle(), new EmptySplit(splitSource.getConnectorId()), lifespan, NON_CACHEABLE));
}
scheduleGroup.state = ScheduleGroupState.NO_MORE_SPLITS;
}
} else {
// Batch not ready yet: record the future as a blocking reason and skip the rest of the work for this group.
overallBlockedFutures.add(scheduleGroup.nextSplitBatchFuture);
anyBlockedOnNextSplitBatch = true;
continue;
}
}
Multimap<InternalNode, Split> splitAssignment = ImmutableMultimap.of();
if (!scheduleGroup.pendingSplits.isEmpty()) {
if (!scheduleGroup.placementFuture.isDone()) {
// Still waiting on the placement future stored by an earlier pass; note that it is not added
// to overallBlockedFutures on this path.
anyBlockedOnPlacements = true;
continue;
}
// Both the group-level state and the scheduler-level state leave INITIALIZED the first time
// there are pending splits to place.
if (scheduleGroup.state == ScheduleGroupState.INITIALIZED) {
scheduleGroup.state = ScheduleGroupState.SPLITS_ADDED;
}
if (state == State.INITIALIZED) {
state = State.SPLITS_ADDED;
}
// calculate placements for splits
SplitPlacementResult splitPlacementResult = splitPlacementPolicy.computeAssignments(scheduleGroup.pendingSplits);
splitAssignment = splitPlacementResult.getAssignments();
// remove splits with successful placements
// AbstractSet.removeAll performs terribly here.
splitAssignment.values().forEach(scheduleGroup.pendingSplits::remove);
overallSplitAssignmentCount += splitAssignment.size();
// if not completely placed, mark scheduleGroup as blocked on placement
if (!scheduleGroup.pendingSplits.isEmpty()) {
scheduleGroup.placementFuture = splitPlacementResult.getBlocked();
overallBlockedFutures.add(scheduleGroup.placementFuture);
anyBlockedOnPlacements = true;
}
}
// if no new splits will be assigned, update state and attach completion event
Multimap<InternalNode, Lifespan> noMoreSplitsNotification = ImmutableMultimap.of();
if (scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS) {
scheduleGroup.state = ScheduleGroupState.DONE;
if (!lifespan.isTaskWide()) {
// For a non-task-wide lifespan the notification targets the node that the placement policy
// reports for the bucket with this lifespan's id.
// NOTE(review): the cast assumes the policy is a BucketedSplitPlacementPolicy whenever the lifespan is not task-wide — confirm.
InternalNode node = ((BucketedSplitPlacementPolicy) splitPlacementPolicy).getNodeForBucket(lifespan.getId());
noMoreSplitsNotification = ImmutableMultimap.of(node, lifespan);
}
}
// assign the splits with successful placements
overallNewTasks.addAll(assignSplits(splitAssignment, noMoreSplitsNotification));
// NOTE(review): the comment below refers to an enumerated point "1" that is not present in this excerpt;
// the explanation preceding it appears to have been truncated.
// As a result, to avoid busy loops caused by 1, we check pendingSplits.isEmpty() instead of placementFuture.isDone() here.
if (scheduleGroup.nextSplitBatchFuture == null && scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state != ScheduleGroupState.DONE) {
// Nothing outstanding and nothing pending, yet the group is not done: another pass can request more splits right away.
anyNotBlocked = true;
}
}
// Completion check: either the scheduler already reached NO_MORE_SPLITS/FINISHED, or execution is not grouped,
// a lifespan has been added, no schedule groups remain, and the split source reports finished.
// NOTE(review): the original comment for this condition appears truncated; only the following fragment survives:
// (by calling `notifyAllLifespansFinishedExecution`)
if ((state == State.NO_MORE_SPLITS || state == State.FINISHED) || (!groupedExecution && lifespanAdded && scheduleGroups.isEmpty() && splitSource.isFinished())) {
// The cases deliberately fall through so that the state advances step by step to FINISHED.
switch(state) {
case INITIALIZED:
// state only leaves INITIALIZED in the placement step above, so reaching this case means that step never ran.
// But this shouldn't be possible. See usage of EmptySplit in this method.
throw new IllegalStateException("At least 1 split should have been scheduled for this plan node");
case SPLITS_ADDED:
state = State.NO_MORE_SPLITS;
splitSource.close();
// fall through
case NO_MORE_SPLITS:
state = State.FINISHED;
// Complete the future that blocked results from this method wait on.
whenFinishedOrNewLifespanAdded.set(null);
// fall through
case FINISHED:
return ScheduleResult.nonBlocked(true, overallNewTasks.build(), overallSplitAssignmentCount);
default:
throw new IllegalStateException("Unknown state");
}
}
if (anyNotBlocked) {
// At least one group can progress immediately, so report an unfinished, non-blocked result.
return ScheduleResult.nonBlocked(false, overallNewTasks.build(), overallSplitAssignmentCount);
}
if (anyBlockedOnPlacements) {
// In a broadcast join, output buffers of the tasks in build source stage have to
// hold onto all data produced before probe side task scheduling finishes,
// even if the data is acknowledged by all known consumers. This is because
// new consumers may be added until the probe side task scheduling finishes.
//
// As a result, the following line is necessary to prevent deadlock
// due to neither build nor probe can make any progress.
// The build side blocks due to a full output buffer.
// In the meantime the probe side split cannot be consumed since
// builder side hash table construction has not finished.
//
// TODO: When SourcePartitionedScheduler is used as a SourceScheduler, it shouldn't need to worry about
// task scheduling and creation -- these are done by the StageScheduler.
overallNewTasks.addAll(finalizeTaskCreationIfNecessary());
}
// Derive the reported blocked reason from the two "blocked" flags gathered in the loop.
ScheduleResult.BlockedReason blockedReason;
if (anyBlockedOnNextSplitBatch) {
blockedReason = anyBlockedOnPlacements ? MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE : WAITING_FOR_SOURCE;
} else {
blockedReason = anyBlockedOnPlacements ? SPLIT_QUEUES_FULL : NO_ACTIVE_DRIVER_GROUP;
}
// The blocked result also wakes up when whenFinishedOrNewLifespanAdded completes, not only on the per-group futures.
overallBlockedFutures.add(whenFinishedOrNewLifespanAdded);
return ScheduleResult.blocked(false, overallNewTasks.build(), nonCancellationPropagating(whenAnyComplete(overallBlockedFutures)), blockedReason, overallSplitAssignmentCount);
}
use of com.facebook.presto.execution.Lifespan in project presto by prestodb.
the class DynamicLifespanScheduler method onLifespanExecutionFinished.
/**
 * Records the given driver groups as having finished execution and then completes the
 * current {@code newDriverGroupReady} future.
 *
 * <p>For each driver group, under this object's monitor: the task recorded for that group in
 * {@code taskByDriverGroup} is enqueued into {@code availableTasks},
 * {@code totalLifespanExecutionFinished} is incremented, and the group id is removed from that
 * task's entry in {@code runningDriverGroupIdsByTask}.
 *
 * @param newlyCompletelyExecutedDriverGroups driver groups that just finished; none may be task-wide
 * @throws IllegalStateException if initial scheduling has not finished
 * @throws IllegalArgumentException if any given lifespan is task-wide
 */
@Override
public void onLifespanExecutionFinished(Iterable<Lifespan> newlyCompletelyExecutedDriverGroups) {
    checkState(initialScheduled, "onLifespanExecutionFinished should only be called after initial scheduling finished");

    SettableFuture<?> futureToComplete;
    synchronized (this) {
        for (Lifespan finishedGroup : newlyCompletelyExecutedDriverGroups) {
            checkArgument(!finishedGroup.isTaskWide());
            int groupId = finishedGroup.getId();
            int owningTask = taskByDriverGroup[groupId];
            availableTasks.enqueue(owningTask);
            totalLifespanExecutionFinished++;
            runningDriverGroupIdsByTask[owningTask].remove(groupId);
        }
        // Capture the field while holding the lock; the future itself is completed after the lock is released.
        futureToComplete = this.newDriverGroupReady;
    }
    futureToComplete.set(null);
}
use of com.facebook.presto.execution.Lifespan in project presto by prestodb.
the class HttpRemoteTask method getSource.
/**
 * Builds the {@link TaskSource} to send for the given plan node, or returns {@code null} when
 * there is nothing to send.
 *
 * <p>A source is produced when there are pending splits for the plan node, pending
 * no-more-splits lifespans for it, or the {@code noMoreSplits} map holds {@code TRUE} for it.
 * The source's no-more-splits flag is set whenever the map contains the plan node at all,
 * regardless of the mapped value.
 *
 * @param planNodeId the plan node whose pending state is inspected
 * @return the task source, or {@code null} if none of the conditions above holds
 */
private synchronized TaskSource getSource(PlanNodeId planNodeId) {
    Set<ScheduledSplit> queuedSplits = pendingSplits.get(planNodeId);
    boolean noMoreSplitsPending = Boolean.TRUE.equals(this.noMoreSplits.get(planNodeId));
    boolean noMoreSplitsKnown = this.noMoreSplits.containsKey(planNodeId);
    Set<Lifespan> finishedLifespans = pendingNoMoreSplitsForLifespan.get(planNodeId);

    // Nothing queued, no lifespan notifications, and no pending no-more-splits signal: nothing to send.
    if (queuedSplits.isEmpty() && finishedLifespans.isEmpty() && !noMoreSplitsPending) {
        return null;
    }
    return new TaskSource(planNodeId, queuedSplits, finishedLifespans, noMoreSplitsKnown);
}
use of com.facebook.presto.execution.Lifespan in project presto by prestodb.
the class SerializedPageReference method dereferencePages.
/**
 * Dereferences every page reference in the list and reports the released pages to the listener,
 * batched by consecutive runs of references that share the same lifespan.
 *
 * <p>Only references for which {@code dereferencePage()} returns {@code true} are counted. The
 * listener is invoked once per run with the run's lifespan, page count and summed retained size;
 * a run ends when a counted reference with a different lifespan is encountered or the list ends.
 *
 * @param serializedPageReferences the references to dereference; must not be null
 * @param onPagesReleased listener notified for each run of released pages; must not be null
 * @throws NullPointerException if either argument is null
 */
public static void dereferencePages(List<SerializedPageReference> serializedPageReferences, PagesReleasedListener onPagesReleased) {
    requireNonNull(onPagesReleased, "onPagesReleased is null");
    requireNonNull(serializedPageReferences, "serializedPageReferences is null");
    if (serializedPageReferences.isEmpty()) {
        return;
    }

    Lifespan runLifespan = null;
    int runPageCount = 0;
    long runReleasedBytes = 0;
    for (SerializedPageReference reference : serializedPageReferences) {
        if (!reference.dereferencePage()) {
            // Not counted as released by this call; it neither extends nor breaks the current run.
            continue;
        }
        boolean startsNewRun = !reference.lifespan.equals(runLifespan);
        if (startsNewRun) {
            // Report the run that just ended (if any) before starting to accumulate the next one.
            if (runLifespan != null) {
                onPagesReleased.onPagesReleased(runLifespan, runPageCount, runReleasedBytes);
            }
            runLifespan = reference.lifespan;
            runPageCount = 0;
            runReleasedBytes = 0;
        }
        runPageCount++;
        runReleasedBytes += reference.getRetainedSizeInBytes();
    }

    // Report the final run, if at least one page was counted.
    if (runLifespan != null) {
        onPagesReleased.onPagesReleased(runLifespan, runPageCount, runReleasedBytes);
    }
}
Aggregations