Search in sources :

Example 1 with EmptySplit

use of com.facebook.presto.split.EmptySplit in project presto by prestodb.

the class ScanFilterAndProjectOperator method addSplit.

/**
 * Hands this operator the split it is going to scan.
 *
 * <p>Only one split may ever be set; a second call fails the state check. When the
 * operator is already finishing, the split is ignored and the returned supplier
 * always yields an empty {@link Optional}.
 *
 * @param split the split to scan; must not be null
 * @return a supplier that, each time it is invoked, yields the current page source
 *         if it is an {@link UpdatablePageSource}, and an empty optional otherwise
 */
@Override
public Supplier<Optional<UpdatablePageSource>> addSplit(Split split) {
    requireNonNull(split, "split is null");
    checkState(this.split == null, "Table scan split already set");
    // Already finishing: do not record the split at all.
    if (finishing) {
        return () -> Optional.empty();
    }
    this.split = split;
    // Publish the split's info object (when it has one) through the operator context.
    Object info = split.getInfo();
    if (info != null) {
        operatorContext.setInfoSupplier(Suppliers.ofInstance(new SplitOperatorInfo(info)));
    }
    // Complete `blocked` now that a split is available.
    blocked.set(null);
    // An EmptySplit carries no data, so its page source can be installed right away.
    boolean emptySplit = split.getConnectorSplit() instanceof EmptySplit;
    if (emptySplit) {
        pageSource = new EmptySplitPageSource();
    }
    // The pageSource field is inspected lazily, at the time the supplier is called.
    return () -> {
        if (!(pageSource instanceof UpdatablePageSource)) {
            return Optional.empty();
        }
        return Optional.of((UpdatablePageSource) pageSource);
    };
}
Also used : UpdatablePageSource(com.facebook.presto.spi.UpdatablePageSource) EmptySplit(com.facebook.presto.split.EmptySplit) EmptySplitPageSource(com.facebook.presto.split.EmptySplitPageSource)

Example 2 with EmptySplit

use of com.facebook.presto.split.EmptySplit in project presto by prestodb.

the class SourcePartitionedScheduler method schedule.

/**
 * Runs one scheduling pass over every schedule group (one per {@code Lifespan} in
 * {@code scheduleGroups}).
 *
 * <p>For each group the pass:
 * <ol>
 *   <li>fetches the next split batch from {@code splitSource} when the group has no pending splits;</li>
 *   <li>asks {@code splitPlacementPolicy} to place the pending splits on nodes;</li>
 *   <li>hands the placed splits (and, for a group that just completed, a "no more splits"
 *       notification for its lifespan) to {@code assignSplits};</li>
 *   <li>moves the group to {@code DONE} once it is in {@code NO_MORE_SPLITS} with nothing pending.</li>
 * </ol>
 *
 * <p>After the loop the scheduler-wide {@code state} is advanced if the split source is exhausted,
 * and a {@code ScheduleResult} is built that is either non-blocked, or blocked on the collected
 * futures together with {@code whenFinishedOrNewLifespanAdded}.
 *
 * <p>The method is {@code synchronized} on this scheduler instance.
 *
 * @return the tasks created and the number of splits assigned during this pass, plus whether/why
 *         the scheduler is blocked (the first boolean passed to {@code nonBlocked}/{@code blocked}
 *         is presumably the "finished" flag -- confirm against {@code ScheduleResult})
 * @throws IllegalStateException if the finishing condition holds while {@code state} is still
 *         {@code INITIALIZED}, or if {@code state} has an unexpected value
 */
@Override
public synchronized ScheduleResult schedule() {
    dropListenersFromWhenFinishedOrNewLifespansAdded();
    // Totals accumulated across all schedule groups during this pass.
    int overallSplitAssignmentCount = 0;
    ImmutableSet.Builder<RemoteTask> overallNewTasks = ImmutableSet.builder();
    List<ListenableFuture<?>> overallBlockedFutures = new ArrayList<>();
    // Flags describing why (or whether) the pass as a whole ended up blocked.
    boolean anyBlockedOnPlacements = false;
    boolean anyBlockedOnNextSplitBatch = false;
    boolean anyNotBlocked = false;
    for (Entry<Lifespan, ScheduleGroup> entry : scheduleGroups.entrySet()) {
        Lifespan lifespan = entry.getKey();
        ScheduleGroup scheduleGroup = entry.getValue();
        if (scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS || scheduleGroup.state == ScheduleGroupState.DONE) {
            // A group that has seen its last batch must not have a batch request in flight.
            verify(scheduleGroup.nextSplitBatchFuture == null);
        } else if (scheduleGroup.pendingSplits.isEmpty()) {
            // try to get the next batch
            if (scheduleGroup.nextSplitBatchFuture == null) {
                scheduleGroup.nextSplitBatchFuture = splitSource.getNextBatch(scheduleGroup.partitionHandle, lifespan, splitBatchSize);
                // Capture the request time so the stage can record it once the batch arrives successfully.
                long start = System.nanoTime();
                addSuccessCallback(scheduleGroup.nextSplitBatchFuture, () -> stage.recordGetSplitTime(start));
            }
            if (scheduleGroup.nextSplitBatchFuture.isDone()) {
                SplitBatch nextSplits = getFutureValue(scheduleGroup.nextSplitBatchFuture);
                scheduleGroup.nextSplitBatchFuture = null;
                // Copy into a mutable set: placed splits are removed from it one by one below.
                scheduleGroup.pendingSplits = new HashSet<>(nextSplits.getSplits());
                if (nextSplits.isLastBatch()) {
                    if (scheduleGroup.state == ScheduleGroupState.INITIALIZED && scheduleGroup.pendingSplits.isEmpty()) {
                        // Add an empty split in case no splits have been produced for the source.
                        // For source operators, they never take input, but they may produce output.
                        // This is well handled by Presto execution engine.
                        // However, there are certain non-source operators that may produce output without any input,
                        // for example, 1) an AggregationOperator, 2) a HashAggregationOperator where one of the grouping sets is ().
                        // Scheduling an empty split kicks off necessary driver instantiation to make this work.
                        scheduleGroup.pendingSplits.add(new Split(splitSource.getConnectorId(), splitSource.getTransactionHandle(), new EmptySplit(splitSource.getConnectorId()), lifespan, NON_CACHEABLE));
                    }
                    scheduleGroup.state = ScheduleGroupState.NO_MORE_SPLITS;
                }
            } else {
                // Batch not ready yet: remember the future and move on to the next group.
                overallBlockedFutures.add(scheduleGroup.nextSplitBatchFuture);
                anyBlockedOnNextSplitBatch = true;
                continue;
            }
        }
        Multimap<InternalNode, Split> splitAssignment = ImmutableMultimap.of();
        if (!scheduleGroup.pendingSplits.isEmpty()) {
            if (!scheduleGroup.placementFuture.isDone()) {
                // Still waiting on an earlier placement; skip this group for now.
                // NOTE(review): the pending placementFuture is not added to overallBlockedFutures on this path -- confirm that is intended.
                anyBlockedOnPlacements = true;
                continue;
            }
            // First time splits are about to be placed: advance both the group state and the scheduler-wide state.
            if (scheduleGroup.state == ScheduleGroupState.INITIALIZED) {
                scheduleGroup.state = ScheduleGroupState.SPLITS_ADDED;
            }
            if (state == State.INITIALIZED) {
                state = State.SPLITS_ADDED;
            }
            // calculate placements for splits
            SplitPlacementResult splitPlacementResult = splitPlacementPolicy.computeAssignments(scheduleGroup.pendingSplits);
            splitAssignment = splitPlacementResult.getAssignments();
            // remove splits with successful placements
            // AbstractSet.removeAll performs terribly here.
            splitAssignment.values().forEach(scheduleGroup.pendingSplits::remove);
            overallSplitAssignmentCount += splitAssignment.size();
            // if not completely placed, mark scheduleGroup as blocked on placement
            if (!scheduleGroup.pendingSplits.isEmpty()) {
                scheduleGroup.placementFuture = splitPlacementResult.getBlocked();
                overallBlockedFutures.add(scheduleGroup.placementFuture);
                anyBlockedOnPlacements = true;
            }
        }
        // if no new splits will be assigned, update state and attach completion event
        Multimap<InternalNode, Lifespan> noMoreSplitsNotification = ImmutableMultimap.of();
        if (scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS) {
            scheduleGroup.state = ScheduleGroupState.DONE;
            if (!lifespan.isTaskWide()) {
                // For a non-task-wide lifespan the policy is cast (unchecked) to BucketedSplitPlacementPolicy
                // and the lifespan id is used as the bucket to find the node that gets the notification.
                InternalNode node = ((BucketedSplitPlacementPolicy) splitPlacementPolicy).getNodeForBucket(lifespan.getId());
                noMoreSplitsNotification = ImmutableMultimap.of(node, lifespan);
            }
        }
        // assign the splits with successful placements
        overallNewTasks.addAll(assignSplits(splitAssignment, noMoreSplitsNotification));
        // The group counts as "not blocked" when it has no batch request in flight, nothing pending, and is not DONE.
        // pendingSplits.isEmpty() is checked here instead of placementFuture.isDone() to avoid busy loops.
        // NOTE(review): the original comment referred to an enumerated reason ("caused by 1") whose list is
        // missing from this excerpt -- see the upstream source for the full explanation.
        if (scheduleGroup.nextSplitBatchFuture == null && scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state != ScheduleGroupState.DONE) {
            anyNotBlocked = true;
        }
    }
    // Decide whether scheduling is complete. Because of short-circuit evaluation, splitSource.isFinished()
    // is not called once state is already NO_MORE_SPLITS or FINISHED; otherwise completion requires
    // non-grouped execution, an added lifespan, no schedule groups, and a finished split source.
    // NOTE(review): the original comment here is truncated (it ended with
    // "(by calling `notifyAllLifespansFinishedExecution`)") -- consult the upstream source for the rationale.
    if ((state == State.NO_MORE_SPLITS || state == State.FINISHED) || (!groupedExecution && lifespanAdded && scheduleGroups.isEmpty() && splitSource.isFinished())) {
        // The cases below intentionally fall through so that state walks forward to FINISHED in one call.
        switch(state) {
            case INITIALIZED:
                // state only leaves INITIALIZED when pending splits are placed above, so being here means
                // no split was ever scheduled.
                // But this shouldn't be possible. See usage of EmptySplit in this method.
                throw new IllegalStateException("At least 1 split should have been scheduled for this plan node");
            case SPLITS_ADDED:
                state = State.NO_MORE_SPLITS;
                splitSource.close();
            // fall through
            case NO_MORE_SPLITS:
                state = State.FINISHED;
                whenFinishedOrNewLifespanAdded.set(null);
            // fall through
            case FINISHED:
                return ScheduleResult.nonBlocked(true, overallNewTasks.build(), overallSplitAssignmentCount);
            default:
                throw new IllegalStateException("Unknown state");
        }
    }
    // At least one group can make progress immediately, so report non-blocked.
    if (anyNotBlocked) {
        return ScheduleResult.nonBlocked(false, overallNewTasks.build(), overallSplitAssignmentCount);
    }
    if (anyBlockedOnPlacements) {
        // In a broadcast join, output buffers of the tasks in build source stage have to
        // hold onto all data produced before probe side task scheduling finishes,
        // even if the data is acknowledged by all known consumers. This is because
        // new consumers may be added until the probe side task scheduling finishes.
        // 
        // As a result, the following line is necessary to prevent deadlock
        // due to neither build nor probe can make any progress.
        // The build side blocks due to a full output buffer.
        // In the meantime the probe side split cannot be consumed since
        // builder side hash table construction has not finished.
        // 
        // TODO: When SourcePartitionedScheduler is used as a SourceScheduler, it shouldn't need to worry about
        // task scheduling and creation -- these are done by the StageScheduler.
        overallNewTasks.addAll(finalizeTaskCreationIfNecessary());
    }
    // Pick the blocked reason from the combination of "waiting for splits" and "waiting for placement".
    ScheduleResult.BlockedReason blockedReason;
    if (anyBlockedOnNextSplitBatch) {
        blockedReason = anyBlockedOnPlacements ? MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE : WAITING_FOR_SOURCE;
    } else {
        blockedReason = anyBlockedOnPlacements ? SPLIT_QUEUES_FULL : NO_ACTIVE_DRIVER_GROUP;
    }
    // Also wake up when the scheduler finishes or a new lifespan is added.
    overallBlockedFutures.add(whenFinishedOrNewLifespanAdded);
    return ScheduleResult.blocked(false, overallNewTasks.build(), nonCancellationPropagating(whenAnyComplete(overallBlockedFutures)), blockedReason, overallSplitAssignmentCount);
}
Also used : ArrayList(java.util.ArrayList) EmptySplit(com.facebook.presto.split.EmptySplit) RemoteTask(com.facebook.presto.execution.RemoteTask) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) InternalNode(com.facebook.presto.metadata.InternalNode) EmptySplit(com.facebook.presto.split.EmptySplit) Split(com.facebook.presto.metadata.Split) Lifespan(com.facebook.presto.execution.Lifespan) HashSet(java.util.HashSet) BucketedSplitPlacementPolicy(com.facebook.presto.execution.scheduler.FixedSourcePartitionedScheduler.BucketedSplitPlacementPolicy)

Example 3 with EmptySplit

use of com.facebook.presto.split.EmptySplit in project presto by prestodb.

the class NodePartitioningManager method getSplitToBucket.

/**
 * Builds a function that maps a {@code Split} to its bucket number for the given partitioning.
 *
 * <p>Regular splits are delegated to the connector's split-bucket function. An
 * {@code EmptySplit} has no connector-level bucket, so it is mapped to bucket 0 when its
 * lifespan is task-wide and to the lifespan id otherwise. For any split whose lifespan is
 * not task-wide, the resulting bucket must equal the lifespan id.
 *
 * @param session session used to derive the connector session
 * @param partitioningHandle partitioning whose connector supplies the bucket function;
 *        its connector id must be present
 * @return the split-to-bucket mapping
 * @throws IllegalArgumentException if the connector returns no bucket function, or (from the
 *         returned function) if a bucket disagrees with a non-task-wide lifespan id
 */
private ToIntFunction<Split> getSplitToBucket(Session session, PartitioningHandle partitioningHandle) {
    ConnectorNodePartitioningProvider provider = partitioningProviderManager.getPartitioningProvider(partitioningHandle.getConnectorId().get());
    ToIntFunction<ConnectorSplit> connectorBucketFunction = provider.getSplitBucketFunction(
            partitioningHandle.getTransactionHandle().orElse(null),
            session.toConnectorSession(),
            partitioningHandle.getConnectorHandle());
    checkArgument(connectorBucketFunction != null, "No partitioning %s", partitioningHandle);
    return split -> {
        ConnectorSplit connectorSplit = split.getConnectorSplit();
        int bucket;
        if (!(connectorSplit instanceof EmptySplit)) {
            // Ordinary split: the connector decides the bucket.
            bucket = connectorBucketFunction.applyAsInt(connectorSplit);
        } else if (split.getLifespan().isTaskWide()) {
            bucket = 0;
        } else {
            bucket = split.getLifespan().getId();
        }
        // For a non-task-wide lifespan, the lifespan id and the bucket must agree.
        boolean taskWide = split.getLifespan().isTaskWide();
        if (!taskWide) {
            checkArgument(split.getLifespan().getId() == bucket);
        }
        return bucket;
    };
}
Also used : IntStream(java.util.stream.IntStream) ConnectorPartitionHandle(com.facebook.presto.spi.connector.ConnectorPartitionHandle) DEAD(com.facebook.presto.metadata.InternalNode.NodeStatus.DEAD) DynamicBucketNodeMap(com.facebook.presto.execution.scheduler.group.DynamicBucketNodeMap) NodeSelectionStats(com.facebook.presto.execution.scheduler.nodeSelection.NodeSelectionStats) NodeScheduler(com.facebook.presto.execution.scheduler.NodeScheduler) PrestoException(com.facebook.presto.spi.PrestoException) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) EmptySplit(com.facebook.presto.split.EmptySplit) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Node(com.facebook.presto.spi.Node) ImmutableList(com.google.common.collect.ImmutableList) BucketNodeMap(com.facebook.presto.execution.scheduler.BucketNodeMap) Objects.requireNonNull(java.util.Objects.requireNonNull) SystemSessionProperties.getMaxTasksPerStage(com.facebook.presto.SystemSessionProperties.getMaxTasksPerStage) Type(com.facebook.presto.common.type.Type) BiMap(com.google.common.collect.BiMap) BucketPartitionFunction(com.facebook.presto.operator.BucketPartitionFunction) NODE_SELECTION_NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NODE_SELECTION_NOT_SUPPORTED) FixedBucketNodeMap(com.facebook.presto.execution.scheduler.FixedBucketNodeMap) Session(com.facebook.presto.Session) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ToIntFunction(java.util.function.ToIntFunction) NodeSelectionStrategy(com.facebook.presto.spi.schedule.NodeSelectionStrategy) PartitionFunction(com.facebook.presto.operator.PartitionFunction) String.format(java.lang.String.format) InternalNode(com.facebook.presto.metadata.InternalNode) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HashBiMap(com.google.common.collect.HashBiMap) List(java.util.List) 
BucketFunction(com.facebook.presto.spi.BucketFunction) Split(com.facebook.presto.metadata.Split) ConnectorBucketNodeMap(com.facebook.presto.spi.connector.ConnectorBucketNodeMap) Optional(java.util.Optional) ConnectorId(com.facebook.presto.spi.ConnectorId) Collections(java.util.Collections) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) EmptySplit(com.facebook.presto.split.EmptySplit) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit)

Example 4 with EmptySplit

use of com.facebook.presto.split.EmptySplit in project presto by prestodb.

the class TableScanOperator method addSplit.

/**
 * Supplies the single split this table scan will read.
 *
 * <p>A split may be set only once. If the operator has already finished, the split is
 * discarded and the returned supplier always produces an empty {@link Optional}.
 *
 * @param split the split to read; must not be null
 * @return a supplier that reports the operator's page source when, at invocation time,
 *         it is an {@link UpdatablePageSource}; otherwise an empty optional
 */
@Override
public Supplier<Optional<UpdatablePageSource>> addSplit(Split split) {
    requireNonNull(split, "split is null");
    checkState(this.split == null, "Table scan split already set");
    // Nothing to do for an operator that is already finished.
    if (finished) {
        return () -> Optional.empty();
    }
    this.split = split;
    // Surface the split's info object, if any, via the operator context.
    Object details = split.getInfo();
    if (details != null) {
        operatorContext.setInfoSupplier(Suppliers.ofInstance(new SplitOperatorInfo(details)));
    }
    // Complete `blocked` now that the split has been provided.
    blocked.set(null);
    // An EmptySplit produces no data; install its page source eagerly.
    boolean isEmptySplit = split.getConnectorSplit() instanceof EmptySplit;
    if (isEmptySplit) {
        source = new EmptySplitPageSource();
    }
    // The `source` field is read when the supplier runs, not when it is created.
    return () -> {
        if (!(source instanceof UpdatablePageSource)) {
            return Optional.empty();
        }
        return Optional.of((UpdatablePageSource) source);
    };
}
Also used : UpdatablePageSource(com.facebook.presto.spi.UpdatablePageSource) EmptySplit(com.facebook.presto.split.EmptySplit) EmptySplitPageSource(com.facebook.presto.split.EmptySplitPageSource)

Aggregations

EmptySplit (com.facebook.presto.split.EmptySplit)4 InternalNode (com.facebook.presto.metadata.InternalNode)2 Split (com.facebook.presto.metadata.Split)2 UpdatablePageSource (com.facebook.presto.spi.UpdatablePageSource)2 EmptySplitPageSource (com.facebook.presto.split.EmptySplitPageSource)2 ArrayList (java.util.ArrayList)2 Session (com.facebook.presto.Session)1 SystemSessionProperties.getMaxTasksPerStage (com.facebook.presto.SystemSessionProperties.getMaxTasksPerStage)1 Type (com.facebook.presto.common.type.Type)1 Lifespan (com.facebook.presto.execution.Lifespan)1 RemoteTask (com.facebook.presto.execution.RemoteTask)1 BucketNodeMap (com.facebook.presto.execution.scheduler.BucketNodeMap)1 FixedBucketNodeMap (com.facebook.presto.execution.scheduler.FixedBucketNodeMap)1 BucketedSplitPlacementPolicy (com.facebook.presto.execution.scheduler.FixedSourcePartitionedScheduler.BucketedSplitPlacementPolicy)1 NodeScheduler (com.facebook.presto.execution.scheduler.NodeScheduler)1 DynamicBucketNodeMap (com.facebook.presto.execution.scheduler.group.DynamicBucketNodeMap)1 NodeSelectionStats (com.facebook.presto.execution.scheduler.nodeSelection.NodeSelectionStats)1 DEAD (com.facebook.presto.metadata.InternalNode.NodeStatus.DEAD)1 BucketPartitionFunction (com.facebook.presto.operator.BucketPartitionFunction)1 PartitionFunction (com.facebook.presto.operator.PartitionFunction)1