Search in sources :

Example 1 with SplitBatch

use of com.facebook.presto.split.SplitSource.SplitBatch in project presto by prestodb.

the class PrestoSparkPartitionedSplitAssigner method getNextBatch.

@Override
public Optional<SetMultimap<Integer, ScheduledSplit>> getNextBatch() {
    if (splitSource.isFinished()) {
        return Optional.empty();
    }
    List<ScheduledSplit> scheduledSplits = new ArrayList<>();
    while (true) {
        int remaining = maxBatchSize - scheduledSplits.size();
        if (remaining <= 0) {
            break;
        }
        SplitBatch splitBatch = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), min(remaining, 1000)));
        for (Split split : splitBatch.getSplits()) {
            scheduledSplits.add(new ScheduledSplit(sequenceId++, tableScanNodeId, split));
        }
        if (splitBatch.isLastBatch() || splitSource.isFinished()) {
            break;
        }
    }
    return Optional.of(assignSplitsToTasks(scheduledSplits));
}
Also used : ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) ArrayList(java.util.ArrayList) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) Split(com.facebook.presto.metadata.Split) ScheduledSplit(com.facebook.presto.execution.ScheduledSplit) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch)

Example 2 with SplitBatch

use of com.facebook.presto.split.SplitSource.SplitBatch in project presto by prestodb.

the class SourcePartitionedScheduler method schedule.

@Override
public synchronized ScheduleResult schedule() {
    dropListenersFromWhenFinishedOrNewLifespansAdded();
    int overallSplitAssignmentCount = 0;
    ImmutableSet.Builder<RemoteTask> overallNewTasks = ImmutableSet.builder();
    List<ListenableFuture<?>> overallBlockedFutures = new ArrayList<>();
    boolean anyBlockedOnPlacements = false;
    boolean anyBlockedOnNextSplitBatch = false;
    boolean anyNotBlocked = false;
    for (Entry<Lifespan, ScheduleGroup> entry : scheduleGroups.entrySet()) {
        Lifespan lifespan = entry.getKey();
        ScheduleGroup scheduleGroup = entry.getValue();
        if (scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS || scheduleGroup.state == ScheduleGroupState.DONE) {
            verify(scheduleGroup.nextSplitBatchFuture == null);
        } else if (scheduleGroup.pendingSplits.isEmpty()) {
            // try to get the next batch
            if (scheduleGroup.nextSplitBatchFuture == null) {
                scheduleGroup.nextSplitBatchFuture = splitSource.getNextBatch(scheduleGroup.partitionHandle, lifespan, splitBatchSize);
                long start = System.nanoTime();
                addSuccessCallback(scheduleGroup.nextSplitBatchFuture, () -> stage.recordGetSplitTime(start));
            }
            if (scheduleGroup.nextSplitBatchFuture.isDone()) {
                SplitBatch nextSplits = getFutureValue(scheduleGroup.nextSplitBatchFuture);
                scheduleGroup.nextSplitBatchFuture = null;
                scheduleGroup.pendingSplits = new HashSet<>(nextSplits.getSplits());
                if (nextSplits.isLastBatch()) {
                    if (scheduleGroup.state == ScheduleGroupState.INITIALIZED && scheduleGroup.pendingSplits.isEmpty()) {
                        // Add an empty split in case no splits have been produced for the source.
                        // For source operators, they never take input, but they may produce output.
                        // This is well handled by Presto execution engine.
                        // However, there are certain non-source operators that may produce output without any input,
                        // for example, 1) an AggregationOperator, 2) a HashAggregationOperator where one of the grouping sets is ().
                        // Scheduling an empty split kicks off necessary driver instantiation to make this work.
                        scheduleGroup.pendingSplits.add(new Split(splitSource.getConnectorId(), splitSource.getTransactionHandle(), new EmptySplit(splitSource.getConnectorId()), lifespan, NON_CACHEABLE));
                    }
                    scheduleGroup.state = ScheduleGroupState.NO_MORE_SPLITS;
                }
            } else {
                overallBlockedFutures.add(scheduleGroup.nextSplitBatchFuture);
                anyBlockedOnNextSplitBatch = true;
                continue;
            }
        }
        Multimap<InternalNode, Split> splitAssignment = ImmutableMultimap.of();
        if (!scheduleGroup.pendingSplits.isEmpty()) {
            if (!scheduleGroup.placementFuture.isDone()) {
                anyBlockedOnPlacements = true;
                continue;
            }
            if (scheduleGroup.state == ScheduleGroupState.INITIALIZED) {
                scheduleGroup.state = ScheduleGroupState.SPLITS_ADDED;
            }
            if (state == State.INITIALIZED) {
                state = State.SPLITS_ADDED;
            }
            // calculate placements for splits
            SplitPlacementResult splitPlacementResult = splitPlacementPolicy.computeAssignments(scheduleGroup.pendingSplits);
            splitAssignment = splitPlacementResult.getAssignments();
            // remove splits with successful placements
            // AbstractSet.removeAll performs terribly here.
            splitAssignment.values().forEach(scheduleGroup.pendingSplits::remove);
            overallSplitAssignmentCount += splitAssignment.size();
            // if not completed placed, mark scheduleGroup as blocked on placement
            if (!scheduleGroup.pendingSplits.isEmpty()) {
                scheduleGroup.placementFuture = splitPlacementResult.getBlocked();
                overallBlockedFutures.add(scheduleGroup.placementFuture);
                anyBlockedOnPlacements = true;
            }
        }
        // if no new splits will be assigned, update state and attach completion event
        Multimap<InternalNode, Lifespan> noMoreSplitsNotification = ImmutableMultimap.of();
        if (scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS) {
            scheduleGroup.state = ScheduleGroupState.DONE;
            if (!lifespan.isTaskWide()) {
                InternalNode node = ((BucketedSplitPlacementPolicy) splitPlacementPolicy).getNodeForBucket(lifespan.getId());
                noMoreSplitsNotification = ImmutableMultimap.of(node, lifespan);
            }
        }
        // assign the splits with successful placements
        overallNewTasks.addAll(assignSplits(splitAssignment, noMoreSplitsNotification));
        // As a result, to avoid busy loops caused by 1, we check pendingSplits.isEmpty() instead of placementFuture.isDone() here.
        if (scheduleGroup.nextSplitBatchFuture == null && scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state != ScheduleGroupState.DONE) {
            anyNotBlocked = true;
        }
    }
    // (by calling `notifyAllLifespansFinishedExecution`)
    if ((state == State.NO_MORE_SPLITS || state == State.FINISHED) || (!groupedExecution && lifespanAdded && scheduleGroups.isEmpty() && splitSource.isFinished())) {
        switch(state) {
            case INITIALIZED:
                // But this shouldn't be possible. See usage of EmptySplit in this method.
                throw new IllegalStateException("At least 1 split should have been scheduled for this plan node");
            case SPLITS_ADDED:
                state = State.NO_MORE_SPLITS;
                splitSource.close();
            // fall through
            case NO_MORE_SPLITS:
                state = State.FINISHED;
                whenFinishedOrNewLifespanAdded.set(null);
            // fall through
            case FINISHED:
                return ScheduleResult.nonBlocked(true, overallNewTasks.build(), overallSplitAssignmentCount);
            default:
                throw new IllegalStateException("Unknown state");
        }
    }
    if (anyNotBlocked) {
        return ScheduleResult.nonBlocked(false, overallNewTasks.build(), overallSplitAssignmentCount);
    }
    if (anyBlockedOnPlacements) {
        // In a broadcast join, output buffers of the tasks in build source stage have to
        // hold onto all data produced before probe side task scheduling finishes,
        // even if the data is acknowledged by all known consumers. This is because
        // new consumers may be added until the probe side task scheduling finishes.
        // 
        // As a result, the following line is necessary to prevent deadlock
        // due to neither build nor probe can make any progress.
        // The build side blocks due to a full output buffer.
        // In the meantime the probe side split cannot be consumed since
        // builder side hash table construction has not finished.
        // 
        // TODO: When SourcePartitionedScheduler is used as a SourceScheduler, it shouldn't need to worry about
        // task scheduling and creation -- these are done by the StageScheduler.
        overallNewTasks.addAll(finalizeTaskCreationIfNecessary());
    }
    ScheduleResult.BlockedReason blockedReason;
    if (anyBlockedOnNextSplitBatch) {
        blockedReason = anyBlockedOnPlacements ? MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE : WAITING_FOR_SOURCE;
    } else {
        blockedReason = anyBlockedOnPlacements ? SPLIT_QUEUES_FULL : NO_ACTIVE_DRIVER_GROUP;
    }
    overallBlockedFutures.add(whenFinishedOrNewLifespanAdded);
    return ScheduleResult.blocked(false, overallNewTasks.build(), nonCancellationPropagating(whenAnyComplete(overallBlockedFutures)), blockedReason, overallSplitAssignmentCount);
}
Also used : ArrayList(java.util.ArrayList) EmptySplit(com.facebook.presto.split.EmptySplit) RemoteTask(com.facebook.presto.execution.RemoteTask) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) InternalNode(com.facebook.presto.metadata.InternalNode) EmptySplit(com.facebook.presto.split.EmptySplit) Split(com.facebook.presto.metadata.Split) Lifespan(com.facebook.presto.execution.Lifespan) HashSet(java.util.HashSet) BucketedSplitPlacementPolicy(com.facebook.presto.execution.scheduler.FixedSourcePartitionedScheduler.BucketedSplitPlacementPolicy)

Example 3 with SplitBatch

use of com.facebook.presto.split.SplitSource.SplitBatch in project presto by prestodb.

the class ExtractSpatialJoins method loadKdbTree.

private static KdbTree loadKdbTree(String tableName, Session session, Metadata metadata, SplitManager splitManager, PageSourceManager pageSourceManager) {
    QualifiedObjectName name = toQualifiedObjectName(tableName, session.getCatalog().get(), session.getSchema().get());
    TableHandle tableHandle = metadata.getTableHandle(session, name).orElseThrow(() -> new PrestoException(INVALID_SPATIAL_PARTITIONING, format("Table not found: %s", name)));
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    List<ColumnHandle> visibleColumnHandles = columnHandles.values().stream().filter(handle -> !metadata.getColumnMetadata(session, tableHandle, handle).isHidden()).collect(toImmutableList());
    checkSpatialPartitioningTable(visibleColumnHandles.size() == 1, "Expected single column for table %s, but found %s columns", name, columnHandles.size());
    ColumnHandle kdbTreeColumn = Iterables.getOnlyElement(visibleColumnHandles);
    TableLayoutResult layout = metadata.getLayout(session, tableHandle, Constraint.alwaysTrue(), Optional.of(ImmutableSet.of(kdbTreeColumn)));
    TableHandle newTableHandle = layout.getLayout().getNewTableHandle();
    Optional<KdbTree> kdbTree = Optional.empty();
    try (SplitSource splitSource = splitManager.getSplits(session, newTableHandle, UNGROUPED_SCHEDULING, WarningCollector.NOOP)) {
        while (!Thread.currentThread().isInterrupted()) {
            SplitBatch splitBatch = getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, Lifespan.taskWide(), 1000));
            List<Split> splits = splitBatch.getSplits();
            for (Split split : splits) {
                try (ConnectorPageSource pageSource = pageSourceManager.createPageSource(session, split, newTableHandle, ImmutableList.of(kdbTreeColumn))) {
                    do {
                        getFutureValue(pageSource.isBlocked());
                        Page page = pageSource.getNextPage();
                        if (page != null && page.getPositionCount() > 0) {
                            checkSpatialPartitioningTable(!kdbTree.isPresent(), "Expected exactly one row for table %s, but found more", name);
                            checkSpatialPartitioningTable(page.getPositionCount() == 1, "Expected exactly one row for table %s, but found %s rows", name, page.getPositionCount());
                            String kdbTreeJson = VARCHAR.getSlice(page.getBlock(0), 0).toStringUtf8();
                            try {
                                kdbTree = Optional.of(KdbTreeUtils.fromJson(kdbTreeJson));
                            } catch (IllegalArgumentException e) {
                                checkSpatialPartitioningTable(false, "Invalid JSON string for KDB tree: %s", e.getMessage());
                            }
                        }
                    } while (!pageSource.isFinished());
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            if (splitBatch.isLastBatch()) {
                break;
            }
        }
    }
    checkSpatialPartitioningTable(kdbTree.isPresent(), "Expected exactly one row for table %s, but got none", name);
    return kdbTree.get();
}
Also used : FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) WarningCollector(com.facebook.presto.spi.WarningCollector) Page(com.facebook.presto.common.Page) SpatialJoinNode(com.facebook.presto.sql.planner.plan.SpatialJoinNode) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) TypeSignature(com.facebook.presto.common.type.TypeSignature) MoreFutures.getFutureValue(com.facebook.airlift.concurrent.MoreFutures.getFutureValue) NOT_PARTITIONED(com.facebook.presto.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) SpatialJoinUtils.getFlippedFunctionHandle(com.facebook.presto.util.SpatialJoinUtils.getFlippedFunctionHandle) Pattern(com.facebook.presto.matching.Pattern) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Capture(com.facebook.presto.matching.Capture) SpatialJoinUtils.flip(com.facebook.presto.util.SpatialJoinUtils.flip) Map(java.util.Map) UNGROUPED_SCHEDULING(com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) LOCAL(com.facebook.presto.spi.plan.ProjectNode.Locality.LOCAL) QualifiedObjectName(com.facebook.presto.common.QualifiedObjectName) SystemSessionProperties.isSpatialJoinEnabled(com.facebook.presto.SystemSessionProperties.isSpatialJoinEnabled) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) CallExpression(com.facebook.presto.spi.relation.CallExpression) Splitter(com.google.common.base.Splitter) SplitSource(com.facebook.presto.split.SplitSource) Lifespan(com.facebook.presto.execution.Lifespan) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SplitManager(com.facebook.presto.split.SplitManager) String.format(java.lang.String.format) KDB_TREE(com.facebook.presto.common.type.KdbTreeType.KDB_TREE) UncheckedIOException(java.io.UncheckedIOException) FunctionMetadata(com.facebook.presto.spi.function.FunctionMetadata) List(java.util.List) SpatialJoinUtils.extractSupportedSpatialFunctions(com.facebook.presto.util.SpatialJoinUtils.extractSupportedSpatialFunctions) KdbTreeUtils(com.facebook.presto.geospatial.KdbTreeUtils) ProjectNode(com.facebook.presto.spi.plan.ProjectNode) Capture.newCapture(com.facebook.presto.matching.Capture.newCapture) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Optional(java.util.Optional) CAST(com.facebook.presto.metadata.CastType.CAST) VariablesExtractor.extractUnique(com.facebook.presto.sql.planner.VariablesExtractor.extractUnique) INNER(com.facebook.presto.sql.planner.plan.JoinNode.Type.INNER) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) Iterables(com.google.common.collect.Iterables) TableLayoutResult(com.facebook.presto.metadata.TableLayoutResult) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) Captures(com.facebook.presto.matching.Captures) Assignments(com.facebook.presto.spi.plan.Assignments) Result(com.facebook.presto.sql.planner.iterative.Rule.Result) PrestoException(com.facebook.presto.spi.PrestoException) Patterns.join(com.facebook.presto.sql.planner.plan.Patterns.join) TypeSignatureProvider.fromTypes(com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes) Patterns.filter(com.facebook.presto.sql.planner.plan.Patterns.filter) FilterNode(com.facebook.presto.spi.plan.FilterNode) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch) RowExpressionNodeInliner(com.facebook.presto.expressions.RowExpressionNodeInliner) ImmutableList(com.google.common.collect.ImmutableList) PageSourceManager(com.facebook.presto.split.PageSourceManager) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) SpatialJoinUtils.extractSupportedSpatialComparisons(com.facebook.presto.util.SpatialJoinUtils.extractSupportedSpatialComparisons) ArrayType(com.facebook.presto.common.type.ArrayType) TableHandle(com.facebook.presto.spi.TableHandle) Expressions(com.facebook.presto.sql.relational.Expressions) KdbTree(com.facebook.presto.geospatial.KdbTree) UnnestNode(com.facebook.presto.sql.planner.plan.UnnestNode) Type(com.facebook.presto.common.type.Type) RowExpression(com.facebook.presto.spi.relation.RowExpression) JoinNode(com.facebook.presto.sql.planner.plan.JoinNode) INVALID_SPATIAL_PARTITIONING(com.facebook.presto.spi.StandardErrorCode.INVALID_SPATIAL_PARTITIONING) SystemSessionProperties.getSpatialPartitioningTableName(com.facebook.presto.SystemSessionProperties.getSpatialPartitioningTableName) Session(com.facebook.presto.Session) Rule(com.facebook.presto.sql.planner.iterative.Rule) Constraint(com.facebook.presto.spi.Constraint) IOException(java.io.IOException) OperatorType(com.facebook.presto.common.function.OperatorType) Patterns.source(com.facebook.presto.sql.planner.plan.Patterns.source) PlanNode(com.facebook.presto.spi.plan.PlanNode) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) TypeSignature.parseTypeSignature(com.facebook.presto.common.type.TypeSignature.parseTypeSignature) LEFT(com.facebook.presto.sql.planner.plan.JoinNode.Type.LEFT) ColumnHandle(com.facebook.presto.spi.ColumnHandle) FunctionHandle(com.facebook.presto.spi.function.FunctionHandle) Split(com.facebook.presto.metadata.Split) Context(com.facebook.presto.sql.planner.iterative.Rule.Context) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Metadata(com.facebook.presto.metadata.Metadata) RowExpressionNodeInliner.replaceExpression(com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression) ColumnHandle(com.facebook.presto.spi.ColumnHandle) KdbTree(com.facebook.presto.geospatial.KdbTree) PrestoException(com.facebook.presto.spi.PrestoException) Page(com.facebook.presto.common.Page) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) TableLayoutResult(com.facebook.presto.metadata.TableLayoutResult) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) QualifiedObjectName(com.facebook.presto.common.QualifiedObjectName) SplitBatch(com.facebook.presto.split.SplitSource.SplitBatch) TableHandle(com.facebook.presto.spi.TableHandle) SplitSource(com.facebook.presto.split.SplitSource) Split(com.facebook.presto.metadata.Split)

Aggregations

Split (com.facebook.presto.metadata.Split)3 SplitBatch (com.facebook.presto.split.SplitSource.SplitBatch)3 Lifespan (com.facebook.presto.execution.Lifespan)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 ArrayList (java.util.ArrayList)2 MoreFutures.getFutureValue (com.facebook.airlift.concurrent.MoreFutures.getFutureValue)1 Session (com.facebook.presto.Session)1 SystemSessionProperties.getSpatialPartitioningTableName (com.facebook.presto.SystemSessionProperties.getSpatialPartitioningTableName)1 SystemSessionProperties.isSpatialJoinEnabled (com.facebook.presto.SystemSessionProperties.isSpatialJoinEnabled)1 Page (com.facebook.presto.common.Page)1 QualifiedObjectName (com.facebook.presto.common.QualifiedObjectName)1 OperatorType (com.facebook.presto.common.function.OperatorType)1 ArrayType (com.facebook.presto.common.type.ArrayType)1 INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)1 KDB_TREE (com.facebook.presto.common.type.KdbTreeType.KDB_TREE)1 Type (com.facebook.presto.common.type.Type)1 TypeSignature (com.facebook.presto.common.type.TypeSignature)1 TypeSignature.parseTypeSignature (com.facebook.presto.common.type.TypeSignature.parseTypeSignature)1 VARCHAR (com.facebook.presto.common.type.VarcharType.VARCHAR)1 RemoteTask (com.facebook.presto.execution.RemoteTask)1