Search in sources:

Example 1 with CloseableSplitSourceProvider

Use of com.facebook.presto.split.CloseableSplitSourceProvider in project presto by prestodb.

From the class PrestoSparkRddFactory, method createRdd:

/**
 * Builds the Spark pair RDD that runs the tasks of a single plan fragment.
 *
 * <p>The method serializes a task descriptor for the fragment, collects the shuffle input RDDs
 * (all of which must have the same number of partitions), and then decides how task sources
 * are supplied:
 * <ul>
 *   <li>fragment contains table scans: split sources are created and turned into a task source RDD;</li>
 *   <li>no table scans and no RDD inputs: a task source RDD built from a single empty list is used so
 *       that a task is still scheduled;</li>
 *   <li>no table scans but RDD inputs exist: no task source RDD is used.</li>
 * </ul>
 *
 * @param sparkContext Spark context used to create the RDDs
 * @param session session whose representation and extra credentials go into the task descriptor
 * @param fragment plan fragment to execute
 * @param executorFactoryProvider passed through to the task processor
 * @param taskInfoCollector passed through to the task processor
 * @param shuffleStatsCollector passed through to the task processor
 * @param tableWriteInfo table write information for the fragment
 * @param rddInputs shuffle inputs keyed by the fragment that produces them
 * @param broadcastInputs broadcast inputs keyed by the fragment that produces them
 * @param outputType class of the task output rows
 * @return pair RDD keyed by {@code MutablePartitionId} carrying values of {@code outputType}
 * @throws IllegalArgumentException if the input RDDs disagree on the number of partitions, or if a
 *         fragment without any inputs is not {@code SINGLE_DISTRIBUTION} partitioned
 */
private <T extends PrestoSparkTaskOutput> JavaPairRDD<MutablePartitionId, T> createRdd(
        JavaSparkContext sparkContext,
        Session session,
        PlanFragment fragment,
        PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider,
        CollectionAccumulator<SerializedTaskInfo> taskInfoCollector,
        CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector,
        TableWriteInfo tableWriteInfo,
        Map<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> rddInputs,
        Map<PlanFragmentId, Broadcast<?>> broadcastInputs,
        Class<T> outputType) {
    checkInputs(fragment.getRemoteSourceNodes(), rddInputs, broadcastInputs);

    // Describe the task once and ship it to the executors in JSON-serialized form.
    SerializedPrestoSparkTaskDescriptor serializedTaskDescriptor = new SerializedPrestoSparkTaskDescriptor(
            taskDescriptorJsonCodec.toJsonBytes(new PrestoSparkTaskDescriptor(
                    session.toSessionRepresentation(),
                    session.getIdentity().getExtraCredentials(),
                    fragment,
                    tableWriteInfo)));

    // Index the shuffle inputs by fragment id; the first input fixes the partition count
    // that every subsequent input has to match.
    Optional<Integer> numberOfShufflePartitions = Optional.empty();
    Map<String, RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>>> shuffleInputRddMap = new HashMap<>();
    for (Map.Entry<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> entry : rddInputs.entrySet()) {
        RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> shuffleRdd = entry.getValue().rdd();
        shuffleInputRddMap.put(entry.getKey().toString(), shuffleRdd);
        if (numberOfShufflePartitions.isPresent()) {
            checkArgument(
                    numberOfShufflePartitions.get() == shuffleRdd.getNumPartitions(),
                    "Incompatible number of input partitions: %s != %s",
                    numberOfShufflePartitions.get(),
                    shuffleRdd.getNumPartitions());
        } else {
            numberOfShufflePartitions = Optional.of(shuffleRdd.getNumPartitions());
        }
    }

    PrestoSparkTaskProcessor<T> taskProcessor = new PrestoSparkTaskProcessor<>(
            executorFactoryProvider,
            serializedTaskDescriptor,
            taskInfoCollector,
            shuffleStatsCollector,
            toTaskProcessorBroadcastInputs(broadcastInputs),
            outputType);

    Optional<PrestoSparkTaskSourceRdd> taskSourceRdd;
    List<TableScanNode> tableScans = findTableScanNodes(fragment.getRoot());
    if (tableScans.isEmpty()) {
        if (rddInputs.isEmpty()) {
            checkArgument(fragment.getPartitioning().equals(SINGLE_DISTRIBUTION), "SINGLE_DISTRIBUTION partitioning is expected: %s", fragment.getPartitioning());
            // A fragment without inputs can still produce output (e.g. a ValuesNode), so a task
            // has to be scheduled anyway. A task source RDD with exactly one partition forces
            // that; with no table scans around, the partition's TaskSource list is simply empty.
            taskSourceRdd = Optional.of(new PrestoSparkTaskSourceRdd(sparkContext.sc(), ImmutableList.of(ImmutableList.of())));
        } else {
            // Shuffle inputs alone drive the tasks.
            taskSourceRdd = Optional.empty();
        }
    } else {
        // The split source provider is closed as soon as the task source RDD has been built.
        try (CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits)) {
            Map<PlanNodeId, SplitSource> splitSources = new SplitSourceFactory(splitSourceProvider, WarningCollector.NOOP)
                    .createSplitSources(fragment, session, tableWriteInfo);
            taskSourceRdd = Optional.of(createTaskSourcesRdd(
                    fragment.getId(),
                    sparkContext,
                    session,
                    fragment.getPartitioning(),
                    tableScans,
                    splitSources,
                    numberOfShufflePartitions));
        }
    }

    return JavaPairRDD.fromRDD(
            PrestoSparkTaskRdd.create(sparkContext.sc(), taskSourceRdd, shuffleInputRddMap, taskProcessor),
            classTag(MutablePartitionId.class),
            classTag(outputType));
}
Also used : SerializedPrestoSparkTaskDescriptor(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor) PrestoSparkTaskDescriptor(com.facebook.presto.spark.PrestoSparkTaskDescriptor) HashMap(java.util.HashMap) SplitSourceFactory(com.facebook.presto.sql.planner.SplitSourceFactory) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) MutablePartitionId(com.facebook.presto.spark.classloader_interface.MutablePartitionId) RDD(org.apache.spark.rdd.RDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) PlanFragmentId(com.facebook.presto.sql.planner.plan.PlanFragmentId) PrestoSparkMutableRow(com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow) PrestoSparkTaskSourceRdd(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskSourceRdd) PrestoSparkTaskProcessor(com.facebook.presto.spark.classloader_interface.PrestoSparkTaskProcessor) SerializedPrestoSparkTaskDescriptor(com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor) CloseableSplitSourceProvider(com.facebook.presto.split.CloseableSplitSourceProvider) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) Tuple2(scala.Tuple2) SplitSource(com.facebook.presto.split.SplitSource) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap)

Example 2 with CloseableSplitSourceProvider

Use of com.facebook.presto.split.CloseableSplitSourceProvider in project presto by prestodb.

From the class SqlQueryExecution, method planDistribution:

/**
 * Prepares distributed execution for an already planned query.
 *
 * <p>Registers a listener that closes the split source provider when the query reaches a done
 * state, records the output columns on the state machine, chooses the root output buffers, and
 * builds the query scheduler (legacy or current, depending on the session). Building the
 * scheduler creates the stage execution objects but does not schedule execution.
 *
 * <p>If the query is already done on entry, no scheduler is created. If it becomes done while
 * the scheduler is being built, the new scheduler is aborted and the stored reference cleared.
 *
 * @param plan root of the query plan to distribute
 */
private void planDistribution(PlanRoot plan) {
    CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits);

    // Make sure split sources get closed once the query is done.
    stateMachine.addStateChangeListener(newState -> {
        if (newState.isDone()) {
            splitSourceProvider.close();
        }
    });

    // A query that is already done (e.g. canceled) needs no scheduler.
    if (stateMachine.isDone()) {
        return;
    }

    SubPlan rootStage = plan.getRoot();

    // Publish the output column names and types.
    stateMachine.setColumns(
            ((OutputNode) rootStage.getFragment().getRoot()).getColumnNames(),
            rootStage.getFragment().getTypes());

    PartitioningHandle rootPartitioning = rootStage.getFragment().getPartitioningScheme().getPartitioning().getHandle();
    OutputBuffers rootBuffers = isSpoolingOutputBufferEnabled(getSession())
            ? createSpoolingOutputBuffers()
            : createInitialEmptyOutputBuffers(rootPartitioning).withBuffer(OUTPUT_BUFFER_ID, BROADCAST_PARTITION_ID).withNoMoreBufferIds();

    SplitSourceFactory splitSourceFactory = new SplitSourceFactory(splitSourceProvider, stateMachine.getWarningCollector());

    // Build the stage execution objects; nothing is scheduled at this point.
    final SqlQuerySchedulerInterface scheduler;
    if (isUseLegacyScheduler(getSession())) {
        scheduler = LegacySqlQueryScheduler.createSqlQueryScheduler(
                locationFactory,
                executionPolicy,
                queryExecutor,
                schedulerStats,
                sectionExecutionFactory,
                remoteTaskFactory,
                splitSourceFactory,
                stateMachine.getSession(),
                metadata.getFunctionAndTypeManager(),
                stateMachine,
                rootStage,
                rootBuffers,
                plan.isSummarizeTaskInfos(),
                runtimePlanOptimizers,
                stateMachine.getWarningCollector(),
                idAllocator,
                variableAllocator.get(),
                planChecker,
                metadata,
                sqlParser,
                partialResultQueryManager);
    } else {
        scheduler = SqlQueryScheduler.createSqlQueryScheduler(
                locationFactory,
                executionPolicy,
                queryExecutor,
                schedulerStats,
                sectionExecutionFactory,
                remoteTaskFactory,
                splitSourceFactory,
                internalNodeManager,
                stateMachine.getSession(),
                stateMachine,
                rootStage,
                plan.isSummarizeTaskInfos(),
                metadata.getFunctionAndTypeManager(),
                runtimePlanOptimizers,
                stateMachine.getWarningCollector(),
                idAllocator,
                variableAllocator.get(),
                planChecker,
                metadata,
                sqlParser,
                partialResultQueryManager);
    }
    queryScheduler.set(scheduler);

    // The query may have become done while the scheduler was being created. Abort the
    // scheduler directly in that case, since the callback may have already fired.
    if (stateMachine.isDone()) {
        scheduler.abort();
        queryScheduler.set(null);
    }
}
Also used : OutputBuffers.createInitialEmptyOutputBuffers(com.facebook.presto.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers) OutputBuffers.createSpoolingOutputBuffers(com.facebook.presto.execution.buffer.OutputBuffers.createSpoolingOutputBuffers) OutputBuffers(com.facebook.presto.execution.buffer.OutputBuffers) CloseableSplitSourceProvider(com.facebook.presto.split.CloseableSplitSourceProvider) SplitSourceFactory(com.facebook.presto.sql.planner.SplitSourceFactory) SqlQuerySchedulerInterface(com.facebook.presto.execution.scheduler.SqlQuerySchedulerInterface) PartitioningHandle(com.facebook.presto.sql.planner.PartitioningHandle) SubPlan(com.facebook.presto.sql.planner.SubPlan)

Aggregations

CloseableSplitSourceProvider (com.facebook.presto.split.CloseableSplitSourceProvider): 2; SplitSourceFactory (com.facebook.presto.sql.planner.SplitSourceFactory): 2; OutputBuffers (com.facebook.presto.execution.buffer.OutputBuffers): 1; OutputBuffers.createInitialEmptyOutputBuffers (com.facebook.presto.execution.buffer.OutputBuffers.createInitialEmptyOutputBuffers): 1; OutputBuffers.createSpoolingOutputBuffers (com.facebook.presto.execution.buffer.OutputBuffers.createSpoolingOutputBuffers): 1; SqlQuerySchedulerInterface (com.facebook.presto.execution.scheduler.SqlQuerySchedulerInterface): 1; PrestoSparkTaskDescriptor (com.facebook.presto.spark.PrestoSparkTaskDescriptor): 1; MutablePartitionId (com.facebook.presto.spark.classloader_interface.MutablePartitionId): 1; PrestoSparkMutableRow (com.facebook.presto.spark.classloader_interface.PrestoSparkMutableRow): 1; PrestoSparkTaskProcessor (com.facebook.presto.spark.classloader_interface.PrestoSparkTaskProcessor): 1; PrestoSparkTaskSourceRdd (com.facebook.presto.spark.classloader_interface.PrestoSparkTaskSourceRdd): 1; SerializedPrestoSparkTaskDescriptor (com.facebook.presto.spark.classloader_interface.SerializedPrestoSparkTaskDescriptor): 1; PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId): 1; TableScanNode (com.facebook.presto.spi.plan.TableScanNode): 1; SplitSource (com.facebook.presto.split.SplitSource): 1; PartitioningHandle (com.facebook.presto.sql.planner.PartitioningHandle): 1; SubPlan (com.facebook.presto.sql.planner.SubPlan): 1; PlanFragmentId (com.facebook.presto.sql.planner.plan.PlanFragmentId): 1; ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 1; HashMap (java.util.HashMap): 1