Use of com.facebook.presto.split.CloseableSplitSourceProvider in project presto by prestodb.

Class PrestoSparkRddFactory, method createRdd. Here the provider's lifetime is bounded by planning: it is opened in a try-with-resources block, so the split sources created for the fragment's table scans are released as soon as the task source RDD has been built.
private <T extends PrestoSparkTaskOutput> JavaPairRDD<MutablePartitionId, T> createRdd(
        JavaSparkContext sparkContext,
        Session session,
        PlanFragment fragment,
        PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider,
        CollectionAccumulator<SerializedTaskInfo> taskInfoCollector,
        CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector,
        TableWriteInfo tableWriteInfo,
        Map<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> rddInputs,
        Map<PlanFragmentId, Broadcast<?>> broadcastInputs,
        Class<T> outputType)
{
    checkInputs(fragment.getRemoteSourceNodes(), rddInputs, broadcastInputs);

    PrestoSparkTaskDescriptor taskDescriptor = new PrestoSparkTaskDescriptor(
            session.toSessionRepresentation(), session.getIdentity().getExtraCredentials(), fragment, tableWriteInfo);
    SerializedPrestoSparkTaskDescriptor serializedTaskDescriptor =
            new SerializedPrestoSparkTaskDescriptor(taskDescriptorJsonCodec.toJsonBytes(taskDescriptor));

    // all shuffle inputs must have the same number of partitions
    Optional<Integer> numberOfShufflePartitions = Optional.empty();
    Map<String, RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>>> shuffleInputRddMap = new HashMap<>();
    for (Map.Entry<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> input : rddInputs.entrySet()) {
        RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> rdd = input.getValue().rdd();
        shuffleInputRddMap.put(input.getKey().toString(), rdd);
        if (!numberOfShufflePartitions.isPresent()) {
            numberOfShufflePartitions = Optional.of(rdd.getNumPartitions());
        }
        else {
            checkArgument(numberOfShufflePartitions.get() == rdd.getNumPartitions(),
                    "Incompatible number of input partitions: %s != %s", numberOfShufflePartitions.get(), rdd.getNumPartitions());
        }
    }

    PrestoSparkTaskProcessor<T> taskProcessor = new PrestoSparkTaskProcessor<>(
            executorFactoryProvider, serializedTaskDescriptor, taskInfoCollector, shuffleStatsCollector,
            toTaskProcessorBroadcastInputs(broadcastInputs), outputType);

    Optional<PrestoSparkTaskSourceRdd> taskSourceRdd;
    List<TableScanNode> tableScans = findTableScanNodes(fragment.getRoot());
    if (!tableScans.isEmpty()) {
        // try-with-resources guarantees the split sources are released once the task source RDD is built
        try (CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits)) {
            SplitSourceFactory splitSourceFactory = new SplitSourceFactory(splitSourceProvider, WarningCollector.NOOP);
            Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(fragment, session, tableWriteInfo);
            taskSourceRdd = Optional.of(createTaskSourcesRdd(
                    fragment.getId(), sparkContext, session, fragment.getPartitioning(), tableScans, splitSources, numberOfShufflePartitions));
        }
    }
    else if (rddInputs.isEmpty()) {
        checkArgument(fragment.getPartitioning().equals(SINGLE_DISTRIBUTION),
                "SINGLE_DISTRIBUTION partitioning is expected: %s", fragment.getPartitioning());
        // In case of no inputs we still need to schedule a task.
        // A task with no inputs may produce results (e.g.: ValuesNode).
        // To force the task to be scheduled we create a PrestoSparkTaskSourceRdd that contains exactly one partition.
        // Since there are also no table scans in the fragment, the list of TaskSources for this partition is empty.
        taskSourceRdd = Optional.of(new PrestoSparkTaskSourceRdd(sparkContext.sc(), ImmutableList.of(ImmutableList.of())));
    }
    else {
        taskSourceRdd = Optional.empty();
    }

    return JavaPairRDD.fromRDD(
            PrestoSparkTaskRdd.create(sparkContext.sc(), taskSourceRdd, shuffleInputRddMap, taskProcessor),
            classTag(MutablePartitionId.class), classTag(outputType));
}
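The point of the try-with-resources block above is that every split source handed out during planning is released even if split source creation or createTaskSourcesRdd throws. Below is a minimal sketch of a provider with the same shape, assuming simplified stand-in interfaces: SplitSource here is just Closeable, and the names ClosingSplitSourceProvider, getSplits, and openSources are illustrative, not Presto's actual signatures.

import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;

// Illustrative stand-ins; Presto's real SplitSource and SplitSourceProvider carry connector-specific signatures.
interface SplitSource extends Closeable
{
    @Override
    void close();
}

interface SplitSourceProvider
{
    SplitSource getSplits();
}

// Wraps a delegate provider, remembers every split source it hands out,
// and releases all of them when the provider itself is closed.
class ClosingSplitSourceProvider implements SplitSourceProvider, Closeable
{
    private final SplitSourceProvider delegate;
    private final List<SplitSource> openSources = new ArrayList<>();
    private boolean closed;

    ClosingSplitSourceProvider(SplitSourceProvider delegate)
    {
        this.delegate = delegate;
    }

    @Override
    public synchronized SplitSource getSplits()
    {
        if (closed) {
            throw new IllegalStateException("split source provider is already closed");
        }
        SplitSource source = delegate.getSplits();
        openSources.add(source);   // remember the source so close() can release it
        return source;
    }

    @Override
    public synchronized void close()
    {
        if (closed) {
            return;
        }
        closed = true;
        openSources.forEach(SplitSource::close);   // release everything handed out so far
        openSources.clear();
    }
}

Used with try-with-resources, as in createRdd, a provider of this shape guarantees that every source obtained through it is closed exactly once, whether the block completes normally or throws.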
Use of com.facebook.presto.split.CloseableSplitSourceProvider in project presto by prestodb.

Class SqlQueryExecution, method planDistribution. Here the provider must stay open for as long as the query can schedule splits, so instead of try-with-resources it is closed by a state-change listener once the query reaches a terminal state.
private void planDistribution(PlanRoot plan)
{
    CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits);

    // ensure split sources are closed when the query reaches a terminal state
    stateMachine.addStateChangeListener(state -> {
        if (state.isDone()) {
            splitSourceProvider.close();
        }
    });

    // if the query was canceled, skip creating the scheduler
    if (stateMachine.isDone()) {
        return;
    }

    SubPlan outputStagePlan = plan.getRoot();

    // record output fields
    stateMachine.setColumns(((OutputNode) outputStagePlan.getFragment().getRoot()).getColumnNames(), outputStagePlan.getFragment().getTypes());

    PartitioningHandle partitioningHandle = outputStagePlan.getFragment().getPartitioningScheme().getPartitioning().getHandle();
    OutputBuffers rootOutputBuffers;
    if (isSpoolingOutputBufferEnabled(getSession())) {
        rootOutputBuffers = createSpoolingOutputBuffers();
    }
    else {
        rootOutputBuffers = createInitialEmptyOutputBuffers(partitioningHandle)
                .withBuffer(OUTPUT_BUFFER_ID, BROADCAST_PARTITION_ID)
                .withNoMoreBufferIds();
    }

    SplitSourceFactory splitSourceFactory = new SplitSourceFactory(splitSourceProvider, stateMachine.getWarningCollector());

    // build the stage execution objects (this doesn't schedule execution)
    SqlQuerySchedulerInterface scheduler = isUseLegacyScheduler(getSession())
            ? LegacySqlQueryScheduler.createSqlQueryScheduler(
                    locationFactory, executionPolicy, queryExecutor, schedulerStats, sectionExecutionFactory,
                    remoteTaskFactory, splitSourceFactory, stateMachine.getSession(), metadata.getFunctionAndTypeManager(),
                    stateMachine, outputStagePlan, rootOutputBuffers, plan.isSummarizeTaskInfos(), runtimePlanOptimizers,
                    stateMachine.getWarningCollector(), idAllocator, variableAllocator.get(), planChecker, metadata,
                    sqlParser, partialResultQueryManager)
            : SqlQueryScheduler.createSqlQueryScheduler(
                    locationFactory, executionPolicy, queryExecutor, schedulerStats, sectionExecutionFactory,
                    remoteTaskFactory, splitSourceFactory, internalNodeManager, stateMachine.getSession(), stateMachine,
                    outputStagePlan, plan.isSummarizeTaskInfos(), metadata.getFunctionAndTypeManager(), runtimePlanOptimizers,
                    stateMachine.getWarningCollector(), idAllocator, variableAllocator.get(), planChecker, metadata,
                    sqlParser, partialResultQueryManager);

    queryScheduler.set(scheduler);

    // if the query was canceled during scheduler creation, abort the scheduler
    // directly since the callback may have already fired
    if (stateMachine.isDone()) {
        scheduler.abort();
        queryScheduler.set(null);
    }
}
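In contrast to createRdd, planDistribution cannot scope the provider to a block: the scheduler keeps pulling splits for as long as the query runs, so cleanup is tied to the query lifecycle instead. Below is a minimal sketch of that listener pattern, reusing the hypothetical ClosingSplitSourceProvider above together with a stand-in lifecycle class; Presto's QueryStateMachine is far richer than this.

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Consumer;

// Minimal stand-in for the query lifecycle; only the parts the cleanup pattern needs.
class QueryLifecycle
{
    enum State
    {
        RUNNING, FINISHED, FAILED, CANCELED;

        boolean isDone()
        {
            return this != RUNNING;
        }
    }

    private volatile State state = State.RUNNING;
    private final List<Consumer<State>> listeners = new CopyOnWriteArrayList<>();

    void addStateChangeListener(Consumer<State> listener)
    {
        listeners.add(listener);
    }

    boolean isDone()
    {
        return state.isDone();
    }

    void transitionTo(State newState)
    {
        state = newState;
        // CopyOnWriteArrayList tolerates listeners registering concurrently with notification
        listeners.forEach(listener -> listener.accept(newState));
    }
}

// Usage mirroring planDistribution (delegate is whatever actually creates split sources):
QueryLifecycle lifecycle = new QueryLifecycle();
ClosingSplitSourceProvider provider = new ClosingSplitSourceProvider(delegate);
lifecycle.addStateChangeListener(state -> {
    if (state.isDone()) {
        provider.close();   // idempotent, so a duplicate or late notification is harmless
    }
});

This also explains the two explicit isDone() checks in planDistribution: the first skips scheduler creation for an already-canceled query, and the second, after queryScheduler.set(scheduler), aborts the scheduler directly, because the state may have flipped to done while the scheduler was being built and the listener callback may already have fired.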