use of com.facebook.presto.split.SplitSource in project presto by prestodb.
the class LocalQueryRunner method createDrivers.
public List<Driver> createDrivers(Session session, @Language("SQL") String sql, OutputFactory outputFactory, TaskContext taskContext) {
Plan plan = createPlan(session, sql);
if (printPlan) {
System.out.println(PlanPrinter.textLogicalPlan(plan.getRoot(), plan.getTypes(), metadata, session));
}
SubPlan subplan = PlanFragmenter.createSubPlans(session, metadata, plan);
if (!subplan.getChildren().isEmpty()) {
throw new AssertionError("Expected subplan to have no children");
}
LocalExecutionPlanner executionPlanner = new LocalExecutionPlanner(metadata, sqlParser, Optional.empty(), pageSourceManager, indexManager, nodePartitioningManager, pageSinkManager, null, expressionCompiler, joinFilterFunctionCompiler, new IndexJoinLookupStats(), // make sure tests fail if compiler breaks
new CompilerConfig().setInterpreterEnabled(false), new TaskManagerConfig().setTaskConcurrency(4), spillerFactory, blockEncodingSerde, new PagesIndex.TestingFactory(), new JoinCompiler(), new LookupJoinOperators(new JoinProbeCompiler()));
// plan query
LocalExecutionPlan localExecutionPlan = executionPlanner.plan(session, subplan.getFragment().getRoot(), subplan.getFragment().getPartitioningScheme().getOutputLayout(), plan.getTypes(), outputFactory);
// generate sources
List<TaskSource> sources = new ArrayList<>();
long sequenceId = 0;
for (TableScanNode tableScan : findTableScanNodes(subplan.getFragment().getRoot())) {
TableLayoutHandle layout = tableScan.getLayout().get();
SplitSource splitSource = splitManager.getSplits(session, layout);
ImmutableSet.Builder<ScheduledSplit> scheduledSplits = ImmutableSet.builder();
while (!splitSource.isFinished()) {
for (Split split : getFutureValue(splitSource.getNextBatch(1000))) {
scheduledSplits.add(new ScheduledSplit(sequenceId++, tableScan.getId(), split));
}
}
sources.add(new TaskSource(tableScan.getId(), scheduledSplits.build(), true));
}
// create drivers
List<Driver> drivers = new ArrayList<>();
Map<PlanNodeId, DriverFactory> driverFactoriesBySource = new HashMap<>();
for (DriverFactory driverFactory : localExecutionPlan.getDriverFactories()) {
for (int i = 0; i < driverFactory.getDriverInstances().orElse(1); i++) {
if (driverFactory.getSourceId().isPresent()) {
checkState(driverFactoriesBySource.put(driverFactory.getSourceId().get(), driverFactory) == null);
} else {
DriverContext driverContext = taskContext.addPipelineContext(driverFactory.getPipelineId(), driverFactory.isInputDriver(), driverFactory.isOutputDriver()).addDriverContext();
Driver driver = driverFactory.createDriver(driverContext);
drivers.add(driver);
}
}
}
// add sources to the drivers
for (TaskSource source : sources) {
DriverFactory driverFactory = driverFactoriesBySource.get(source.getPlanNodeId());
checkState(driverFactory != null);
for (ScheduledSplit split : source.getSplits()) {
DriverContext driverContext = taskContext.addPipelineContext(driverFactory.getPipelineId(), driverFactory.isInputDriver(), driverFactory.isOutputDriver()).addDriverContext();
Driver driver = driverFactory.createDriver(driverContext);
driver.updateSource(new TaskSource(split.getPlanNodeId(), ImmutableSet.of(split), true));
drivers.add(driver);
}
}
for (DriverFactory driverFactory : localExecutionPlan.getDriverFactories()) {
driverFactory.close();
}
return ImmutableList.copyOf(drivers);
}
use of com.facebook.presto.split.SplitSource in project presto by prestodb.
the class PrestoSparkRddFactory method createRdd.
private <T extends PrestoSparkTaskOutput> JavaPairRDD<MutablePartitionId, T> createRdd(JavaSparkContext sparkContext, Session session, PlanFragment fragment, PrestoSparkTaskExecutorFactoryProvider executorFactoryProvider, CollectionAccumulator<SerializedTaskInfo> taskInfoCollector, CollectionAccumulator<PrestoSparkShuffleStats> shuffleStatsCollector, TableWriteInfo tableWriteInfo, Map<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> rddInputs, Map<PlanFragmentId, Broadcast<?>> broadcastInputs, Class<T> outputType) {
checkInputs(fragment.getRemoteSourceNodes(), rddInputs, broadcastInputs);
PrestoSparkTaskDescriptor taskDescriptor = new PrestoSparkTaskDescriptor(session.toSessionRepresentation(), session.getIdentity().getExtraCredentials(), fragment, tableWriteInfo);
SerializedPrestoSparkTaskDescriptor serializedTaskDescriptor = new SerializedPrestoSparkTaskDescriptor(taskDescriptorJsonCodec.toJsonBytes(taskDescriptor));
Optional<Integer> numberOfShufflePartitions = Optional.empty();
Map<String, RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>>> shuffleInputRddMap = new HashMap<>();
for (Map.Entry<PlanFragmentId, JavaPairRDD<MutablePartitionId, PrestoSparkMutableRow>> input : rddInputs.entrySet()) {
RDD<Tuple2<MutablePartitionId, PrestoSparkMutableRow>> rdd = input.getValue().rdd();
shuffleInputRddMap.put(input.getKey().toString(), rdd);
if (!numberOfShufflePartitions.isPresent()) {
numberOfShufflePartitions = Optional.of(rdd.getNumPartitions());
} else {
checkArgument(numberOfShufflePartitions.get() == rdd.getNumPartitions(), "Incompatible number of input partitions: %s != %s", numberOfShufflePartitions.get(), rdd.getNumPartitions());
}
}
PrestoSparkTaskProcessor<T> taskProcessor = new PrestoSparkTaskProcessor<>(executorFactoryProvider, serializedTaskDescriptor, taskInfoCollector, shuffleStatsCollector, toTaskProcessorBroadcastInputs(broadcastInputs), outputType);
Optional<PrestoSparkTaskSourceRdd> taskSourceRdd;
List<TableScanNode> tableScans = findTableScanNodes(fragment.getRoot());
if (!tableScans.isEmpty()) {
try (CloseableSplitSourceProvider splitSourceProvider = new CloseableSplitSourceProvider(splitManager::getSplits)) {
SplitSourceFactory splitSourceFactory = new SplitSourceFactory(splitSourceProvider, WarningCollector.NOOP);
Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(fragment, session, tableWriteInfo);
taskSourceRdd = Optional.of(createTaskSourcesRdd(fragment.getId(), sparkContext, session, fragment.getPartitioning(), tableScans, splitSources, numberOfShufflePartitions));
}
} else if (rddInputs.size() == 0) {
checkArgument(fragment.getPartitioning().equals(SINGLE_DISTRIBUTION), "SINGLE_DISTRIBUTION partitioning is expected: %s", fragment.getPartitioning());
// In case of no inputs we still need to schedule a task.
// Task with no inputs may produce results (e.g.: ValuesNode).
// To force the task to be scheduled we create a PrestoSparkTaskSourceRdd that contains exactly one partition.
// Since there's also no table scans in the fragment, the list of TaskSource's for this partition is empty.
taskSourceRdd = Optional.of(new PrestoSparkTaskSourceRdd(sparkContext.sc(), ImmutableList.of(ImmutableList.of())));
} else {
taskSourceRdd = Optional.empty();
}
return JavaPairRDD.fromRDD(PrestoSparkTaskRdd.create(sparkContext.sc(), taskSourceRdd, shuffleInputRddMap, taskProcessor), classTag(MutablePartitionId.class), classTag(outputType));
}
use of com.facebook.presto.split.SplitSource in project presto by prestodb.
the class PrestoSparkRddFactory method createTaskSourcesRdd.
private PrestoSparkTaskSourceRdd createTaskSourcesRdd(PlanFragmentId fragmentId, JavaSparkContext sparkContext, Session session, PartitioningHandle partitioning, List<TableScanNode> tableScans, Map<PlanNodeId, SplitSource> splitSources, Optional<Integer> numberOfShufflePartitions) {
ListMultimap<Integer, SerializedPrestoSparkTaskSource> taskSourcesMap = ArrayListMultimap.create();
for (TableScanNode tableScan : tableScans) {
int totalNumberOfSplits = 0;
SplitSource splitSource = requireNonNull(splitSources.get(tableScan.getId()), "split source is missing for table scan node with id: " + tableScan.getId());
try (PrestoSparkSplitAssigner splitAssigner = createSplitAssigner(session, tableScan.getId(), splitSource, partitioning)) {
while (true) {
Optional<SetMultimap<Integer, ScheduledSplit>> batch = splitAssigner.getNextBatch();
if (!batch.isPresent()) {
break;
}
int numberOfSplitsInCurrentBatch = batch.get().size();
log.info("Found %s splits for table scan node with id %s", numberOfSplitsInCurrentBatch, tableScan.getId());
totalNumberOfSplits += numberOfSplitsInCurrentBatch;
taskSourcesMap.putAll(createTaskSources(tableScan.getId(), batch.get()));
}
}
log.info("Total number of splits for table scan node with id %s: %s", tableScan.getId(), totalNumberOfSplits);
}
long allTaskSourcesSerializedSizeInBytes = taskSourcesMap.values().stream().mapToLong(serializedTaskSource -> serializedTaskSource.getBytes().length).sum();
log.info("Total serialized size of all task sources for fragment %s: %s", fragmentId, DataSize.succinctBytes(allTaskSourcesSerializedSizeInBytes));
List<List<SerializedPrestoSparkTaskSource>> taskSourcesByPartitionId = new ArrayList<>();
// If the fragment contains any shuffle inputs, this value will be present
if (numberOfShufflePartitions.isPresent()) {
// non bucketed tables match, an empty partition must be inserted if bucket is missing.
for (int partitionId = 0; partitionId < numberOfShufflePartitions.get(); partitionId++) {
// Eagerly remove task sources from the map to let GC reclaim the memory
// If task sources are missing for a partition the removeAll returns an empty list
taskSourcesByPartitionId.add(requireNonNull(taskSourcesMap.removeAll(partitionId), "taskSources is null"));
}
} else {
taskSourcesByPartitionId.addAll(Multimaps.asMap(taskSourcesMap).values());
}
return new PrestoSparkTaskSourceRdd(sparkContext.sc(), taskSourcesByPartitionId);
}
use of com.facebook.presto.split.SplitSource in project presto by prestodb.
the class SectionExecutionFactory method createStageScheduler.
private StageScheduler createStageScheduler(SplitSourceFactory splitSourceFactory, Session session, StreamingSubPlan plan, Function<PartitioningHandle, NodePartitionMap> partitioningCache, Optional<SqlStageExecution> parentStageExecution, StageId stageId, SqlStageExecution stageExecution, PartitioningHandle partitioningHandle, TableWriteInfo tableWriteInfo, Set<SqlStageExecution> childStageExecutions) {
Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(plan.getFragment(), session, tableWriteInfo);
int maxTasksPerStage = getMaxTasksPerStage(session);
if (partitioningHandle.equals(SOURCE_DISTRIBUTION)) {
// nodes are selected dynamically based on the constraints of the splits and the system load
Map.Entry<PlanNodeId, SplitSource> entry = getOnlyElement(splitSources.entrySet());
PlanNodeId planNodeId = entry.getKey();
SplitSource splitSource = entry.getValue();
ConnectorId connectorId = splitSource.getConnectorId();
if (isInternalSystemConnector(connectorId)) {
connectorId = null;
}
NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, connectorId, maxTasksPerStage);
SplitPlacementPolicy placementPolicy = new DynamicSplitPlacementPolicy(nodeSelector, stageExecution::getAllTasks);
checkArgument(!plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution());
return newSourcePartitionedSchedulerAsStageScheduler(stageExecution, planNodeId, splitSource, placementPolicy, splitBatchSize);
} else if (partitioningHandle.equals(SCALED_WRITER_DISTRIBUTION)) {
Supplier<Collection<TaskStatus>> sourceTasksProvider = () -> childStageExecutions.stream().map(SqlStageExecution::getAllTasks).flatMap(Collection::stream).map(RemoteTask::getTaskStatus).collect(toList());
Supplier<Collection<TaskStatus>> writerTasksProvider = () -> stageExecution.getAllTasks().stream().map(RemoteTask::getTaskStatus).collect(toList());
ScaledWriterScheduler scheduler = new ScaledWriterScheduler(stageExecution, sourceTasksProvider, writerTasksProvider, nodeScheduler.createNodeSelector(session, null), scheduledExecutor, getWriterMinSize(session), isOptimizedScaleWriterProducerBuffer(session));
whenAllStages(childStageExecutions, StageExecutionState::isDone).addListener(scheduler::finish, directExecutor());
return scheduler;
} else {
if (!splitSources.isEmpty()) {
// contains local source
List<PlanNodeId> schedulingOrder = plan.getFragment().getTableScanSchedulingOrder();
ConnectorId connectorId = partitioningHandle.getConnectorId().orElseThrow(IllegalStateException::new);
List<ConnectorPartitionHandle> connectorPartitionHandles;
boolean groupedExecutionForStage = plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution();
if (groupedExecutionForStage) {
connectorPartitionHandles = nodePartitioningManager.listPartitionHandles(session, partitioningHandle);
checkState(!ImmutableList.of(NOT_PARTITIONED).equals(connectorPartitionHandles));
} else {
connectorPartitionHandles = ImmutableList.of(NOT_PARTITIONED);
}
BucketNodeMap bucketNodeMap;
List<InternalNode> stageNodeList;
if (plan.getFragment().getRemoteSourceNodes().stream().allMatch(node -> node.getExchangeType() == REPLICATE)) {
// no non-replicated remote source
boolean dynamicLifespanSchedule = plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule();
bucketNodeMap = nodePartitioningManager.getBucketNodeMap(session, partitioningHandle, dynamicLifespanSchedule);
// verify execution is consistent with planner's decision on dynamic lifespan schedule
verify(bucketNodeMap.isDynamic() == dynamicLifespanSchedule);
if (bucketNodeMap.hasInitialMap()) {
stageNodeList = bucketNodeMap.getBucketToNode().get().stream().distinct().collect(toImmutableList());
} else {
stageNodeList = new ArrayList<>(nodeScheduler.createNodeSelector(session, connectorId).selectRandomNodes(maxTasksPerStage));
}
} else {
// cannot use dynamic lifespan schedule
verify(!plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule());
// remote source requires nodePartitionMap
NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
if (groupedExecutionForStage) {
checkState(connectorPartitionHandles.size() == nodePartitionMap.getBucketToPartition().length);
}
stageNodeList = nodePartitionMap.getPartitionToNode();
bucketNodeMap = nodePartitionMap.asBucketNodeMap();
}
FixedSourcePartitionedScheduler fixedSourcePartitionedScheduler = new FixedSourcePartitionedScheduler(stageExecution, splitSources, plan.getFragment().getStageExecutionDescriptor(), schedulingOrder, stageNodeList, bucketNodeMap, splitBatchSize, getConcurrentLifespansPerNode(session), nodeScheduler.createNodeSelector(session, connectorId), connectorPartitionHandles);
if (plan.getFragment().getStageExecutionDescriptor().isRecoverableGroupedExecution()) {
stageExecution.registerStageTaskRecoveryCallback(taskId -> {
checkArgument(taskId.getStageExecutionId().getStageId().equals(stageId), "The task did not execute this stage");
checkArgument(parentStageExecution.isPresent(), "Parent stage execution must exist");
checkArgument(parentStageExecution.get().getAllTasks().size() == 1, "Parent stage should only have one task for recoverable grouped execution");
parentStageExecution.get().removeRemoteSourceIfSingleTaskStage(taskId);
fixedSourcePartitionedScheduler.recover(taskId);
});
}
return fixedSourcePartitionedScheduler;
} else {
// all sources are remote
NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
List<InternalNode> partitionToNode = nodePartitionMap.getPartitionToNode();
// todo this should asynchronously wait a standard timeout period before failing
checkCondition(!partitionToNode.isEmpty(), NO_NODES_AVAILABLE, "No worker nodes available");
return new FixedCountScheduler(stageExecution, partitionToNode);
}
}
}
use of com.facebook.presto.split.SplitSource in project presto by prestodb.
the class LocalQueryRunner method createDrivers.
private List<Driver> createDrivers(Session session, Plan plan, OutputFactory outputFactory, TaskContext taskContext) {
if (printPlan) {
System.out.println(PlanPrinter.textLogicalPlan(plan.getRoot(), plan.getTypes(), metadata.getFunctionAndTypeManager(), plan.getStatsAndCosts(), session, 0, false));
}
SubPlan subplan = createSubPlans(session, plan, true);
if (!subplan.getChildren().isEmpty()) {
throw new AssertionError("Expected subplan to have no children");
}
LocalExecutionPlanner executionPlanner = new LocalExecutionPlanner(metadata, Optional.empty(), pageSourceManager, indexManager, partitioningProviderManager, nodePartitioningManager, pageSinkManager, distributedMetadataManager, expressionCompiler, pageFunctionCompiler, joinFilterFunctionCompiler, new IndexJoinLookupStats(), new TaskManagerConfig().setTaskConcurrency(4), new MemoryManagerConfig(), spillerFactory, singleStreamSpillerFactory, partitioningSpillerFactory, blockEncodingManager, new PagesIndex.TestingFactory(false), joinCompiler, new LookupJoinOperators(), new OrderingCompiler(), jsonCodec(TableCommitContext.class), new RowExpressionDeterminismEvaluator(metadata), new NoOpFragmentResultCacheManager(), new ObjectMapper(), standaloneSpillerFactory);
// plan query
StageExecutionDescriptor stageExecutionDescriptor = subplan.getFragment().getStageExecutionDescriptor();
StreamingPlanSection streamingPlanSection = extractStreamingSections(subplan);
checkState(streamingPlanSection.getChildren().isEmpty(), "expected no materialized exchanges");
StreamingSubPlan streamingSubPlan = streamingPlanSection.getPlan();
LocalExecutionPlan localExecutionPlan = executionPlanner.plan(taskContext, stageExecutionDescriptor, subplan.getFragment().getRoot(), subplan.getFragment().getPartitioningScheme(), subplan.getFragment().getTableScanSchedulingOrder(), outputFactory, Optional.empty(), new UnsupportedRemoteSourceFactory(), createTableWriteInfo(streamingSubPlan, metadata, session), false);
// generate sources
List<TaskSource> sources = new ArrayList<>();
long sequenceId = 0;
for (TableScanNode tableScan : findTableScanNodes(subplan.getFragment().getRoot())) {
SplitSource splitSource = splitManager.getSplits(session, tableScan.getTable(), getSplitSchedulingStrategy(stageExecutionDescriptor, tableScan.getId()), WarningCollector.NOOP);
ImmutableSet.Builder<ScheduledSplit> scheduledSplits = ImmutableSet.builder();
while (!splitSource.isFinished()) {
for (Split split : getNextBatch(splitSource)) {
scheduledSplits.add(new ScheduledSplit(sequenceId++, tableScan.getId(), split));
}
}
sources.add(new TaskSource(tableScan.getId(), scheduledSplits.build(), true));
}
// create drivers
List<Driver> drivers = new ArrayList<>();
Map<PlanNodeId, DriverFactory> driverFactoriesBySource = new HashMap<>();
for (DriverFactory driverFactory : localExecutionPlan.getDriverFactories()) {
for (int i = 0; i < driverFactory.getDriverInstances().orElse(1); i++) {
if (driverFactory.getSourceId().isPresent()) {
checkState(driverFactoriesBySource.put(driverFactory.getSourceId().get(), driverFactory) == null);
} else {
DriverContext driverContext = taskContext.addPipelineContext(driverFactory.getPipelineId(), driverFactory.isInputDriver(), driverFactory.isOutputDriver(), false).addDriverContext();
Driver driver = driverFactory.createDriver(driverContext);
drivers.add(driver);
}
}
}
// add sources to the drivers
Set<PlanNodeId> tableScanPlanNodeIds = ImmutableSet.copyOf(subplan.getFragment().getTableScanSchedulingOrder());
for (TaskSource source : sources) {
DriverFactory driverFactory = driverFactoriesBySource.get(source.getPlanNodeId());
checkState(driverFactory != null);
boolean partitioned = tableScanPlanNodeIds.contains(driverFactory.getSourceId().get());
for (ScheduledSplit split : source.getSplits()) {
DriverContext driverContext = taskContext.addPipelineContext(driverFactory.getPipelineId(), driverFactory.isInputDriver(), driverFactory.isOutputDriver(), partitioned).addDriverContext();
Driver driver = driverFactory.createDriver(driverContext);
driver.updateSource(new TaskSource(split.getPlanNodeId(), ImmutableSet.of(split), true));
drivers.add(driver);
}
}
for (DriverFactory driverFactory : localExecutionPlan.getDriverFactories()) {
driverFactory.noMoreDrivers();
}
return ImmutableList.copyOf(drivers);
}
Aggregations