use of com.facebook.presto.operator.DriverFactory in project presto by prestodb.
the class LocalQueryRunner method createDrivers.
/**
 * Plans {@code sql} as a single local fragment and instantiates every driver needed to run it.
 *
 * <p>Steps, in order: build the logical plan (optionally printing it), fragment it and require
 * that the result has no child sub-plans, run the local execution planner, enumerate all splits
 * of every table scan, create the drivers, and finally close every driver factory.
 *
 * @param session session used for planning and split enumeration
 * @param sql the SQL text to plan
 * @param outputFactory factory handed to the local planner for the output operator
 * @param taskContext context under which pipeline and driver contexts are registered
 * @return an immutable snapshot of the created drivers, source-less drivers first
 * @throws AssertionError if the fragmented plan has child sub-plans
 */
public List<Driver> createDrivers(Session session, @Language("SQL") String sql, OutputFactory outputFactory, TaskContext taskContext) {
    Plan plan = createPlan(session, sql);
    if (printPlan) {
        String planText = PlanPrinter.textLogicalPlan(plan.getRoot(), plan.getTypes(), metadata, session);
        System.out.println(planText);
    }

    SubPlan subplan = PlanFragmenter.createSubPlans(session, metadata, plan);
    if (!subplan.getChildren().isEmpty()) {
        throw new AssertionError("Expected subplan to have no children");
    }

    LocalExecutionPlanner executionPlanner = new LocalExecutionPlanner(
            metadata,
            sqlParser,
            Optional.empty(),
            pageSourceManager,
            indexManager,
            nodePartitioningManager,
            pageSinkManager,
            null,
            expressionCompiler,
            joinFilterFunctionCompiler,
            new IndexJoinLookupStats(),
            // interpreter is disabled so that tests fail if the compiler breaks
            new CompilerConfig().setInterpreterEnabled(false),
            new TaskManagerConfig().setTaskConcurrency(4),
            spillerFactory,
            blockEncodingSerde,
            new PagesIndex.TestingFactory(),
            new JoinCompiler(),
            new LookupJoinOperators(new JoinProbeCompiler()));

    // Turn the single fragment into a local execution plan.
    LocalExecutionPlan localExecutionPlan = executionPlanner.plan(
            session,
            subplan.getFragment().getRoot(),
            subplan.getFragment().getPartitioningScheme().getOutputLayout(),
            plan.getTypes(),
            outputFactory);

    // Drain every table scan's split source into one TaskSource per scan.
    // Sequence ids keep increasing across scans.
    List<TaskSource> sources = new ArrayList<>();
    long sequenceId = 0;
    for (TableScanNode tableScan : findTableScanNodes(subplan.getFragment().getRoot())) {
        TableLayoutHandle layout = tableScan.getLayout().get();
        SplitSource splitSource = splitManager.getSplits(session, layout);

        ImmutableSet.Builder<ScheduledSplit> scheduledSplits = ImmutableSet.builder();
        while (!splitSource.isFinished()) {
            for (Split split : getFutureValue(splitSource.getNextBatch(1000))) {
                scheduledSplits.add(new ScheduledSplit(sequenceId, tableScan.getId(), split));
                sequenceId++;
            }
        }

        sources.add(new TaskSource(tableScan.getId(), scheduledSplits.build(), true));
    }

    // Factories without a source produce their drivers right away; factories with a source
    // are remembered by source id and produce one driver per split further below.
    List<Driver> drivers = new ArrayList<>();
    Map<PlanNodeId, DriverFactory> driverFactoriesBySource = new HashMap<>();
    for (DriverFactory factory : localExecutionPlan.getDriverFactories()) {
        for (int instance = 0; instance < factory.getDriverInstances().orElse(1); instance++) {
            Optional<PlanNodeId> sourceId = factory.getSourceId();
            if (!sourceId.isPresent()) {
                DriverContext driverContext = taskContext
                        .addPipelineContext(factory.getPipelineId(), factory.isInputDriver(), factory.isOutputDriver())
                        .addDriverContext();
                drivers.add(factory.createDriver(driverContext));
                continue;
            }
            // A source id must be registered at most once.
            DriverFactory previous = driverFactoriesBySource.put(sourceId.get(), factory);
            checkState(previous == null);
        }
    }

    // One driver per split, each fed a TaskSource that contains only that split.
    for (TaskSource source : sources) {
        DriverFactory factory = driverFactoriesBySource.get(source.getPlanNodeId());
        checkState(factory != null);
        for (ScheduledSplit split : source.getSplits()) {
            DriverContext driverContext = taskContext
                    .addPipelineContext(factory.getPipelineId(), factory.isInputDriver(), factory.isOutputDriver())
                    .addDriverContext();
            Driver driver = factory.createDriver(driverContext);
            TaskSource singleSplitSource = new TaskSource(split.getPlanNodeId(), ImmutableSet.of(split), true);
            driver.updateSource(singleSplitSource);
            drivers.add(driver);
        }
    }

    // No further drivers will be requested from any factory.
    for (DriverFactory factory : localExecutionPlan.getDriverFactories()) {
        factory.close();
    }

    return ImmutableList.copyOf(drivers);
}
use of com.facebook.presto.operator.DriverFactory in project presto by prestodb.
the class LocalExecutionPlanner method addLookupOuterDrivers.
/**
 * Registers one extra driver factory for each join operator factory that reports an outer
 * operator factory, so that the unused rows in the lookup source can be output.
 *
 * <p>The new pipeline starts with the outer operator factory and is followed by duplicates of
 * every operator factory that comes after the join in the original pipeline.
 *
 * @param context planning context whose driver factories are scanned and extended
 */
private static void addLookupOuterDrivers(LocalExecutionPlanContext context) {
    for (DriverFactory factory : context.getDriverFactories()) {
        List<OperatorFactory> pipeline = factory.getOperatorFactories();
        for (int position = 0; position < pipeline.size(); position++) {
            OperatorFactory candidate = pipeline.get(position);
            if (!(candidate instanceof JoinOperatorFactory)) {
                continue;
            }

            Optional<OuterOperatorFactoryResult> outerResult = ((JoinOperatorFactory) candidate).createOuterOperatorFactory();
            if (!outerResult.isPresent()) {
                // This join does not need a driver for unmatched rows.
                continue;
            }
            OuterOperatorFactoryResult outer = outerResult.get();

            // Replace the join with the outer operator (the producer of unmatched rows) and
            // copy everything that reads from the join.
            ImmutableList.Builder<OperatorFactory> outerPipeline = ImmutableList.builder();
            outerPipeline.add(outer.getOuterOperatorFactory());
            for (OperatorFactory downstream : pipeline.subList(position + 1, pipeline.size())) {
                outerPipeline.add(downstream.duplicate());
            }

            context.addDriverFactory(
                    false,
                    factory.isOutputDriver(),
                    outerPipeline.build(),
                    OptionalInt.of(1),
                    outer.getBuildExecutionStrategy(),
                    Optional.empty());
        }
    }
}
use of com.facebook.presto.operator.DriverFactory in project presto by prestodb.
the class TestSqlTaskExecution method testSimple.
/**
 * Runs a task made of a single pipeline (scan feeding the task output) under the supplied
 * execution strategy and checks its progress step by step.
 *
 * <p>The test feeds splits through {@code addSources}, pauses and resumes the scan operator
 * via its pauser, and after each step asserts what the output buffer consumer received and
 * which driver groups the task context reports as completed. It ends by aborting the output
 * buffer consumer and expecting the task state machine to reach {@code FINISHED}.
 *
 * @param executionStrategy strategy supplied by the "executionStrategies" data provider;
 * any value other than the two handled in the switch raises UnsupportedOperationException
 */
@Test(dataProvider = "executionStrategies", timeOut = 20_000)
public void testSimple(PipelineExecutionStrategy executionStrategy) throws Exception {
ScheduledExecutorService taskNotificationExecutor = newScheduledThreadPool(10, threadsNamed("task-notification-%s"));
ScheduledExecutorService driverYieldExecutor = newScheduledThreadPool(2, threadsNamed("driver-yield-%s"));
TaskExecutor taskExecutor = new TaskExecutor(5, 10, 3, 4, TASK_FAIR, Ticker.systemTicker());
taskExecutor.start();
try {
TaskStateMachine taskStateMachine = new TaskStateMachine(TASK_ID, taskNotificationExecutor);
PartitionedOutputBuffer outputBuffer = newTestingOutputBuffer(taskNotificationExecutor);
OutputBufferConsumer outputBufferConsumer = new OutputBufferConsumer(outputBuffer, OUTPUT_BUFFER_ID);
//
// test initialization: simple task with 1 pipeline
//
// pipeline 0 ... pipeline id
// partitioned ... partitioned/unpartitioned pipeline
// grouped ... execution strategy (in grouped test)
// ungrouped ... execution strategy (in ungrouped test)
//
// TaskOutput
// |
// Scan
//
// See #testComplex for all the behaviors that are tested. Not all of them apply here.
TestingScanOperatorFactory testingScanOperatorFactory = new TestingScanOperatorFactory(0, TABLE_SCAN_NODE_ID, ImmutableList.of(VARCHAR));
TaskOutputOperatorFactory taskOutputOperatorFactory = new TaskOutputOperatorFactory(1, TABLE_SCAN_NODE_ID, outputBuffer, Function.identity(), new PagesSerdeFactory(new BlockEncodingManager(), false));
// single driver factory (pipeline 0) flagged as both input and output driver
LocalExecutionPlan localExecutionPlan = new LocalExecutionPlan(ImmutableList.of(new DriverFactory(0, true, true, ImmutableList.of(testingScanOperatorFactory, taskOutputOperatorFactory), OptionalInt.empty(), executionStrategy, Optional.empty())), ImmutableList.of(TABLE_SCAN_NODE_ID), executionStrategy == GROUPED_EXECUTION ? StageExecutionDescriptor.fixedLifespanScheduleGroupedExecution(ImmutableList.of(TABLE_SCAN_NODE_ID), 8) : StageExecutionDescriptor.ungroupedExecution());
TaskContext taskContext = newTestingTaskContext(taskNotificationExecutor, driverYieldExecutor, taskStateMachine);
SqlTaskExecution sqlTaskExecution = SqlTaskExecution.createSqlTaskExecution(taskStateMachine, taskContext, outputBuffer, ImmutableList.of(), localExecutionPlan, taskExecutor, taskNotificationExecutor, createTestSplitMonitor());
//
// test body
assertEquals(taskStateMachine.getState(), TaskState.RUNNING);
switch(executionStrategy) {
case UNGROUPED_EXECUTION:
// add source for pipeline
sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID, ImmutableSet.of(newScheduledSplit(0, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 100000, 123)), false)));
// assert that partial task result is produced
outputBufferConsumer.consume(123, ASSERT_WAIT_TIMEOUT);
// pause operator execution to make sure that
// * operatorFactory will be closed even though operator can't execute
// * completedDriverGroups will NOT include the newly scheduled driver group while pause is in place
testingScanOperatorFactory.getPauser().pause();
// add source for pipeline, mark as no more splits
sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID, ImmutableSet.of(newScheduledSplit(1, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 200000, 300), newScheduledSplit(2, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 300000, 200)), true)));
// assert that pipeline will have no more drivers
waitUntilEquals(testingScanOperatorFactory::isOverallNoMoreOperators, true, ASSERT_WAIT_TIMEOUT);
// assert that no DriverGroup is fully completed
assertEquals(taskContext.getCompletedDriverGroups(), ImmutableSet.of());
// resume operator execution
testingScanOperatorFactory.getPauser().resume();
// assert that task result is produced
outputBufferConsumer.consume(300 + 200, ASSERT_WAIT_TIMEOUT);
outputBufferConsumer.assertBufferComplete(ASSERT_WAIT_TIMEOUT);
break;
case GROUPED_EXECUTION:
// add source for pipeline (driver group [1, 5]), mark driver group [1] as noMoreSplits
sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID, ImmutableSet.of(newScheduledSplit(0, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(1), 0, 1), newScheduledSplit(1, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(5), 100000, 10)), ImmutableSet.of(Lifespan.driverGroup(1)), false)));
// assert that pipeline will have no more drivers for driver group [1]
waitUntilEquals(testingScanOperatorFactory::getDriverGroupsWithNoMoreOperators, ImmutableSet.of(Lifespan.driverGroup(1)), ASSERT_WAIT_TIMEOUT);
// assert that partial result is produced for both driver groups
outputBufferConsumer.consume(1 + 10, ASSERT_WAIT_TIMEOUT);
// assert that driver group [1] is fully completed
waitUntilEquals(taskContext::getCompletedDriverGroups, ImmutableSet.of(Lifespan.driverGroup(1)), ASSERT_WAIT_TIMEOUT);
// pause operator execution to make sure that
// * operatorFactory will be closed even though operator can't execute
// * completedDriverGroups will NOT include the newly scheduled driver group while pause is in place
testingScanOperatorFactory.getPauser().pause();
// add source for pipeline (driver group [5]), mark driver group [5] as noMoreSplits
sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID, ImmutableSet.of(newScheduledSplit(2, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(5), 200000, 300)), ImmutableSet.of(Lifespan.driverGroup(5)), false)));
// assert that pipeline will have no more drivers for driver group [1, 5]
waitUntilEquals(testingScanOperatorFactory::getDriverGroupsWithNoMoreOperators, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5)), ASSERT_WAIT_TIMEOUT);
// assert that driver group [5] is NOT YET fully completed
assertEquals(taskContext.getCompletedDriverGroups(), ImmutableSet.of(Lifespan.driverGroup(1)));
// resume operator execution
testingScanOperatorFactory.getPauser().resume();
// assert that partial result is produced
outputBufferConsumer.consume(300, ASSERT_WAIT_TIMEOUT);
// assert that driver group [1, 5] is fully completed
waitUntilEquals(taskContext::getCompletedDriverGroups, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5)), ASSERT_WAIT_TIMEOUT);
// pause operator execution to make sure that
// * operatorFactory will be closed even though operator can't execute
// * completedDriverGroups will NOT include the newly scheduled driver group while pause is in place
testingScanOperatorFactory.getPauser().pause();
// add source for pipeline (driver group [7]), mark pipeline as noMoreSplits without explicitly marking driver group 7
sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID, ImmutableSet.of(newScheduledSplit(3, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(7), 300000, 45), newScheduledSplit(4, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(7), 400000, 54)), ImmutableSet.of(), true)));
// assert that pipeline will have no more drivers for driver group [1, 5, 7]
waitUntilEquals(testingScanOperatorFactory::getDriverGroupsWithNoMoreOperators, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5), Lifespan.driverGroup(7)), ASSERT_WAIT_TIMEOUT);
// assert that pipeline will have no more drivers
waitUntilEquals(testingScanOperatorFactory::isOverallNoMoreOperators, true, ASSERT_WAIT_TIMEOUT);
// assert that driver group [1, 5] is fully completed
assertEquals(taskContext.getCompletedDriverGroups(), ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5)));
// resume operator execution
testingScanOperatorFactory.getPauser().resume();
// assert driver group [7] is not completed before output buffer is consumed
MILLISECONDS.sleep(1000);
assertEquals(taskContext.getCompletedDriverGroups(), ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5)));
// assert that result is produced
outputBufferConsumer.consume(45 + 54, ASSERT_WAIT_TIMEOUT);
outputBufferConsumer.assertBufferComplete(ASSERT_WAIT_TIMEOUT);
// assert that driver group [1, 5, 7] is fully completed
waitUntilEquals(taskContext::getCompletedDriverGroups, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5), Lifespan.driverGroup(7)), ASSERT_WAIT_TIMEOUT);
break;
default:
throw new UnsupportedOperationException();
}
// complete the task by calling abort on it
outputBufferConsumer.abort();
// wait up to 10 seconds for the state to move away from RUNNING
TaskState taskState = taskStateMachine.getStateChange(TaskState.RUNNING).get(10, SECONDS);
assertEquals(taskState, TaskState.FINISHED);
} finally {
// always release the executors, whether the assertions passed or not
taskExecutor.stop();
taskNotificationExecutor.shutdownNow();
driverYieldExecutor.shutdown();
}
}
use of com.facebook.presto.operator.DriverFactory in project presto by prestodb.
the class LocalQueryRunner method createDrivers.
/**
 * Builds every driver needed to execute an already-created {@code plan} locally.
 *
 * <p>Steps, in order: optionally print the plan, fragment it and require a single fragment
 * with no child sub-plans and no materialized exchanges, run the local execution planner,
 * enumerate all splits of every table scan, create the drivers, and finally signal
 * {@code noMoreDrivers} to every driver factory.
 *
 * @param session session used for planning and split enumeration
 * @param plan the logical plan to execute
 * @param outputFactory factory handed to the local planner for the output operator
 * @param taskContext context under which pipeline and driver contexts are registered
 * @return an immutable snapshot of the created drivers, source-less drivers first
 * @throws AssertionError if the fragmented plan has child sub-plans
 */
private List<Driver> createDrivers(Session session, Plan plan, OutputFactory outputFactory, TaskContext taskContext) {
    if (printPlan) {
        String planText = PlanPrinter.textLogicalPlan(
                plan.getRoot(),
                plan.getTypes(),
                metadata.getFunctionAndTypeManager(),
                plan.getStatsAndCosts(),
                session,
                0,
                false);
        System.out.println(planText);
    }

    SubPlan subplan = createSubPlans(session, plan, true);
    if (!subplan.getChildren().isEmpty()) {
        throw new AssertionError("Expected subplan to have no children");
    }

    LocalExecutionPlanner executionPlanner = new LocalExecutionPlanner(
            metadata,
            Optional.empty(),
            pageSourceManager,
            indexManager,
            partitioningProviderManager,
            nodePartitioningManager,
            pageSinkManager,
            distributedMetadataManager,
            expressionCompiler,
            pageFunctionCompiler,
            joinFilterFunctionCompiler,
            new IndexJoinLookupStats(),
            new TaskManagerConfig().setTaskConcurrency(4),
            new MemoryManagerConfig(),
            spillerFactory,
            singleStreamSpillerFactory,
            partitioningSpillerFactory,
            blockEncodingManager,
            new PagesIndex.TestingFactory(false),
            joinCompiler,
            new LookupJoinOperators(),
            new OrderingCompiler(),
            jsonCodec(TableCommitContext.class),
            new RowExpressionDeterminismEvaluator(metadata),
            new NoOpFragmentResultCacheManager(),
            new ObjectMapper(),
            standaloneSpillerFactory);

    // Turn the single fragment into a local execution plan.
    StageExecutionDescriptor stageExecutionDescriptor = subplan.getFragment().getStageExecutionDescriptor();
    StreamingPlanSection streamingPlanSection = extractStreamingSections(subplan);
    checkState(streamingPlanSection.getChildren().isEmpty(), "expected no materialized exchanges");
    StreamingSubPlan streamingSubPlan = streamingPlanSection.getPlan();
    LocalExecutionPlan localExecutionPlan = executionPlanner.plan(
            taskContext,
            stageExecutionDescriptor,
            subplan.getFragment().getRoot(),
            subplan.getFragment().getPartitioningScheme(),
            subplan.getFragment().getTableScanSchedulingOrder(),
            outputFactory,
            Optional.empty(),
            new UnsupportedRemoteSourceFactory(),
            createTableWriteInfo(streamingSubPlan, metadata, session),
            false);

    // Drain every table scan's split source into one TaskSource per scan.
    // Sequence ids keep increasing across scans.
    List<TaskSource> sources = new ArrayList<>();
    long sequenceId = 0;
    for (TableScanNode tableScan : findTableScanNodes(subplan.getFragment().getRoot())) {
        SplitSource splitSource = splitManager.getSplits(
                session,
                tableScan.getTable(),
                getSplitSchedulingStrategy(stageExecutionDescriptor, tableScan.getId()),
                WarningCollector.NOOP);

        ImmutableSet.Builder<ScheduledSplit> scheduledSplits = ImmutableSet.builder();
        while (!splitSource.isFinished()) {
            for (Split split : getNextBatch(splitSource)) {
                scheduledSplits.add(new ScheduledSplit(sequenceId, tableScan.getId(), split));
                sequenceId++;
            }
        }

        sources.add(new TaskSource(tableScan.getId(), scheduledSplits.build(), true));
    }

    // Factories without a source produce their drivers right away; factories with a source
    // are remembered by source id and produce one driver per split further below.
    List<Driver> drivers = new ArrayList<>();
    Map<PlanNodeId, DriverFactory> driverFactoriesBySource = new HashMap<>();
    for (DriverFactory factory : localExecutionPlan.getDriverFactories()) {
        for (int instance = 0; instance < factory.getDriverInstances().orElse(1); instance++) {
            Optional<PlanNodeId> sourceId = factory.getSourceId();
            if (!sourceId.isPresent()) {
                DriverContext driverContext = taskContext
                        .addPipelineContext(factory.getPipelineId(), factory.isInputDriver(), factory.isOutputDriver(), false)
                        .addDriverContext();
                drivers.add(factory.createDriver(driverContext));
                continue;
            }
            // A source id must be registered at most once.
            DriverFactory previous = driverFactoriesBySource.put(sourceId.get(), factory);
            checkState(previous == null);
        }
    }

    // One driver per split, each fed a TaskSource that contains only that split. The pipeline
    // is flagged as partitioned when its source id appears in the table scan scheduling order.
    Set<PlanNodeId> tableScanPlanNodeIds = ImmutableSet.copyOf(subplan.getFragment().getTableScanSchedulingOrder());
    for (TaskSource source : sources) {
        DriverFactory factory = driverFactoriesBySource.get(source.getPlanNodeId());
        checkState(factory != null);
        boolean partitioned = tableScanPlanNodeIds.contains(factory.getSourceId().get());
        for (ScheduledSplit split : source.getSplits()) {
            DriverContext driverContext = taskContext
                    .addPipelineContext(factory.getPipelineId(), factory.isInputDriver(), factory.isOutputDriver(), partitioned)
                    .addDriverContext();
            Driver driver = factory.createDriver(driverContext);
            TaskSource singleSplitSource = new TaskSource(split.getPlanNodeId(), ImmutableSet.of(split), true);
            driver.updateSource(singleSplitSource);
            drivers.add(driver);
        }
    }

    // No further drivers will be requested from any factory.
    for (DriverFactory factory : localExecutionPlan.getDriverFactories()) {
        factory.noMoreDrivers();
    }

    return ImmutableList.copyOf(drivers);
}
use of com.facebook.presto.operator.DriverFactory in project presto by prestodb.
the class LocalExecutionPlanner method plan.
/**
 * Converts a plan tree into a {@link LocalExecutionPlan}.
 *
 * <p>The tree is visited to obtain the physical operation, an output operator is appended to
 * its operator factories and the result is registered as the output driver factory. Lookup-outer
 * drivers are then added, and every operator factory that implements
 * {@link LocalPlannerAware} is told that planning has completed.
 *
 * @param session session the plan is built for
 * @param plan root of the plan tree to translate
 * @param outputLayout symbols, in order, that the output operator must emit
 * @param types symbol-to-type mapping used to resolve the output types
 * @param outputOperatorFactory factory that creates the final output operator
 * @return a plan wrapping all driver factories collected in the planning context
 */
public LocalExecutionPlan plan(Session session, PlanNode plan, List<Symbol> outputLayout, Map<Symbol, Type> types, OutputFactory outputOperatorFactory) {
    LocalExecutionPlanContext context = new LocalExecutionPlanContext(session, types);
    PhysicalOperation physicalOperation = plan.accept(new Visitor(session), context);

    Function<Page, Page> pagePreprocessor = enforceLayoutProcessor(outputLayout, physicalOperation.getLayout());
    List<Type> outputTypes = outputLayout.stream()
            .map(types::get)
            .collect(toImmutableList());

    // Assemble the output pipeline: the planned operators followed by the output operator.
    boolean inputDriver = context.isInputDriver();
    ImmutableList.Builder<OperatorFactory> outputPipeline = ImmutableList.builder();
    outputPipeline.addAll(physicalOperation.getOperatorFactories());
    OperatorFactory outputOperator = outputOperatorFactory.createOutputOperator(
            context.getNextOperatorId(),
            plan.getId(),
            outputTypes,
            pagePreprocessor,
            new PagesSerdeFactory(blockEncodingSerde, isExchangeCompressionEnabled(session)));
    outputPipeline.add(outputOperator);
    context.addDriverFactory(inputDriver, true, outputPipeline.build(), context.getDriverInstanceCount());

    addLookupOuterDrivers(context);

    // Tell every planner-aware operator factory that planning has completed.
    for (DriverFactory driverFactory : context.getDriverFactories()) {
        for (OperatorFactory operatorFactory : driverFactory.getOperatorFactories()) {
            if (operatorFactory instanceof LocalPlannerAware) {
                ((LocalPlannerAware) operatorFactory).localPlannerComplete();
            }
        }
    }

    return new LocalExecutionPlan(context.getDriverFactories());
}
Aggregations