use of io.prestosql.operator.DriverFactory in project hetu-core by openlookeng.
the class TestSqlTaskExecution method testMarker.
@Test(dataProvider = "executionStrategies", timeOut = 20_000)
public void testMarker(PipelineExecutionStrategy executionStrategy) throws Exception {
    // This test is a copy of testSimple(). The only difference is that some marker task sources are added
    // to the input, to verify that they are converted to pages and that those pages are consumed correctly.
    ScheduledExecutorService taskNotificationExecutor = newScheduledThreadPool(10, threadsNamed("task-notification-%s"));
    ScheduledExecutorService driverYieldExecutor = newScheduledThreadPool(2, threadsNamed("driver-yield-%s"));
    TaskExecutor taskExecutor = new TaskExecutor(5, 10, 3, 4, Ticker.systemTicker());
    taskExecutor.start();
    try {
        TaskStateMachine taskStateMachine = new TaskStateMachine(TaskId.valueOf("query.1.1"), taskNotificationExecutor);
        PartitionedOutputBuffer outputBuffer = newTestingOutputBuffer(taskNotificationExecutor);
        OutputBufferConsumer outputBufferConsumer = new OutputBufferConsumer(outputBuffer, OUTPUT_BUFFER_ID);
        //
        // test initialization: simple task with 1 pipeline
        //
        // pipeline 0   ... pipeline id
        // partitioned  ... partitioned/unpartitioned pipeline
        // grouped      ... execution strategy (in grouped test)
        // ungrouped    ... execution strategy (in ungrouped test)
        //
        //   TaskOutput
        //       |
        //      Scan
        //
        // See #testComplex for all the behaviors that are tested. Not all of them apply here.
        TestingScanOperatorFactory testingScanOperatorFactory = new TestingScanOperatorFactory(0, TABLE_SCAN_NODE_ID, ImmutableList.of(VARCHAR));
        TaskOutputOperatorFactory taskOutputOperatorFactory = new TaskOutputOperatorFactory(1, TABLE_SCAN_NODE_ID, outputBuffer, Function.identity());
        LocalExecutionPlan localExecutionPlan = new LocalExecutionPlan(
                ImmutableList.of(new DriverFactory(0, true, true, ImmutableList.of(testingScanOperatorFactory, taskOutputOperatorFactory), OptionalInt.empty(), executionStrategy)),
                ImmutableList.of(TABLE_SCAN_NODE_ID),
                executionStrategy == GROUPED_EXECUTION
                        ? StageExecutionDescriptor.fixedLifespanScheduleGroupedExecution(ImmutableList.of(TABLE_SCAN_NODE_ID))
                        : StageExecutionDescriptor.ungroupedExecution(),
                Optional.empty());
        TaskContext taskContext = newTestingTaskContext(taskNotificationExecutor, driverYieldExecutor, taskStateMachine, true);
        SqlTaskExecution sqlTaskExecution = SqlTaskExecution.createSqlTaskExecution(
                taskStateMachine, taskContext, outputBuffer, ImmutableList.of(), localExecutionPlan,
                taskExecutor, taskNotificationExecutor, createTestSplitMonitor());
        //
        // test body
        assertEquals(taskStateMachine.getState(), TaskState.RUNNING);
        switch (executionStrategy) {
            case UNGROUPED_EXECUTION:
                // add source for pipeline
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newScheduledSplit(0, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 100000, 123)), false)));
                // assert that partial task result is produced
                outputBufferConsumer.consume(123, ASSERT_WAIT_TIMEOUT);
                // add marker source
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newMarkerSplit(1, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 1)), false)));
                // assert that the marker page is produced
                outputBufferConsumer.consume(1, ASSERT_WAIT_TIMEOUT);
                // pause operator execution to make sure that
                // * operatorFactory will be closed even though the operator can't execute
                // * completedDriverGroups will NOT include the newly scheduled driver group while the pause is in place
                testingScanOperatorFactory.getPauser().pause();
                // add source for pipeline
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(
                                newScheduledSplit(2, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 200000, 300),
                                newScheduledSplit(3, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 300000, 200)),
                        false)));
                // add marker source, marking the pipeline as no more splits
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newMarkerSplit(4, TABLE_SCAN_NODE_ID, Lifespan.taskWide(), 2)), true)));
                // resume operator execution: needed before "isOverallNoMoreOperators" can become true
                testingScanOperatorFactory.getPauser().resume();
                // assert that the pipeline will have no more drivers
                waitUntilEquals(testingScanOperatorFactory::isOverallNoMoreOperators, true, ASSERT_WAIT_TIMEOUT);
                // assert that no DriverGroup is fully completed
                assertEquals(taskContext.getCompletedDriverGroups(), ImmutableSet.of());
                // assert that the remaining task results and the final marker page are produced
                outputBufferConsumer.consume(300 + 200 + 1, ASSERT_WAIT_TIMEOUT);
                outputBufferConsumer.assertBufferComplete(ASSERT_WAIT_TIMEOUT);
                break;
            case GROUPED_EXECUTION:
                // add source for pipeline (driver groups [1, 5])
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(
                                newScheduledSplit(0, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(1), 0, 1),
                                newScheduledSplit(1, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(5), 100000, 10)),
                        false)));
                // add marker source, marking driver group [1] as noMoreSplits
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newMarkerSplit(2, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(1), 1)),
                        ImmutableSet.of(Lifespan.driverGroup(1)), false)));
                // assert that the pipeline will have no more drivers for driver group [1]
                waitUntilEquals(testingScanOperatorFactory::getDriverGroupsWithNoMoreOperators, ImmutableSet.of(Lifespan.driverGroup(1)), ASSERT_WAIT_TIMEOUT);
                // assert that partial results (including the marker page) are produced for both driver groups
                outputBufferConsumer.consume(1 + 10 + 1, ASSERT_WAIT_TIMEOUT);
                // assert that driver group [1] is fully completed
                waitUntilEquals(taskContext::getCompletedDriverGroups, ImmutableSet.of(Lifespan.driverGroup(1)), ASSERT_WAIT_TIMEOUT);
                // pause operator execution to make sure that
                // * operatorFactory will be closed even though the operator can't execute
                // * completedDriverGroups will NOT include the newly scheduled driver group while the pause is in place
                testingScanOperatorFactory.getPauser().pause();
                // add source for pipeline (driver group [5])
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newScheduledSplit(3, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(5), 200000, 300)), false)));
                // add marker source, marking driver group [5] as noMoreSplits
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newMarkerSplit(4, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(5), 2)),
                        ImmutableSet.of(Lifespan.driverGroup(5)), false)));
                // resume operator execution
                testingScanOperatorFactory.getPauser().resume();
                // assert that the pipeline will have no more drivers for driver groups [1, 5]
                waitUntilEquals(testingScanOperatorFactory::getDriverGroupsWithNoMoreOperators, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5)), ASSERT_WAIT_TIMEOUT);
                // assert that partial result is produced
                outputBufferConsumer.consume(300 + 1, ASSERT_WAIT_TIMEOUT);
                // assert that driver groups [1, 5] are fully completed
                waitUntilEquals(taskContext::getCompletedDriverGroups, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5)), ASSERT_WAIT_TIMEOUT);
                // pause operator execution (same reasoning as above)
                testingScanOperatorFactory.getPauser().pause();
                // add source for pipeline (driver group [7])
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(
                                newScheduledSplit(5, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(7), 300000, 45),
                                newScheduledSplit(6, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(7), 400000, 54)),
                        false)));
                // add marker source, marking the whole pipeline as noMoreSplits without explicitly marking driver group [7]
                sqlTaskExecution.addSources(ImmutableList.of(new TaskSource(TABLE_SCAN_NODE_ID,
                        ImmutableSet.of(newMarkerSplit(7, TABLE_SCAN_NODE_ID, Lifespan.driverGroup(7), 3)), true)));
                // resume operator execution
                testingScanOperatorFactory.getPauser().resume();
                // assert that the pipeline will have no more drivers for driver groups [1, 5, 7]
                waitUntilEquals(testingScanOperatorFactory::getDriverGroupsWithNoMoreOperators, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5), Lifespan.driverGroup(7)), ASSERT_WAIT_TIMEOUT);
                // assert that the pipeline will have no more drivers
                waitUntilEquals(testingScanOperatorFactory::isOverallNoMoreOperators, true, ASSERT_WAIT_TIMEOUT);
                // assert that the remaining results and the final marker page are produced
                outputBufferConsumer.consume(45 + 54 + 1, ASSERT_WAIT_TIMEOUT);
                outputBufferConsumer.assertBufferComplete(ASSERT_WAIT_TIMEOUT);
                // assert that driver groups [1, 5, 7] are fully completed
                waitUntilEquals(taskContext::getCompletedDriverGroups, ImmutableSet.of(Lifespan.driverGroup(1), Lifespan.driverGroup(5), Lifespan.driverGroup(7)), ASSERT_WAIT_TIMEOUT);
                break;
            default:
                throw new UnsupportedOperationException();
        }
        // complete the task by calling abort on it
        outputBufferConsumer.abort();
        TaskState taskState = taskStateMachine.getStateChange(TaskState.RUNNING).get(10, SECONDS);
        assertEquals(taskState, TaskState.FINISHED);
    } finally {
        taskExecutor.stop();
        taskNotificationExecutor.shutdownNow();
        driverYieldExecutor.shutdown();
    }
}
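A note on the helper used throughout: waitUntilEquals polls a supplier until it matches an expected value or a timeout elapses, which is what lets the test make assertions against asynchronously executing drivers. The real utility lives elsewhere in TestSqlTaskExecution; the following is a minimal sketch of such a poller, assuming java.util.function.Supplier, java.util.Objects, java.time.Duration, TestNG's assertEquals, and a 10 ms poll interval, none of which are confirmed by this excerpt.

private static <T> void waitUntilEquals(Supplier<T> actual, T expected, Duration timeout) throws InterruptedException {
    long deadlineNanos = System.nanoTime() + timeout.toNanos();
    while (System.nanoTime() < deadlineNanos) {
        if (Objects.equals(actual.get(), expected)) {
            return; // observed value matched before the deadline
        }
        Thread.sleep(10); // assumed poll interval, not necessarily the real utility's value
    }
    // one final assertion so a timeout fails with a readable mismatch message
    assertEquals(actual.get(), expected);
}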
use of io.prestosql.operator.DriverFactory in project boostkit-bigdata by kunpengcompute.
the class OmniLocalQueryRunner method createDrivers.
private List<Driver> createDrivers(Session session, Plan plan, OutputFactory outputFactory, TaskContext taskContext) {
    if (printPlan) {
        System.out.println(PlanPrinter.textLogicalPlan(plan.getRoot(), plan.getTypes(), metadata, plan.getStatsAndCosts(), session, 0, false));
    }
    SubPlan subplan = planFragmenter.createSubPlans(session, plan, true, WarningCollector.NOOP);
    if (!subplan.getChildren().isEmpty()) {
        throw new AssertionError("Expected subplan to have no children");
    }
    NodeInfo nodeInfo = new NodeInfo("test");
    FileSystemClientManager fileSystemClientManager = new FileSystemClientManager();
    SeedStoreManager seedStoreManager = new SeedStoreManager(fileSystemClientManager);
    StateStoreProvider stateStoreProvider = new LocalStateStoreProvider(seedStoreManager);
    LocalExecutionPlanner executionPlanner = new LocalExecutionPlanner(
            metadata, new TypeAnalyzer(sqlParser, metadata), Optional.empty(), pageSourceManager, indexManager,
            nodePartitioningManager, pageSinkManager, null, expressionCompiler, pageFunctionCompiler,
            joinFilterFunctionCompiler, new IndexJoinLookupStats(), this.taskManagerConfig, spillerFactory,
            singleStreamSpillerFactory, partitioningSpillerFactory, new PagesIndex.TestingFactory(false),
            joinCompiler, new LookupJoinOperators(), new OrderingCompiler(), nodeInfo, stateStoreProvider,
            new StateStoreListenerManager(stateStoreProvider), new DynamicFilterCacheManager(),
            heuristicIndexerManager, cubeManager);
    // plan query
    StageExecutionDescriptor stageExecutionDescriptor = subplan.getFragment().getStageExecutionDescriptor();
    LocalExecutionPlan localExecutionPlan = executionPlanner.plan(
            taskContext, stageExecutionDescriptor, subplan.getFragment().getRoot(),
            subplan.getFragment().getPartitioningScheme().getOutputLayout(), plan.getTypes(),
            subplan.getFragment().getPartitionedSources(), null, outputFactory,
            Optional.empty(), Optional.empty(), null);
    // generate sources
    List<TaskSource> sources = new ArrayList<>();
    long sequenceId = 0;
    for (TableScanNode tableScan : findTableScanNodes(subplan.getFragment().getRoot())) {
        TableHandle table = tableScan.getTable();
        SplitSource splitSource = splitManager.getSplits(
                session, table,
                stageExecutionDescriptor.isScanGroupedExecution(tableScan.getId()) ? GROUPED_SCHEDULING : UNGROUPED_SCHEDULING,
                null, Optional.empty(), Collections.emptyMap(), ImmutableSet.of(),
                tableScan.getStrategy() != ReuseExchangeOperator.STRATEGY.REUSE_STRATEGY_DEFAULT, tableScan.getId());
        ImmutableSet.Builder<ScheduledSplit> scheduledSplits = ImmutableSet.builder();
        while (!splitSource.isFinished()) {
            for (Split split : getNextBatch(splitSource)) {
                scheduledSplits.add(new ScheduledSplit(sequenceId++, tableScan.getId(), split));
            }
        }
        sources.add(new TaskSource(tableScan.getId(), scheduledSplits.build(), true));
    }
    // create drivers
    List<Driver> drivers = new ArrayList<>();
    Map<PlanNodeId, DriverFactory> driverFactoriesBySource = new HashMap<>();
    for (DriverFactory driverFactory : localExecutionPlan.getDriverFactories()) {
        for (int i = 0; i < driverFactory.getDriverInstances().orElse(1); i++) {
            if (driverFactory.getSourceId().isPresent()) {
                checkState(driverFactoriesBySource.put(driverFactory.getSourceId().get(), driverFactory) == null);
            } else {
                DriverContext driverContext = taskContext.addPipelineContext(driverFactory.getPipelineId(), driverFactory.isInputDriver(), driverFactory.isOutputDriver(), false).addDriverContext();
                Driver driver = driverFactory.createDriver(driverContext);
                drivers.add(driver);
            }
        }
    }
    // add sources to the drivers
    ImmutableSet<PlanNodeId> partitionedSources = ImmutableSet.copyOf(subplan.getFragment().getPartitionedSources());
    for (TaskSource source : sources) {
        DriverFactory driverFactory = driverFactoriesBySource.get(source.getPlanNodeId());
        checkState(driverFactory != null);
        boolean partitioned = partitionedSources.contains(driverFactory.getSourceId().get());
        for (ScheduledSplit split : source.getSplits()) {
            DriverContext driverContext = taskContext.addPipelineContext(driverFactory.getPipelineId(), driverFactory.isInputDriver(), driverFactory.isOutputDriver(), partitioned).addDriverContext();
            Driver driver = driverFactory.createDriver(driverContext);
            driver.updateSource(new TaskSource(split.getPlanNodeId(), ImmutableSet.of(split), true));
            drivers.add(driver);
        }
    }
    for (DriverFactory driverFactory : localExecutionPlan.getDriverFactories()) {
        driverFactory.noMoreDrivers();
    }
    return ImmutableList.copyOf(drivers);
}
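Note that createDrivers only builds and wires the drivers; nothing here executes them. A caller still has to pump each driver until it finishes. A rough sketch of such a loop, assuming only Driver.isFinished() and Driver.process() from io.prestosql.operator.Driver (the helper name is hypothetical):

private static void runDriversToCompletion(List<Driver> drivers) {
    boolean done = false;
    while (!done) {
        done = true;
        for (Driver driver : drivers) {
            if (!driver.isFinished()) {
                driver.process(); // advances the driver; it may yield before finishing
                done = false;
            }
        }
    }
}

A production caller would also honor the blocked future that process() returns instead of spinning, but the loop above captures the basic contract.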
use of io.prestosql.operator.DriverFactory in project hetu-core by openlookeng.
the class SqlTaskExecution method createTaskHandle.
// this is a separate method to ensure that the `this` reference is not leaked during construction
private static TaskHandle createTaskHandle(TaskStateMachine taskStateMachine, TaskContext taskContext, OutputBuffer outputBuffer, LocalExecutionPlan localExecutionPlan, TaskExecutor taskExecutor) {
    TaskHandle localTaskHandle = taskExecutor.addTask(
            taskStateMachine.getTaskId(),
            outputBuffer::getUtilization,
            getInitialSplitsPerNode(taskContext.getSession()),
            getSplitConcurrencyAdjustmentInterval(taskContext.getSession()),
            getMaxDriversPerTask(taskContext.getSession()));
    taskStateMachine.addStateChangeListener(state -> {
        if (state.isDone()) {
            taskExecutor.removeTask(localTaskHandle);
            for (DriverFactory factory : localExecutionPlan.getDriverFactories()) {
                factory.noMoreDrivers();
            }
        }
    });
    return localTaskHandle;
}
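Worth spelling out why this is a static method rather than constructor logic: addStateChangeListener registers a callback that could fire at any moment, so the callback must never capture a partially constructed SqlTaskExecution. A stripped-down illustration of the same pattern, with entirely hypothetical names:

class Resource {
    interface Registry {
        void onShutdown(Runnable callback);
    }

    static Resource create(Registry registry) {
        Resource resource = new Resource(); // fully constructed before the callback is registered
        registry.onShutdown(resource::close);
        return resource;
    }

    private Resource() {
    }

    void close() {
    }
}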
use of io.prestosql.operator.DriverFactory in project hetu-core by openlookeng.
the class LocalExecutionPlanner method plan.
public LocalExecutionPlan plan(TaskContext taskContext, StageExecutionDescriptor stageExecutionDescriptor, PlanNode plan, List<Symbol> outputLayout, TypeProvider types, List<PlanNodeId> partitionedSourceOrder, OutputBuffer outputBuffer, OutputFactory outputOperatorFactory, Optional<PlanFragmentId> feederCTEId, Optional<PlanNodeId> feederCTEParentId, Map<String, CommonTableExecutionContext> cteCtx) {
    Session session = taskContext.getSession();
    LocalExecutionPlanContext context = new LocalExecutionPlanContext(taskContext, types, metadata, dynamicFilterCacheManager, feederCTEId, feederCTEParentId, cteCtx);
    PhysicalOperation physicalOperation = plan.accept(new Visitor(session, stageExecutionDescriptor), context);
    Function<Page, Page> pagePreprocessor = enforceLayoutProcessor(outputLayout, physicalOperation.getLayout());
    List<Type> outputTypes = outputLayout.stream().map(types::get).collect(toImmutableList());
    context.addDriverFactory(
            context.isInputDriver(),
            true,
            ImmutableList.<OperatorFactory>builder()
                    .addAll(physicalOperation.getOperatorFactories())
                    .add(outputOperatorFactory.createOutputOperator(context.getNextOperatorId(), plan.getId(), outputTypes, pagePreprocessor, taskContext))
                    .build(),
            context.getDriverInstanceCount(),
            physicalOperation.getPipelineExecutionStrategy());
    addLookupOuterDrivers(context);
    // notify operator factories that planning has completed
    context.getDriverFactories().stream()
            .map(DriverFactory::getOperatorFactories)
            .flatMap(List::stream)
            .filter(LocalPlannerAware.class::isInstance)
            .map(LocalPlannerAware.class::cast)
            .forEach(LocalPlannerAware::localPlannerComplete);
    // calculate total number of components to be captured and add to snapshotManager
    if (SystemSessionProperties.isSnapshotEnabled(session)) {
        taskContext.getSnapshotManager().setTotalComponents(calculateTotalCountOfTaskComponentToBeCaptured(taskContext, context, outputBuffer));
    }
    return new LocalExecutionPlan(context.getDriverFactories(), partitionedSourceOrder, stageExecutionDescriptor, feederCTEId);
}
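The planning-complete notification is a marker-interface scan: every operator factory that implements LocalPlannerAware receives a callback once all driver factories are final (i.e. after addLookupOuterDrivers has run). A factory opts in simply by implementing the interface; a hypothetical illustration of the callback's intent (not a real operator factory, which has more methods):

class StatsCollectingFactory implements LocalPlannerAware {
    private boolean planningComplete;

    @Override
    public void localPlannerComplete() {
        // at this point the set of driver factories is known to be final,
        // so per-plan state can safely be sealed
        planningComplete = true;
    }
}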
use of io.prestosql.operator.DriverFactory in project hetu-core by openlookeng.
the class LocalExecutionPlanner method addLookupOuterDrivers.
private static void addLookupOuterDrivers(LocalExecutionPlanContext context) {
    // driver to output the unused rows in the lookup source
    for (DriverFactory factory : context.getDriverFactories()) {
        List<OperatorFactory> operatorFactories = factory.getOperatorFactories();
        for (int i = 0; i < operatorFactories.size(); i++) {
            OperatorFactory operatorFactory = operatorFactories.get(i);
            if (!(operatorFactory instanceof JoinOperatorFactory)) {
                continue;
            }
            JoinOperatorFactory lookupJoin = (JoinOperatorFactory) operatorFactory;
            Optional<OuterOperatorFactoryResult> outerOperatorFactoryResult = lookupJoin.createOuterOperatorFactory();
            if (outerOperatorFactoryResult.isPresent()) {
                // Add a new driver to output the unmatched rows in an outer join.
                // We duplicate all of the factories above the JoinOperator (the ones reading from the joins),
                // and replace the JoinOperator with the OuterOperator (the one that produces unmatched rows).
                ImmutableList.Builder<OperatorFactory> newOperators = ImmutableList.builder();
                newOperators.add(outerOperatorFactoryResult.get().getOuterOperatorFactory());
                operatorFactories.subList(i + 1, operatorFactories.size()).stream()
                        .map(OperatorFactory::duplicate)
                        .forEach(newOperators::add);
                DriverFactory outerFactory = context.addDriverFactory(false, factory.isOutputDriver(), newOperators.build(), OptionalInt.of(1), outerOperatorFactoryResult.get().getBuildExecutionStrategy());
                context.outerToJoinMap.put(outerFactory, factory);
            }
        }
    }
}
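The essential move above is the slice-and-duplicate on the operator-factory list: everything downstream of the join is copied via OperatorFactory::duplicate and stacked on top of the outer-row source, so the new driver reuses the original pipeline's shape without sharing operator state. Isolated as a hypothetical helper (the method name is illustrative, not part of the planner):

private static List<OperatorFactory> buildOuterPipeline(List<OperatorFactory> original, int joinIndex, OperatorFactory outerSource) {
    ImmutableList.Builder<OperatorFactory> newOperators = ImmutableList.builder();
    // the outer-row source takes the join's place at position joinIndex
    newOperators.add(outerSource);
    // every factory after the join is duplicated so the two drivers do not share state
    original.subList(joinIndex + 1, original.size()).stream()
            .map(OperatorFactory::duplicate)
            .forEach(newOperators::add);
    return newOperators.build();
}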