Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
The class TestSpatialJoinOperator, method testYield.
@Test
public void testYield()
{
    // Create a filter function that forces a yield on every probe match,
    // then verify that the operator yields once per match.
    TaskContext taskContext = createTaskContext();
    DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext();

    // force a yield for every match
    AtomicInteger filterFunctionCalls = new AtomicInteger();
    InternalJoinFilterFunction filterFunction = new TestInternalJoinFilterFunction(
            (leftPosition, leftPage, rightPosition, rightPage) -> {
                filterFunctionCalls.incrementAndGet();
                driverContext.getYieldSignal().forceYieldForTesting();
                return true;
            });

    RowPagesBuilder buildPages = rowPagesBuilder(ImmutableList.of(GEOMETRY, VARCHAR))
            .row(POLYGON_A, "A")
            .pageBreak()
            .row(POLYGON_B, "B");
    PagesSpatialIndexFactory pagesSpatialIndexFactory = buildIndex(
            driverContext,
            (build, probe, r) -> build.contains(probe),
            Optional.empty(),
            Optional.of(filterFunction),
            buildPages);

    // 10 points in polygon A (x0...x9)
    // 10 points in polygons A and B (y0...y9), matching both build rows
    // 10 points in polygon B (z0...z9)
    // 40 matches in total
    RowPagesBuilder probePages = rowPagesBuilder(ImmutableList.of(GEOMETRY, VARCHAR));
    for (int i = 0; i < 10; i++) {
        probePages.row(stPoint(1 + 0.1 * i, 1 + 0.1 * i), "x" + i);
    }
    for (int i = 0; i < 10; i++) {
        probePages.row(stPoint(4.5 + 0.01 * i, 4.5 + 0.01 * i), "y" + i);
    }
    for (int i = 0; i < 10; i++) {
        probePages.row(stPoint(6 + 0.1 * i, 6 + 0.1 * i), "z" + i);
    }
    List<Page> probeInput = probePages.build();

    OperatorFactory joinOperatorFactory = new SpatialJoinOperatorFactory(2, new PlanNodeId("test"), INNER, probePages.getTypes(), Ints.asList(1), 0, Optional.empty(), pagesSpatialIndexFactory);
    Operator operator = joinOperatorFactory.createOperator(driverContext);
    assertTrue(operator.needsInput());
    operator.addInput(probeInput.get(0));
    operator.finish();

    // the operator should yield 40 times, once per filter function call
    for (int i = 0; i < 40; i++) {
        driverContext.getYieldSignal().setWithDelay(5 * SECONDS.toNanos(1), driverContext.getYieldExecutor());
        assertNull(operator.getOutput());
        assertEquals(filterFunctionCalls.get(), i + 1, "Expected join to stop processing (yield) after calling filter function once");
        driverContext.getYieldSignal().reset();
    }

    // A delayed yield no longer prevents the operator from producing a page:
    // the filter function is not called again, so no yield is forced.
    driverContext.getYieldSignal().setWithDelay(5 * SECONDS.toNanos(1), driverContext.getYieldExecutor());
    Page output = operator.getOutput();
    assertNotNull(output);
    // make sure we have 40 matches
    assertEquals(output.getPositionCount(), 40);
}
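The loop above works because SpatialJoinOperator checks the driver's yield signal between probe positions: once the signal is set, getOutput() returns null and the driver reschedules the operator later. Below is a minimal, self-contained sketch of that cooperative pattern. It assumes the com.facebook.presto.operator.DriverYieldSignal API visible in the test (plus its isSet() accessor, which the test does not call directly); the work iterator and the YieldAwareLoop wrapper are hypothetical illustrations, not Presto code.

import com.facebook.presto.operator.DriverYieldSignal;
import java.util.Iterator;

final class YieldAwareLoop
{
    private final DriverYieldSignal yieldSignal;

    YieldAwareLoop(DriverYieldSignal yieldSignal)
    {
        this.yieldSignal = yieldSignal;
    }

    // Returns true if all work finished, false if it stopped early because a yield was requested.
    boolean process(Iterator<Runnable> work)
    {
        while (work.hasNext()) {
            work.next().run();
            if (yieldSignal.isSet()) {
                // stop and let the driver reschedule us, like SpatialJoinOperator returning null from getOutput()
                return false;
            }
        }
        return true;
    }
}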
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
The class TestSpatialJoinOperator, method testDistanceQuery.
@Test
public void testDistanceQuery()
{
    TaskContext taskContext = createTaskContext();
    DriverContext driverContext = taskContext.addPipelineContext(0, true, true, false).addDriverContext();

    // build side: points with a per-row match radius in the DOUBLE column (channel 2)
    RowPagesBuilder buildPages = rowPagesBuilder(ImmutableList.of(GEOMETRY, VARCHAR, DOUBLE))
            .row(stPoint(0, 0), "0_0", 1.5)
            .row(null, "null", 1.5)
            .row(stPoint(1, 0), "1_0", 1.5)
            .pageBreak()
            .row(stPoint(3, 0), "3_0", 1.5)
            .pageBreak()
            .row(stPoint(10, 0), "10_0", 1.5);
    PagesSpatialIndexFactory pagesSpatialIndexFactory = buildIndex(
            driverContext,
            (build, probe, r) -> build.distance(probe) <= r.getAsDouble(),
            Optional.of(2),
            Optional.empty(),
            buildPages);

    RowPagesBuilder probePages = rowPagesBuilder(ImmutableList.of(GEOMETRY, VARCHAR))
            .row(stPoint(0, 1), "0_1")
            .row(null, "null")
            .row(stPoint(1, 1), "1_1")
            .pageBreak()
            .row(stPoint(3, 1), "3_1")
            .pageBreak()
            .row(stPoint(10, 1), "10_1");

    OperatorFactory joinOperatorFactory = new SpatialJoinOperatorFactory(2, new PlanNodeId("test"), INNER, probePages.getTypes(), Ints.asList(1), 0, Optional.empty(), pagesSpatialIndexFactory);
    // Make sure that spatial index reference counting works with duplicate factories
    joinOperatorFactory.duplicate().noMoreOperators();

    MaterializedResult expected = resultBuilder(taskContext.getSession(), ImmutableList.of(VARCHAR, VARCHAR))
            .row("0_1", "0_0")
            .row("0_1", "1_0")
            .row("1_1", "0_0")
            .row("1_1", "1_0")
            .row("3_1", "3_0")
            .row("10_1", "10_0")
            .build();
    assertOperatorEqualsIgnoreOrder(joinOperatorFactory, driverContext, probePages.build(), expected);
}
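The radius comes from build channel 2 (Optional.of(2) selects the DOUBLE column), so every build point carries its own match distance of 1.5; the expected result is exactly the probe/build pairs at distance <= 1.5. As a sanity check, here is a standalone plain-Java sketch, with no Presto dependencies, that reproduces the six expected pairs; the Point class and the Euclidean distance are ad-hoc stand-ins for the OGC geometry types used by the test.

import java.util.Arrays;
import java.util.List;

public final class DistanceMatchSketch
{
    private static final class Point
    {
        final String name;
        final double x;
        final double y;

        Point(String name, double x, double y)
        {
            this.name = name;
            this.x = x;
            this.y = y;
        }
    }

    public static void main(String[] args)
    {
        List<Point> build = Arrays.asList(
                new Point("0_0", 0, 0),
                new Point("1_0", 1, 0),
                new Point("3_0", 3, 0),
                new Point("10_0", 10, 0));
        List<Point> probe = Arrays.asList(
                new Point("0_1", 0, 1),
                new Point("1_1", 1, 1),
                new Point("3_1", 3, 1),
                new Point("10_1", 10, 1));
        double radius = 1.5;
        for (Point p : probe) {
            for (Point b : build) {
                // Euclidean distance, standing in for build.distance(probe) in the test
                if (Math.hypot(p.x - b.x, p.y - b.y) <= radius) {
                    // prints exactly the six pairs asserted in the expected result
                    System.out.println(p.name + " -> " + b.name);
                }
            }
        }
    }
}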
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
The class TestHiveDistributedJoinQueriesWithDynamicFiltering, method searchScanFilterAndProjectOperatorStats.
private OperatorStats searchScanFilterAndProjectOperatorStats(QueryId queryId, String tableName)
{
    DistributedQueryRunner runner = (DistributedQueryRunner) getQueryRunner();
    Plan plan = runner.getQueryPlan(queryId);
    // find the single project -> filter -> table scan pipeline over the given table
    PlanNodeId nodeId = PlanNodeSearcher.searchFrom(plan.getRoot())
            .where(node -> {
                if (!(node instanceof ProjectNode)) {
                    return false;
                }
                ProjectNode projectNode = (ProjectNode) node;
                FilterNode filterNode = (FilterNode) projectNode.getSource();
                TableScanNode tableScanNode = (TableScanNode) filterNode.getSource();
                return tableName.equals(((HiveTableHandle) tableScanNode.getTable().getConnectorHandle()).getTableName());
            })
            .findOnlyElement()
            .getId();
    return runner.getCoordinator()
            .getQueryManager()
            .getFullQueryInfo(queryId)
            .getQueryStats()
            .getOperatorSummaries()
            .stream()
            .filter(summary -> nodeId.equals(summary.getPlanNodeId()))
            .collect(MoreCollectors.onlyElement());
}
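PlanNodeSearcher provides a small fluent API for locating nodes in a plan tree; the method above uses findOnlyElement() to insist on exactly one match. As a variation, the sketch below collects the ids of all table scans in a plan. This is a usage sketch, not code from the repository: it assumes PlanNodeSearcher's findAll() (which exists alongside findOnlyElement()), and the import paths reflect one Presto version and may differ in others.

import static com.google.common.collect.ImmutableList.toImmutableList;

import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.PlanNodeId;
import com.facebook.presto.spi.plan.TableScanNode;
import com.facebook.presto.sql.planner.Plan;
import com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher;
import java.util.List;

final class PlanSearchSketch
{
    // Collect the id of every TableScanNode anywhere in the plan tree.
    static List<PlanNodeId> tableScanIds(Plan plan)
    {
        return PlanNodeSearcher.searchFrom(plan.getRoot())
                .where(node -> node instanceof TableScanNode)
                .findAll()
                .stream()
                .map(PlanNode::getId)
                .collect(toImmutableList());
    }
}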
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
The class SectionExecutionFactory, method createStageScheduler.
private StageScheduler createStageScheduler(
        SplitSourceFactory splitSourceFactory,
        Session session,
        StreamingSubPlan plan,
        Function<PartitioningHandle, NodePartitionMap> partitioningCache,
        Optional<SqlStageExecution> parentStageExecution,
        StageId stageId,
        SqlStageExecution stageExecution,
        PartitioningHandle partitioningHandle,
        TableWriteInfo tableWriteInfo,
        Set<SqlStageExecution> childStageExecutions)
{
    Map<PlanNodeId, SplitSource> splitSources = splitSourceFactory.createSplitSources(plan.getFragment(), session, tableWriteInfo);
    int maxTasksPerStage = getMaxTasksPerStage(session);
    if (partitioningHandle.equals(SOURCE_DISTRIBUTION)) {
        // nodes are selected dynamically based on the constraints of the splits and the system load
        Map.Entry<PlanNodeId, SplitSource> entry = getOnlyElement(splitSources.entrySet());
        PlanNodeId planNodeId = entry.getKey();
        SplitSource splitSource = entry.getValue();
        ConnectorId connectorId = splitSource.getConnectorId();
        if (isInternalSystemConnector(connectorId)) {
            connectorId = null;
        }
        NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, connectorId, maxTasksPerStage);
        SplitPlacementPolicy placementPolicy = new DynamicSplitPlacementPolicy(nodeSelector, stageExecution::getAllTasks);
        checkArgument(!plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution());
        return newSourcePartitionedSchedulerAsStageScheduler(stageExecution, planNodeId, splitSource, placementPolicy, splitBatchSize);
    }
    else if (partitioningHandle.equals(SCALED_WRITER_DISTRIBUTION)) {
        Supplier<Collection<TaskStatus>> sourceTasksProvider = () -> childStageExecutions.stream()
                .map(SqlStageExecution::getAllTasks)
                .flatMap(Collection::stream)
                .map(RemoteTask::getTaskStatus)
                .collect(toList());
        Supplier<Collection<TaskStatus>> writerTasksProvider = () -> stageExecution.getAllTasks().stream()
                .map(RemoteTask::getTaskStatus)
                .collect(toList());
        ScaledWriterScheduler scheduler = new ScaledWriterScheduler(
                stageExecution,
                sourceTasksProvider,
                writerTasksProvider,
                nodeScheduler.createNodeSelector(session, null),
                scheduledExecutor,
                getWriterMinSize(session),
                isOptimizedScaleWriterProducerBuffer(session));
        whenAllStages(childStageExecutions, StageExecutionState::isDone).addListener(scheduler::finish, directExecutor());
        return scheduler;
    }
    else {
        if (!splitSources.isEmpty()) {
            // contains local source
            List<PlanNodeId> schedulingOrder = plan.getFragment().getTableScanSchedulingOrder();
            ConnectorId connectorId = partitioningHandle.getConnectorId().orElseThrow(IllegalStateException::new);
            List<ConnectorPartitionHandle> connectorPartitionHandles;
            boolean groupedExecutionForStage = plan.getFragment().getStageExecutionDescriptor().isStageGroupedExecution();
            if (groupedExecutionForStage) {
                connectorPartitionHandles = nodePartitioningManager.listPartitionHandles(session, partitioningHandle);
                checkState(!ImmutableList.of(NOT_PARTITIONED).equals(connectorPartitionHandles));
            }
            else {
                connectorPartitionHandles = ImmutableList.of(NOT_PARTITIONED);
            }
            BucketNodeMap bucketNodeMap;
            List<InternalNode> stageNodeList;
            if (plan.getFragment().getRemoteSourceNodes().stream().allMatch(node -> node.getExchangeType() == REPLICATE)) {
                // no non-replicated remote source
                boolean dynamicLifespanSchedule = plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule();
                bucketNodeMap = nodePartitioningManager.getBucketNodeMap(session, partitioningHandle, dynamicLifespanSchedule);
                // verify execution is consistent with planner's decision on dynamic lifespan schedule
                verify(bucketNodeMap.isDynamic() == dynamicLifespanSchedule);
                if (bucketNodeMap.hasInitialMap()) {
                    stageNodeList = bucketNodeMap.getBucketToNode().get().stream()
                            .distinct()
                            .collect(toImmutableList());
                }
                else {
                    stageNodeList = new ArrayList<>(nodeScheduler.createNodeSelector(session, connectorId).selectRandomNodes(maxTasksPerStage));
                }
            }
            else {
                // cannot use dynamic lifespan schedule
                verify(!plan.getFragment().getStageExecutionDescriptor().isDynamicLifespanSchedule());
                // remote source requires nodePartitionMap
                NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
                if (groupedExecutionForStage) {
                    checkState(connectorPartitionHandles.size() == nodePartitionMap.getBucketToPartition().length);
                }
                stageNodeList = nodePartitionMap.getPartitionToNode();
                bucketNodeMap = nodePartitionMap.asBucketNodeMap();
            }
            FixedSourcePartitionedScheduler fixedSourcePartitionedScheduler = new FixedSourcePartitionedScheduler(
                    stageExecution,
                    splitSources,
                    plan.getFragment().getStageExecutionDescriptor(),
                    schedulingOrder,
                    stageNodeList,
                    bucketNodeMap,
                    splitBatchSize,
                    getConcurrentLifespansPerNode(session),
                    nodeScheduler.createNodeSelector(session, connectorId),
                    connectorPartitionHandles);
            if (plan.getFragment().getStageExecutionDescriptor().isRecoverableGroupedExecution()) {
                stageExecution.registerStageTaskRecoveryCallback(taskId -> {
                    checkArgument(taskId.getStageExecutionId().getStageId().equals(stageId), "The task did not execute this stage");
                    checkArgument(parentStageExecution.isPresent(), "Parent stage execution must exist");
                    checkArgument(parentStageExecution.get().getAllTasks().size() == 1, "Parent stage should only have one task for recoverable grouped execution");
                    parentStageExecution.get().removeRemoteSourceIfSingleTaskStage(taskId);
                    fixedSourcePartitionedScheduler.recover(taskId);
                });
            }
            return fixedSourcePartitionedScheduler;
        }
        else {
            // all sources are remote
            NodePartitionMap nodePartitionMap = partitioningCache.apply(plan.getFragment().getPartitioning());
            List<InternalNode> partitionToNode = nodePartitionMap.getPartitionToNode();
            // TODO: this should asynchronously wait a standard timeout period before failing
            checkCondition(!partitionToNode.isEmpty(), NO_NODES_AVAILABLE, "No worker nodes available");
            return new FixedCountScheduler(stageExecution, partitionToNode);
        }
    }
}
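One detail worth calling out in the SCALED_WRITER_DISTRIBUTION branch: whenAllStages(...) returns a future that completes once every child stage reaches a done state, and scheduler::finish is attached to it as a listener. Below is a minimal sketch of that pattern using plain Guava futures; the per-stage SettableFuture objects are hypothetical stand-ins for whatever whenAllStages tracks internally.

import static com.google.common.util.concurrent.MoreExecutors.directExecutor;

import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.SettableFuture;
import java.util.Arrays;

public final class WhenAllStagesSketch
{
    public static void main(String[] args)
    {
        // one future per child stage, completed when that stage is done
        SettableFuture<Void> stage1Done = SettableFuture.create();
        SettableFuture<Void> stage2Done = SettableFuture.create();

        // completes only after every child stage future completes,
        // mirroring whenAllStages(childStageExecutions, StageExecutionState::isDone)
        Futures.allAsList(Arrays.asList(stage1Done, stage2Done))
                .addListener(() -> System.out.println("all child stages done -> scheduler.finish()"), directExecutor());

        stage1Done.set(null);
        stage2Done.set(null); // listener fires here
    }
}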
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
The class TestScanFilterAndProjectOperator, method testPageSourceLazyLoad.
@Test
public void testPageSourceLazyLoad()
{
    Block inputBlock = BlockAssertions.createLongSequenceBlock(0, 100);
    // If column 1 is loaded, test will fail
    Page input = new Page(100, inputBlock, new LazyBlock(100, lazyBlock -> {
        throw new AssertionError("Lazy block should not be loaded");
    }));
    DriverContext driverContext = newDriverContext();
    List<RowExpression> projections = ImmutableList.of(field(0, VARCHAR));
    Supplier<CursorProcessor> cursorProcessor = expressionCompiler.compileCursorProcessor(
            driverContext.getSession().getSqlFunctionProperties(),
            Optional.empty(),
            projections,
            "key");
    PageProcessor pageProcessor = new PageProcessor(
            Optional.of(new SelectAllFilter()),
            ImmutableList.of(new PageProjectionWithOutputs(new LazyPagePageProjection(), new int[] {0})));
    ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory factory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(
            0,
            new PlanNodeId("test"),
            new PlanNodeId("0"),
            (session, split, table, columns) -> new SinglePagePageSource(input),
            cursorProcessor,
            () -> pageProcessor,
            TESTING_TABLE_HANDLE,
            ImmutableList.of(),
            ImmutableList.of(BIGINT),
            Optional.empty(),
            new DataSize(0, BYTE),
            0);

    SourceOperator operator = factory.createOperator(driverContext);
    operator.addSplit(new Split(new ConnectorId("test"), TestingTransactionHandle.create(), TestingSplit.createLocalSplit()));
    operator.noMoreSplits();

    MaterializedResult expected = toMaterializedResult(driverContext.getSession(), ImmutableList.of(BIGINT), ImmutableList.of(new Page(inputBlock)));
    MaterializedResult actual = toMaterializedResult(driverContext.getSession(), ImmutableList.of(BIGINT), toPages(operator));
    assertEquals(actual.getRowCount(), expected.getRowCount());
    assertEquals(actual, expected);
}
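The test hinges on the LazyBlock contract: the loader lambda runs only when the block's contents are first requested, so a column that is never read never pays its load cost (and here, never throws). The following is a minimal sketch of using that contract for deferred loading. It assumes the LazyBlock(positionCount, loader) constructor and setBlock() method shown in the test; the deferred() helper and its Supplier parameter are illustrative, and the import paths reflect recent Presto versions where the block SPI lives in presto-common.

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.LazyBlock;
import java.util.function.Supplier;

final class LazyBlockSketch
{
    // Wrap an expensive load so it runs at most once, on first access to the block's data.
    static LazyBlock deferred(int positionCount, Supplier<Block> expensiveLoad)
    {
        return new LazyBlock(positionCount, lazyBlock -> lazyBlock.setBlock(expensiveLoad.get()));
    }
}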