Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
From class TestingOperatorContext, method create:
public static OperatorContext create(ScheduledExecutorService scheduledExecutor) {
    Executor executor = MoreExecutors.directExecutor();
    TaskContext taskContext = TestingTaskContext.createTaskContext(executor, scheduledExecutor, TestingSession.testSessionBuilder().build());
    MemoryTrackingContext pipelineMemoryContext = new MemoryTrackingContext(newSimpleAggregatedMemoryContext(), newSimpleAggregatedMemoryContext(), newSimpleAggregatedMemoryContext());
    PipelineContext pipelineContext = new PipelineContext(1, taskContext, executor, scheduledExecutor, pipelineMemoryContext, false, false, false);
    DriverContext driverContext = new DriverContext(pipelineContext, executor, scheduledExecutor, pipelineMemoryContext, Lifespan.taskWide(), Optional.empty(), 0L);
    OperatorContext operatorContext = driverContext.addOperatorContext(1, new PlanNodeId("test"), "operator type");
    return operatorContext;
}
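A minimal sketch of how this factory might be called from a test. Only TestingOperatorContext.create and the PlanNodeId("test") wiring come from the snippet above; the class name OperatorContextUsageSketch, the single-threaded executor, the println, and the assumption that TestingOperatorContext lives in com.facebook.presto.operator alongside OperatorContext are illustrative.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;

import com.facebook.presto.operator.OperatorContext;
import com.facebook.presto.operator.TestingOperatorContext;

public class OperatorContextUsageSketch {
    public static void main(String[] args) {
        // Hypothetical driver: create a scheduled executor and build an OperatorContext for a test.
        ScheduledExecutorService scheduledExecutor = Executors.newSingleThreadScheduledExecutor();
        try {
            // The returned context is registered under PlanNodeId("test") with operator type
            // "operator type", as set up by the factory method above.
            OperatorContext operatorContext = TestingOperatorContext.create(scheduledExecutor);
            System.out.println(operatorContext);
        }
        finally {
            scheduledExecutor.shutdownNow();
        }
    }
}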
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
From class SqlStageExecution, method scheduleTask:
private synchronized RemoteTask scheduleTask(InternalNode node, TaskId taskId, Multimap<PlanNodeId, Split> sourceSplits) {
    checkArgument(!allTasks.contains(taskId), "A task with id %s already exists", taskId);
    ImmutableMultimap.Builder<PlanNodeId, Split> initialSplits = ImmutableMultimap.builder();
    initialSplits.putAll(sourceSplits);
    sourceTasks.forEach((planNodeId, task) -> {
        TaskStatus status = task.getTaskStatus();
        if (status.getState() != TaskState.FINISHED) {
            initialSplits.put(planNodeId, createRemoteSplitFor(taskId, task.getRemoteTaskLocation(), task.getTaskId()));
        }
    });
    OutputBuffers outputBuffers = this.outputBuffers.get();
    checkState(outputBuffers != null, "Initial output buffers must be set before a task can be scheduled");
    RemoteTask task = remoteTaskFactory.createRemoteTask(session, taskId, node, planFragment, initialSplits.build(), outputBuffers, nodeTaskMap.createTaskStatsTracker(node, taskId), summarizeTaskInfo, tableWriteInfo);
    completeSources.forEach(task::noMoreSplits);
    allTasks.add(taskId);
    tasks.computeIfAbsent(node, key -> newConcurrentHashSet()).add(task);
    nodeTaskMap.addTask(node, task);
    task.addStateChangeListener(new StageTaskListener(taskId));
    task.addFinalTaskInfoListener(this::updateFinalTaskInfo);
    if (!stateMachine.getState().isDone()) {
        task.start();
    }
    else {
        // stage finished while we were scheduling this task
        task.abort();
    }
    return task;
}
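The PlanNodeId usage here is the initialSplits multimap: splits are grouped under the id of the source plan node that should consume them. A self-contained sketch of just that pattern, assuming presto-spi and Guava on the classpath; the plan node ids "tableScan" and "remoteSource" and the String stand-ins for Split are illustrative placeholders, not values from the snippet above.

import com.facebook.presto.spi.plan.PlanNodeId;
import com.google.common.collect.ImmutableMultimap;

public class InitialSplitsSketch {
    public static void main(String[] args) {
        // Keys are the plan node ids of the fragment's source nodes; Strings stand in for Split objects.
        ImmutableMultimap.Builder<PlanNodeId, String> initialSplits = ImmutableMultimap.builder();
        initialSplits.put(new PlanNodeId("tableScan"), "connector-split-1");
        initialSplits.put(new PlanNodeId("tableScan"), "connector-split-2");
        // Remote sources receive synthetic splits pointing at upstream tasks,
        // analogous to what createRemoteSplitFor produces above.
        initialSplits.put(new PlanNodeId("remoteSource"), "remote-split-for-upstream-task");
        System.out.println(initialSplits.build());
    }
}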
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
From class SqlTaskExecution, method scheduleTableScanSource:
private synchronized void scheduleTableScanSource(TaskSource sourceUpdate) {
    mergeIntoPendingSplits(sourceUpdate.getPlanNodeId(), sourceUpdate.getSplits(), sourceUpdate.getNoMoreSplitsForLifespan(), sourceUpdate.isNoMoreSplits());
    while (true) {
        // SchedulingLifespanManager tracks how far each Lifespan has been scheduled. Here is an example.
        // Let's say there are 4 source pipelines/nodes: A, B, C, and D, in scheduling order.
        // And we're processing 3 concurrent lifespans at a time. In this case, we could have
        //
        // * Lifespan 10: A B [C] D; i.e. pipelines A and B have finished scheduling (but not necessarily finished running).
        // * Lifespan 20: [A] B C D
        // * Lifespan 30: A [B] C D
        //
        // To recap, SchedulingLifespanManager records the next scheduling source node for each lifespan.
        Iterator<SchedulingLifespan> activeLifespans = schedulingLifespanManager.getActiveLifespans();
        boolean madeProgress = false;
        while (activeLifespans.hasNext()) {
            SchedulingLifespan schedulingLifespan = activeLifespans.next();
            Lifespan lifespan = schedulingLifespan.getLifespan();
            // A lifespan that has finished scheduling all of its plan nodes has no current plan node to schedule;
            // this is why getSchedulingPlanNode returns an Optional.
            while (true) {
                Optional<PlanNodeId> optionalSchedulingPlanNode = schedulingLifespan.getSchedulingPlanNode();
                if (!optionalSchedulingPlanNode.isPresent()) {
                    break;
                }
                PlanNodeId schedulingPlanNode = optionalSchedulingPlanNode.get();
                DriverSplitRunnerFactory partitionedDriverRunnerFactory = driverRunnerFactoriesWithSplitLifeCycle.get(schedulingPlanNode);
                PendingSplits pendingSplits = pendingSplitsByPlanNode.get(schedulingPlanNode).getLifespan(lifespan);
                // Enqueue driver runners with driver group lifecycle for this driver life cycle, if not already enqueued.
                if (!lifespan.isTaskWide() && !schedulingLifespan.getAndSetDriversForDriverGroupLifeCycleScheduled()) {
                    scheduleDriversForDriverGroupLifeCycle(lifespan);
                }
                // Enqueue driver runners with split lifecycle for this plan node and driver life cycle combination.
                ImmutableList.Builder<DriverSplitRunner> runners = ImmutableList.builder();
                for (ScheduledSplit scheduledSplit : pendingSplits.removeAllSplits()) {
                    // create a new driver for the split
                    runners.add(partitionedDriverRunnerFactory.createDriverRunner(scheduledSplit, lifespan));
                }
                enqueueDriverSplitRunner(false, runners.build());
                // If all driver runners have been enqueued for this plan node and driver life cycle combination,
                // move on to the next plan node.
                if (pendingSplits.getState() != NO_MORE_SPLITS) {
                    break;
                }
                partitionedDriverRunnerFactory.noMoreDriverRunner(ImmutableList.of(lifespan));
                pendingSplits.markAsCleanedUp();
                schedulingLifespan.nextPlanNode();
                madeProgress = true;
                if (schedulingLifespan.isDone()) {
                    break;
                }
            }
        }
        if (!madeProgress) {
            break;
        }
    }
    if (sourceUpdate.isNoMoreSplits()) {
        schedulingLifespanManager.noMoreSplits(sourceUpdate.getPlanNodeId());
    }
}
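The long comment at the top of this method describes the scheduling bookkeeping in prose. Below is a stand-alone, much-simplified model of that idea, for illustration only: the real SchedulingLifespanManager does considerably more (no-more-splits tracking, per-lifespan completion, and so on), and the class and variable names here are not Presto's.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.facebook.presto.spi.plan.PlanNodeId;

public class LifespanSchedulingModel {
    public static void main(String[] args) {
        // Source plan nodes A, B, C, D in scheduling order, as in the comment above.
        List<PlanNodeId> schedulingOrder = List.of(
                new PlanNodeId("A"), new PlanNodeId("B"), new PlanNodeId("C"), new PlanNodeId("D"));

        // For each lifespan, remember the index of the plan node that is scheduled next.
        Map<Integer, Integer> nextPlanNodeByLifespan = new HashMap<>();
        nextPlanNodeByLifespan.put(10, 2); // Lifespan 10: A B [C] D
        nextPlanNodeByLifespan.put(20, 0); // Lifespan 20: [A] B C D
        nextPlanNodeByLifespan.put(30, 1); // Lifespan 30: A [B] C D

        nextPlanNodeByLifespan.forEach((lifespan, index) ->
                System.out.println("Lifespan " + lifespan + " schedules next: " + schedulingOrder.get(index)));
    }
}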
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
From class HttpRemoteTask, method addSplits:
@Override
public synchronized void addSplits(Multimap<PlanNodeId, Split> splitsBySource) {
    requireNonNull(splitsBySource, "splitsBySource is null");
    // only add pending split if not done
    if (getTaskStatus().getState().isDone()) {
        return;
    }
    boolean needsUpdate = false;
    for (Entry<PlanNodeId, Collection<Split>> entry : splitsBySource.asMap().entrySet()) {
        PlanNodeId sourceId = entry.getKey();
        Collection<Split> splits = entry.getValue();
        boolean isTableScanSource = tableScanPlanNodeIds.contains(sourceId);
        checkState(!noMoreSplits.containsKey(sourceId), "noMoreSplits has already been set for %s", sourceId);
        int added = 0;
        long addedWeight = 0;
        for (Split split : splits) {
            if (pendingSplits.put(sourceId, new ScheduledSplit(nextSplitId.getAndIncrement(), sourceId, split))) {
                if (isTableScanSource) {
                    added++;
                    addedWeight = addExact(addedWeight, split.getSplitWeight().getRawValue());
                }
            }
        }
        if (isTableScanSource) {
            pendingSourceSplitCount += added;
            pendingSourceSplitsWeight = addExact(pendingSourceSplitsWeight, addedWeight);
            updateTaskStats();
        }
        needsUpdate = true;
    }
    updateSplitQueueSpace();
    if (needsUpdate) {
        this.needsUpdate.set(true);
        scheduleUpdate();
    }
}
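Pending split counts and weights for table scan sources are accumulated with Math.addExact, so a long overflow fails loudly instead of silently corrupting the statistics. A minimal sketch of just that accounting pattern; the weight values are arbitrary examples and the class name is not part of Presto.

import static java.lang.Math.addExact;

public class SplitWeightAccountingSketch {
    public static void main(String[] args) {
        // Raw split weights as they might come from Split.getSplitWeight().getRawValue() above.
        long[] rawSplitWeights = {100, 250, 75};
        int added = 0;
        long addedWeight = 0;
        for (long weight : rawSplitWeights) {
            added++;
            // addExact throws ArithmeticException on overflow rather than wrapping around.
            addedWeight = addExact(addedWeight, weight);
        }
        System.out.println("added=" + added + ", addedWeight=" + addedWeight);
    }
}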
Use of com.facebook.presto.spi.plan.PlanNodeId in project presto by prestodb.
From class TestNodeScheduler, method testTopologyAwareScheduling:
@Test(timeOut = 60 * 1000)
public void testTopologyAwareScheduling() throws Exception {
    TestingTransactionHandle transactionHandle = TestingTransactionHandle.create();
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    InMemoryNodeManager nodeManager = new InMemoryNodeManager();
    ImmutableList.Builder<InternalNode> nodeBuilder = ImmutableList.builder();
    nodeBuilder.add(new InternalNode("node1", URI.create("http://host1.rack1:11"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node2", URI.create("http://host2.rack1:12"), NodeVersion.UNKNOWN, false));
    nodeBuilder.add(new InternalNode("node3", URI.create("http://host3.rack2:13"), NodeVersion.UNKNOWN, false));
    List<InternalNode> nodes = nodeBuilder.build();
    nodeManager.addNode(CONNECTOR_ID, nodes);
    // contents of taskMap indicate the node-task map for the current stage
    Map<InternalNode, RemoteTask> taskMap = new HashMap<>();
    NodeSchedulerConfig nodeSchedulerConfig = new NodeSchedulerConfig().setMaxSplitsPerNode(25).setIncludeCoordinator(false).setNetworkTopology("test").setMaxPendingSplitsPerTask(20);
    TestNetworkTopology topology = new TestNetworkTopology();
    NetworkLocationCache locationCache = new NetworkLocationCache(topology) {
        @Override
        public NetworkLocation get(HostAddress host) {
            // Bypass the cache for workers, since we only look them up once and they would all be unresolved otherwise
            if (host.getHostText().startsWith("host")) {
                return topology.locate(host);
            }
            else {
                return super.get(host);
            }
        }
    };
    NodeScheduler nodeScheduler = new NodeScheduler(locationCache, topology, nodeManager, new NodeSelectionStats(), nodeSchedulerConfig, nodeTaskMap, new Duration(5, SECONDS), new ThrowingNodeTtlFetcherManager(), new NoOpQueryManager(), new SimpleTtlNodeSelectorConfig());
    NodeSelector nodeSelector = nodeScheduler.createNodeSelector(session, CONNECTOR_ID);
    // Fill up the nodes with non-local data
    ImmutableSet.Builder<Split> nonRackLocalBuilder = ImmutableSet.builder();
    for (int i = 0; i < (25 + 11) * 3; i++) {
        nonRackLocalBuilder.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("data.other_rack", 1))));
    }
    Set<Split> nonRackLocalSplits = nonRackLocalBuilder.build();
    Multimap<InternalNode, Split> assignments = nodeSelector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(taskMap.values())).getAssignments();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    int task = 0;
    for (InternalNode node : assignments.keySet()) {
        TaskId taskId = new TaskId("test", 1, 0, task);
        task++;
        MockRemoteTaskFactory.MockRemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, ImmutableList.copyOf(assignments.get(node)), nodeTaskMap.createTaskStatsTracker(node, taskId));
        remoteTask.startSplits(25);
        nodeTaskMap.addTask(node, remoteTask);
        taskMap.put(node, remoteTask);
    }
    // Continue assigning to fill up part of the queue
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    assignments = nodeSelector.computeAssignments(nonRackLocalSplits, ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    nonRackLocalSplits = Sets.difference(nonRackLocalSplits, new HashSet<>(assignments.values()));
    // Check that 3 of the splits were rejected, since they're non-local
    assertEquals(nonRackLocalSplits.size(), 3);
    // Assign rack-local splits
    ImmutableSet.Builder<Split> rackLocalSplits = ImmutableSet.builder();
    HostAddress dataHost1 = HostAddress.fromParts("data.rack1", 1);
    HostAddress dataHost2 = HostAddress.fromParts("data.rack2", 1);
    for (int i = 0; i < 6 * 2; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(dataHost1)));
    }
    for (int i = 0; i < 6; i++) {
        rackLocalSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(dataHost2)));
    }
    assignments = nodeSelector.computeAssignments(rackLocalSplits.build(), ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    Set<Split> unassigned = Sets.difference(rackLocalSplits.build(), new HashSet<>(assignments.values()));
    // Compute the assignments a second time to account for the fact that some splits may not have been assigned due to asynchronous
    // loading of the NetworkLocationCache
    boolean cacheRefreshed = false;
    while (!cacheRefreshed) {
        cacheRefreshed = true;
        if (locationCache.get(dataHost1).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        if (locationCache.get(dataHost2).equals(ROOT_LOCATION)) {
            cacheRefreshed = false;
        }
        MILLISECONDS.sleep(10);
    }
    assignments = nodeSelector.computeAssignments(unassigned, ImmutableList.copyOf(taskMap.values())).getAssignments();
    for (InternalNode node : assignments.keySet()) {
        RemoteTask remoteTask = taskMap.get(node);
        remoteTask.addSplits(ImmutableMultimap.<PlanNodeId, Split>builder().putAll(new PlanNodeId("sourceId"), assignments.get(node)).build());
    }
    unassigned = Sets.difference(unassigned, new HashSet<>(assignments.values()));
    assertEquals(unassigned.size(), 3);
    int rack1 = 0;
    int rack2 = 0;
    for (Split split : unassigned) {
        String rack = topology.locate(split.getPreferredNodes(new ModularHashingNodeProvider(nodeSelector.getAllNodes())).get(0)).getSegments().get(0);
        switch (rack) {
            case "rack1":
                rack1++;
                break;
            case "rack2":
                rack2++;
                break;
            default:
                fail();
        }
    }
    assertEquals(rack1, 2);
    assertEquals(rack2, 1);
    // Assign local splits
    ImmutableSet.Builder<Split> localSplits = ImmutableSet.builder();
    localSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("host1.rack1", 1))));
    localSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("host2.rack1", 1))));
    localSplits.add(new Split(CONNECTOR_ID, transactionHandle, new TestSplitRemote(HostAddress.fromParts("host3.rack2", 1))));
    assignments = nodeSelector.computeAssignments(localSplits.build(), ImmutableList.copyOf(taskMap.values())).getAssignments();
    assertEquals(assignments.size(), 3);
    assertEquals(assignments.keySet().size(), 3);
}