Use of io.prestosql.execution.RemoteTask in project hetu-core by openlookeng.
From the class TestNodeScheduler, method testMaxSplitsPerNodePerTask:
@Test
public void testMaxSplitsPerNodePerTask() {
    setUpNodes();
    InternalNode newNode = new InternalNode("other4", URI.create("http://10.0.0.1:14"), NodeVersion.UNKNOWN, false);
    nodeManager.addNode(CONNECTOR_ID, newNode);
    ImmutableList.Builder<Split> initialSplits = ImmutableList.builder();
    for (int i = 0; i < 20; i++) {
        initialSplits.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    List<RemoteTask> tasks = new ArrayList<>();
    MockRemoteTaskFactory remoteTaskFactory = new MockRemoteTaskFactory(remoteTaskExecutor, remoteTaskScheduledExecutor);
    for (InternalNode node : nodeManager.getActiveConnectorNodes(CONNECTOR_ID)) {
        // Max out number of splits on node
        TaskId taskId = new TaskId("test", 1, 1);
        RemoteTask remoteTask = remoteTaskFactory.createTableScanTask(taskId, node, initialSplits.build(), nodeTaskMap.createPartitionedSplitCountTracker(node, taskId));
        nodeTaskMap.addTask(node, remoteTask);
        tasks.add(remoteTask);
    }
    TaskId taskId = new TaskId("test", 1, 2);
    RemoteTask newRemoteTask = remoteTaskFactory.createTableScanTask(taskId, newNode, initialSplits.build(), nodeTaskMap.createPartitionedSplitCountTracker(newNode, taskId));
    // Max out pending splits on new node
    taskMap.put(newNode, newRemoteTask);
    nodeTaskMap.addTask(newNode, newRemoteTask);
    tasks.add(newRemoteTask);
    Set<Split> splits = new HashSet<>();
    for (int i = 0; i < 5; i++) {
        splits.add(new Split(CONNECTOR_ID, new TestSplitRemote(), Lifespan.taskWide()));
    }
    Multimap<InternalNode, Split> assignments = nodeSelector.computeAssignments(splits, ImmutableList.copyOf(taskMap.values()), Optional.empty()).getAssignments();
    // no split should be assigned to the newNode, as it already has
    // maxSplitsPerNode + maxSplitsPerNodePerTask assigned to it
    // Splits should be scheduled on the other three nodes
    assertEquals(assignments.keySet().size(), 3);
    // No splits scheduled on the maxed out node
    assertFalse(assignments.keySet().contains(newNode));
    for (RemoteTask task : tasks) {
        task.abort();
    }
    assertEquals(nodeTaskMap.getPartitionedSplitsOnNode(newNode), 0);
}
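The exclusion asserted above follows from the two-tier cap check in SimpleNodeSelector.computeAssignments (shown later on this page): a node is eligible while its total split count is under maxSplitsPerNode and, failing that, while its queued split count for the stage is under maxPendingSplitsPerTask. The standalone sketch below mimics that check with plain Java collections; the class name, cap values, and node names are illustrative assumptions, not hetu-core code.

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Standalone sketch (not hetu-core code): mirrors the two-tier cap check that keeps the
    // maxed-out node out of the assignment. A node is eligible while its total split count is
    // under MAX_SPLITS_PER_NODE; failing that, while its queued split count for this stage is
    // under MAX_PENDING_SPLITS_PER_TASK. All names and numbers are illustrative.
    public class MaxedOutNodeSketch {
        static final int MAX_SPLITS_PER_NODE = 10;
        static final int MAX_PENDING_SPLITS_PER_TASK = 10;

        public static void main(String[] args) {
            Map<String, Integer> totalSplits = new LinkedHashMap<>();
            Map<String, Integer> queuedForStage = new LinkedHashMap<>();
            for (String node : new String[] {"other1", "other2", "other3", "other4"}) {
                totalSplits.put(node, 20);     // every node is over the per-node cap
                queuedForStage.put(node, 0);
            }
            // only "other4" already has a task for this stage with 20 pending splits
            queuedForStage.put("other4", 20);

            for (int split = 0; split < 5; split++) {
                String chosen = chooseNode(totalSplits, queuedForStage);
                System.out.println("split " + split + " -> " + chosen);
                if (chosen != null) {
                    totalSplits.merge(chosen, 1, Integer::sum);
                    queuedForStage.merge(chosen, 1, Integer::sum);
                }
            }
            // Expected: the five splits spread over other1..other3; other4 is never chosen.
        }

        static String chooseNode(Map<String, Integer> totalSplits, Map<String, Integer> queuedForStage) {
            String chosen = null;
            int min = Integer.MAX_VALUE;
            // first tier: least-loaded node that is still under the per-node cap
            for (Map.Entry<String, Integer> entry : totalSplits.entrySet()) {
                if (entry.getValue() < min && entry.getValue() < MAX_SPLITS_PER_NODE) {
                    chosen = entry.getKey();
                    min = entry.getValue();
                }
            }
            // second tier: least-queued node still under the per-task pending cap
            if (chosen == null) {
                for (Map.Entry<String, Integer> entry : queuedForStage.entrySet()) {
                    if (entry.getValue() < min && entry.getValue() < MAX_PENDING_SPLITS_PER_TASK) {
                        chosen = entry.getKey();
                        min = entry.getValue();
                    }
                }
            }
            return chosen;
        }
    }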
Use of io.prestosql.execution.RemoteTask in project hetu-core by openlookeng.
From the class TestSourcePartitionedScheduler, method testScheduleSplitsBlock:
@Test
public void testScheduleSplitsBlock() {
    StageExecutionPlan plan = createPlan(createFixedSplitSource(80, TestingSplit::createRemoteSplit));
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    SqlStageExecution stage = createSqlStageExecution(plan, nodeTaskMap);
    StageScheduler scheduler = getSourcePartitionedScheduler(plan, stage, nodeManager, nodeTaskMap, 1);
    // schedule the first 60 splits; the final iteration (i == 60) blocks because every task is at its 20-split limit
    for (int i = 0; i <= 60; i++) {
        ScheduleResult scheduleResult = scheduler.schedule();
        assertFalse(scheduleResult.isFinished());
        // blocks at 20 per node
        assertEquals(scheduleResult.getBlocked().isDone(), i != 60);
        // first three splits create new tasks
        assertEquals(scheduleResult.getNewTasks().size(), i < 3 ? 1 : 0);
        assertEquals(stage.getAllTasks().size(), i < 3 ? i + 1 : 3);
        assertPartitionedSplitCount(stage, min(i + 1, 60));
    }
    for (RemoteTask remoteTask : stage.getAllTasks()) {
        assertEquals(remoteTask.getPartitionedSplitCount(), 20);
    }
    // TODO: rewrite MockRemoteTask to fire a state transition when splits are cleared, and then validate that the blocked future completes
    // drop the 20 splits from one node
    ((MockRemoteTask) stage.getAllTasks().get(0)).clearSplits();
    // schedule the remaining 20 splits
    for (int i = 0; i < 20; i++) {
        ScheduleResult scheduleResult = scheduler.schedule();
        // finishes when the last split is fetched
        if (i == 19) {
            assertEffectivelyFinished(scheduleResult, scheduler);
        } else {
            assertFalse(scheduleResult.isFinished());
        }
        // does not block again
        assertTrue(scheduleResult.getBlocked().isDone());
        // no additional tasks will be created
        assertEquals(scheduleResult.getNewTasks().size(), 0);
        assertEquals(stage.getAllTasks().size(), 3);
        // we dropped 20 splits, so start at 40 and count up to 60
        assertPartitionedSplitCount(stage, min(i + 41, 60));
    }
    for (RemoteTask remoteTask : stage.getAllTasks()) {
        assertEquals(remoteTask.getPartitionedSplitCount(), 20);
    }
    stage.abort();
}
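The scheduleResult.getBlocked() future that the test polls embodies a simple contract: it is already complete whenever a split could still be placed, and it stays incomplete while every task sits at its pending-split cap, completing once some task drains. The sketch below imitates that contract with Guava's SettableFuture; the class and method names are made up for illustration, and the cap of 20 with three tasks simply mirrors the test's numbers.

    import com.google.common.util.concurrent.Futures;
    import com.google.common.util.concurrent.ListenableFuture;
    import com.google.common.util.concurrent.SettableFuture;

    // Illustrative sketch (not the hetu-core implementation) of the blocking contract the
    // test checks via scheduleResult.getBlocked(): while every task is at its pending-split
    // cap, scheduling hands back an incomplete future; clearing a task's splits completes it
    // so the caller can resume scheduling.
    public class BlockedSchedulerSketch {
        private static final int MAX_PENDING_PER_TASK = 20;

        private final int[] pendingSplits = new int[3];   // three tasks, as in the test
        private SettableFuture<?> queueSpace = SettableFuture.create();

        // Try to place one split; the returned future is done unless all tasks are full.
        public synchronized ListenableFuture<?> scheduleOneSplit() {
            for (int task = 0; task < pendingSplits.length; task++) {
                if (pendingSplits[task] < MAX_PENDING_PER_TASK) {
                    pendingSplits[task]++;
                    return Futures.immediateFuture(null);   // not blocked
                }
            }
            return queueSpace;                              // blocked until some task drains
        }

        // Mirrors MockRemoteTask.clearSplits() in spirit: frees a task's queue and unblocks callers.
        public synchronized void clearSplits(int task) {
            pendingSplits[task] = 0;
            queueSpace.set(null);
            queueSpace = SettableFuture.create();
        }
    }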
Use of io.prestosql.execution.RemoteTask in project hetu-core by openlookeng.
From the class TestSourcePartitionedScheduler, method testScheduleSplitsOneAtATime:
@Test
public void testScheduleSplitsOneAtATime() {
    StageExecutionPlan plan = createPlan(createFixedSplitSource(60, TestingSplit::createRemoteSplit));
    NodeTaskMap nodeTaskMap = new NodeTaskMap(finalizerService);
    SqlStageExecution stage = createSqlStageExecution(plan, nodeTaskMap);
    try (StageScheduler scheduler = getSourcePartitionedScheduler(plan, stage, nodeManager, nodeTaskMap, 1)) {
        for (int i = 0; i < 60; i++) {
            ScheduleResult scheduleResult = scheduler.schedule();
            // only finishes when last split is fetched
            if (i == 59) {
                assertEffectivelyFinished(scheduleResult, scheduler);
            } else {
                assertFalse(scheduleResult.isFinished());
            }
            // never blocks
            assertTrue(scheduleResult.getBlocked().isDone());
            // first three splits create new tasks
            assertEquals(scheduleResult.getNewTasks().size(), i < 3 ? 1 : 0);
            assertEquals(stage.getAllTasks().size(), i < 3 ? i + 1 : 3);
            assertPartitionedSplitCount(stage, min(i + 1, 60));
        }
        for (RemoteTask remoteTask : stage.getAllTasks()) {
            assertEquals(remoteTask.getPartitionedSplitCount(), 20);
        }
        stage.abort();
    }
}
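Both source-partitioned tests assert the same task-creation pattern: new tasks are created only while some selected node has no task yet (three here), and every later split is added to an existing task, which is why the task count stays at three and each task ends up with 20 of the 60 splits. A tiny plain-Java illustration of that bookkeeping, with made-up node names and a round-robin placement standing in for the real selector:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Illustrative only: counts how many "tasks" a round-robin placement of 60 splits
    // over 3 nodes creates. Only the first split placed on each node creates a task.
    public class TaskCreationSketch {
        public static void main(String[] args) {
            List<String> nodes = Arrays.asList("node1", "node2", "node3");
            List<String> tasks = new ArrayList<>();
            int[] splitsPerTask = new int[nodes.size()];

            for (int split = 0; split < 60; split++) {
                int node = split % nodes.size();             // stand-in for the node selector
                if (tasks.size() <= node) {
                    tasks.add("task-on-" + nodes.get(node)); // only splits 0, 1, 2 create tasks
                }
                splitsPerTask[node]++;
            }
            System.out.println("tasks created: " + tasks.size());        // 3
            System.out.println("splits on task 0: " + splitsPerTask[0]); // 20
        }
    }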
Use of io.prestosql.execution.RemoteTask in project hetu-core by openlookeng.
From the class TestFixedCountScheduler, method testMultipleNodes:
@Test
public void testMultipleNodes() {
    FixedCountScheduler nodeScheduler = new FixedCountScheduler(
            (node, partition, totalPartitions) -> Optional.of(taskFactory.createTableScanTask(
                    new TaskId("test", 1, 1),
                    node,
                    ImmutableList.of(),
                    new PartitionedSplitCountTracker(delta -> {
                    }))),
            generateRandomNodes(5));
    ScheduleResult result = nodeScheduler.schedule();
    assertTrue(result.isFinished());
    assertTrue(result.getBlocked().isDone());
    assertEquals(result.getNewTasks().size(), 5);
    assertEquals(result.getNewTasks().stream().map(RemoteTask::getNodeId).collect(toImmutableSet()).size(), 5);
}
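FixedCountScheduler schedules no splits at all; it asks a task factory for one task per (node, partition) pair and immediately reports itself finished, which is what the assertions above check. A reduced sketch of that idea with plain Java types; the BiFunction stands in for the (node, partition, totalPartitions) lambda used in the test, and every name is made up:

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Optional;
    import java.util.Set;
    import java.util.function.BiFunction;

    // Illustrative only: a FixedCountScheduler-style pass creates exactly one task per
    // (node, partition) pair and is finished after a single scheduling round.
    public class FixedCountSketch {
        public static void main(String[] args) {
            List<String> nodes = new ArrayList<>();
            for (int i = 0; i < 5; i++) {
                nodes.add("node" + i);
            }

            BiFunction<String, Integer, Optional<String>> taskFactory =
                    (node, partition) -> Optional.of("task-" + partition + "@" + node);

            List<String> newTasks = new ArrayList<>();
            for (int partition = 0; partition < nodes.size(); partition++) {
                taskFactory.apply(nodes.get(partition), partition).ifPresent(newTasks::add);
            }

            Set<String> distinctNodes = new HashSet<>(nodes);
            System.out.println("new tasks: " + newTasks.size());           // 5, one per node
            System.out.println("distinct nodes: " + distinctNodes.size()); // 5, as the test asserts
        }
    }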
Use of io.prestosql.execution.RemoteTask in project hetu-core by openlookeng.
From the class SimpleNodeSelector, method computeAssignments:
@Override
public SplitPlacementResult computeAssignments(Set<Split> splits, List<RemoteTask> existingTasks, Optional<SqlStageExecution> stage) {
    Multimap<InternalNode, Split> assignment = HashMultimap.create();
    NodeMap nodeMapSlice = this.nodeMap.get().get();
    NodeAssignmentStats assignmentStats = new NodeAssignmentStats(nodeTaskMap, nodeMapSlice, existingTasks);
    ResettableRandomizedIterator<InternalNode> randomCandidates = randomizedNodes(nodeMapSlice, ImmutableSet.of());
    Set<InternalNode> blockedExactNodes = new HashSet<>();
    boolean splitWaitingForAnyNode = false;
    // splitsToBeRedistributed becomes true only when splits go through locality-based assignment
    boolean splitsToBeRedistributed = false;
    Set<Split> remainingSplits = new HashSet<>();
    // Check if the current stage has a TableScanNode which is reading the table for the 2nd time or beyond
    if (stage.isPresent() && stage.get().getStateMachine().getConsumerScanNode() != null) {
        try {
            // if node exists, get the TableScanNode and cast it as consumer
            TableScanNode consumer = stage.get().getStateMachine().getConsumerScanNode();
            // all tables part of this stage
            Map<PlanNodeId, TableInfo> tables = stage.get().getStageInfo().getTables();
            QualifiedObjectName tableName;
            for (Map.Entry<PlanNodeId, TableInfo> entry : tables.entrySet()) {
                tableName = entry.getValue().getTableName();
                if (tableSplitAssignmentInfo.getReuseTableScanMappingIdSplitAssignmentMap().containsKey(consumer.getReuseTableScanMappingId())) {
                    // compare splitkey using equals and then assign nodes accordingly.
                    HashMap<SplitKey, InternalNode> splitKeyNodeAssignment = tableSplitAssignmentInfo.getSplitKeyNodeAssignment(consumer.getReuseTableScanMappingId());
                    Set<SplitKey> splitKeySet = splitKeyNodeAssignment.keySet();
                    assignment.putAll(createConsumerScanNodeAssignment(tableName, splits, splitKeySet, splitKeyNodeAssignment));
                    for (Map.Entry<InternalNode, Split> nodeAssignmentEntry : assignment.entries()) {
                        InternalNode node = nodeAssignmentEntry.getKey();
                        assignmentStats.addAssignedSplit(node);
                    }
                }
            }
            log.debug("Consumer:: Assignment size is " + assignment.size() + " ,Assignment is " + assignment + " ,Assignment Stats is " + assignmentStats);
        } catch (NotImplementedException e) {
            log.error("Not a Hive Split! Other Connector Splits not supported currently. Error: " + e);
            throw new UnsupportedOperationException("Not a Hive Split! Other Connector Splits not supported currently. Error: " + e);
        }
    } else {
        // optimizedLocalScheduling enables prioritized assignment of splits to local nodes when splits contain locality information
        if (optimizedLocalScheduling) {
            // should not hit for consumer case
            for (Split split : splits) {
                if (split.isRemotelyAccessible() && !split.getAddresses().isEmpty()) {
                    List<InternalNode> candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
                    Optional<InternalNode> chosenNode = candidateNodes.stream()
                            .filter(ownerNode -> assignmentStats.getTotalSplitCount(ownerNode) < maxSplitsPerNode)
                            .min(comparingInt(assignmentStats::getTotalSplitCount));
                    if (chosenNode.isPresent()) {
                        assignment.put(chosenNode.get(), split);
                        // check later
                        assignmentStats.addAssignedSplit(chosenNode.get());
                        splitsToBeRedistributed = true;
                        continue;
                    }
                }
                remainingSplits.add(split);
            }
        } else {
            remainingSplits = splits;
        }
        for (Split split : remainingSplits) {
            randomCandidates.reset();
            List<InternalNode> candidateNodes;
            if (!split.isRemotelyAccessible()) {
                candidateNodes = selectExactNodes(nodeMapSlice, split.getAddresses(), includeCoordinator);
            } else {
                candidateNodes = selectNodes(minCandidates, randomCandidates);
            }
            if (candidateNodes.isEmpty()) {
                log.debug("No nodes available to schedule %s. Available nodes %s", split, nodeMapSlice.getNodesByHost().keys());
                throw new PrestoException(NO_NODES_AVAILABLE, "No nodes available to run query");
            }
            InternalNode chosenNode = null;
            int min = Integer.MAX_VALUE;
            for (InternalNode node : candidateNodes) {
                int totalSplitCount = assignmentStats.getTotalSplitCount(node);
                if (totalSplitCount < min && totalSplitCount < maxSplitsPerNode) {
                    chosenNode = node;
                    min = totalSplitCount;
                }
            }
            if (chosenNode == null) {
                // min is guaranteed to be MAX_VALUE at this line
                for (InternalNode node : candidateNodes) {
                    int totalSplitCount = assignmentStats.getQueuedSplitCountForStage(node);
                    if (totalSplitCount < min && totalSplitCount < maxPendingSplitsPerTask) {
                        chosenNode = node;
                        min = totalSplitCount;
                    }
                }
            }
            if (chosenNode != null) {
                assignment.put(chosenNode, split);
                assignmentStats.addAssignedSplit(chosenNode);
            } else if (split.isRemotelyAccessible()) {
                splitWaitingForAnyNode = true;
            }
            // Exact node set won't matter, if a split is waiting for any node
            else if (!splitWaitingForAnyNode) {
                blockedExactNodes.addAll(candidateNodes);
            }
        }
    }
    ListenableFuture<?> blocked;
    if (splitWaitingForAnyNode) {
        blocked = toWhenHasSplitQueueSpaceFuture(existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    } else {
        blocked = toWhenHasSplitQueueSpaceFuture(blockedExactNodes, existingTasks, calculateLowWatermark(maxPendingSplitsPerTask));
    }
    if (!stage.isPresent() || stage.get().getStateMachine().getConsumerScanNode() == null) {
        if (splitsToBeRedistributed) {
            // skip for consumer
            equateDistribution(assignment, assignmentStats, nodeMapSlice);
        }
    }
    // Check if the current stage has a TableScanNode which is reading the table for the 1st time
    if (stage.isPresent() && stage.get().getStateMachine().getProducerScanNode() != null) {
        // if node exists, get the TableScanNode and annotate it as producer
        saveProducerScanNodeAssignment(stage, assignment, assignmentStats);
    }
    // Check if its CTE node and its feeder
    if (stage.isPresent() && stage.get().getFragment().getFeederCTEId().isPresent()) {
        updateFeederNodeAndSplitCount(stage.get(), assignment);
    }
    return new SplitPlacementResult(blocked, assignment);
}
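When optimizedLocalScheduling is enabled, the first pass above tries to keep a split on one of the nodes named in its addresses, provided that node is still under maxSplitsPerNode; everything else is left in remainingSplits for the least-loaded fallback loop. Below is a condensed, self-contained sketch of just that locality pass; the host names, counts, and cap are assumptions for illustration, and the real code works on InternalNode and Split objects rather than strings.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Condensed sketch (illustrative, simplified types) of the locality-first pass: a split
    // carrying a host address goes to that host while it is under MAX_SPLITS_PER_NODE;
    // anything else falls through to the general least-loaded loop.
    public class LocalityFirstSketch {
        static final int MAX_SPLITS_PER_NODE = 100;

        public static void main(String[] args) {
            Map<String, Integer> splitCount = new HashMap<>();
            splitCount.put("host-a", 10);
            splitCount.put("host-b", 99);
            splitCount.put("host-c", 100);   // already at the cap

            // each entry is a split's preferred host; null means no locality information
            List<String> splits = Arrays.asList("host-a", "host-c", null);
            List<String> remainingSplits = new ArrayList<>();

            for (String preferredHost : splits) {
                if (preferredHost != null
                        && splitCount.getOrDefault(preferredHost, 0) < MAX_SPLITS_PER_NODE) {
                    splitCount.merge(preferredHost, 1, Integer::sum);   // local assignment
                    System.out.println("split -> " + preferredHost);
                } else {
                    remainingSplits.add(preferredHost);   // least-loaded fallback handles these
                }
            }
            System.out.println("splits for the fallback pass: " + remainingSplits.size()); // 2
        }
    }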