Use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
The class LlapTaskSchedulerService, method allocateTask.
@Override
public void allocateTask(Object task, Resource capability, ContainerId containerId, Priority priority, Object containerSignature, Object clientCookie) {
  // Container affinity can be implemented as Host affinity for LLAP. Not required until
  // 1:1 edges are used in Hive.
  TezTaskAttemptID id = getTaskAttemptId(task);
  TaskInfo taskInfo = new TaskInfo(localityDelayConf, clock, task, clientCookie, priority, capability, null, null, clock.getTime(), id);
  LOG.info("Received allocateRequest. task={}, priority={}, capability={}, containerId={}", task, priority, capability, containerId);
  if (!dagRunning) {
    if (metrics != null && id != null) {
      metrics.setDagId(id.getTaskID().getVertexID().getDAGId().toString());
    }
    dagRunning = true;
  }
  dagStats.registerTaskRequest(null, null);
  addPendingTask(taskInfo);
  trySchedulingPendingTasks();
}
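The metrics call above walks the Tez ID hierarchy: a task attempt belongs to a task, a task to a vertex, and a vertex to a DAG. A minimal sketch of building and then unwinding that chain with the Tez getInstance factories is shown below; the application id and index values are made-up placeholders, not anything Hive uses.

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezTaskID;
import org.apache.tez.dag.records.TezVertexID;

public class TezIdHierarchySketch {
  public static void main(String[] args) {
    // Placeholder cluster timestamp and indices; any values work for the sketch.
    ApplicationId appId = ApplicationId.newInstance(1234L, 1);
    TezDAGID dagId = TezDAGID.getInstance(appId, 1);
    TezVertexID vertexId = TezVertexID.getInstance(dagId, 0);
    TezTaskID taskId = TezTaskID.getInstance(vertexId, 0);
    TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(taskId, 0);

    // Walking back up the chain, as allocateTask does for the DAG metrics id.
    String dagIdString = attemptId.getTaskID().getVertexID().getDAGId().toString();
    System.out.println(dagIdString);
  }
}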
Use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
The class LlapTaskCommunicator, method nodePinged.
void nodePinged(String hostname, String uniqueId, int port, TezAttemptArray tasks, BooleanArray guaranteed) {
  // TODO: do we ever need the port? we could just do away with nodeId altogether.
  LlapNodeId nodeId = LlapNodeId.getInstance(hostname, port);
  registerPingingNode(nodeId, uniqueId);
  BiMap<ContainerId, TezTaskAttemptID> biMap = entityTracker.getContainerAttemptMapForNode(nodeId);
  if (biMap != null) {
    Set<TezTaskAttemptID> error = new HashSet<>();
    synchronized (biMap) {
      for (int i = 0; i < tasks.get().length; ++i) {
        boolean isGuaranteed = false;
        if (guaranteed != null) {
          isGuaranteed = ((BooleanWritable) guaranteed.get()[i]).get();
        }
        TezTaskAttemptID attemptID = (TezTaskAttemptID) tasks.get()[i];
        // Check if the taskAttempt is present in AM view
        if (biMap.containsValue(attemptID)) {
          String taskNodeId = entityTracker.getUniqueNodeId(attemptID);
          if (taskNodeId != null && taskNodeId.equals(uniqueId)) {
            getContext().taskAlive(attemptID);
            scheduler.taskInfoUpdated(attemptID, isGuaranteed);
            getContext().containerAlive(biMap.inverse().get(attemptID));
          } else {
            error.add(attemptID);
          }
        }
      }
    }
    if (!error.isEmpty()) {
      LOG.info("The tasks we expected to be on the node are not there: " + error);
      for (TezTaskAttemptID attempt : error) {
        LOG.info("Sending a kill for attempt {}, due to a ping from node with same host and same port but registered with different unique ID", attempt);
        getContext().taskKilled(attempt, TaskAttemptEndReason.NODE_FAILED, "Node with same host and port but with new unique ID pinged");
      }
    }
  } else {
    long currentTs = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
    if (currentTs > nodeNotFoundLogTime.get() + 5000l) {
      LOG.warn("Received ping from node without any registered tasks or containers: " + hostname + ":" + port
          + ". Could be caused by pre-emption by the AM, or a mismatched hostname. Enable debug logging for mismatched host names");
      nodeNotFoundLogTime.set(currentTs);
    }
  }
}
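The lookup in nodePinged goes in both directions through a Guava BiMap: containsValue checks whether the reported attempt is known to the AM, and inverse().get maps the attempt back to its ContainerId. A minimal, self-contained sketch of that pattern follows, with plain strings standing in for ContainerId and TezTaskAttemptID; in the communicator the real map is additionally guarded by the synchronized block shown above.

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class BiMapLookupSketch {
  public static void main(String[] args) {
    // Strings stand in for ContainerId (key) and TezTaskAttemptID (value).
    BiMap<String, String> containerToAttempt = HashBiMap.create();
    containerToAttempt.put("container-1", "attempt-1");

    // Forward lookup: container -> attempt.
    String attempt = containerToAttempt.get("container-1");
    // The two operations nodePinged relies on:
    boolean knownToAm = containerToAttempt.containsValue("attempt-1");
    String containerForAttempt = containerToAttempt.inverse().get("attempt-1");

    System.out.println(attempt + ", " + knownToAm + ", " + containerForAttempt);
  }
}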
Use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
The class TestLlapTaskSchedulerService, method testUpdateOnFinishingTask.
@Test(timeout = 10000)
public void testUpdateOnFinishingTask() throws IOException, InterruptedException {
  final TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper();
  // The update fails because the task has terminated on the node.
  try {
    Priority highPri = Priority.newInstance(1), lowPri = Priority.newInstance(2);
    TezTaskAttemptID task1 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId(), task2 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    tsWrapper.ts.updateGuaranteedCount(0);
    tsWrapper.controlScheduler(true);
    tsWrapper.allocateTask(task1, null, highPri, new Object());
    tsWrapper.allocateTask(task2, null, lowPri, new Object());
    tsWrapper.awaitTotalTaskAllocations(2);
    TaskInfo ti1 = tsWrapper.ts.getTaskInfo(task1), ti2 = tsWrapper.ts.getTaskInfo(task2);
    // Concurrent increase and termination, increase fails.
    tsWrapper.ts.updateGuaranteedCount(1);
    tsWrapper.ts.waitForMessagesSent(1);
    assertTrue(ti1.isGuaranteed());
    // Not updated yet.
    assertFalse(ti1.getLastSetGuaranteed());
    assertTrue(ti1.isUpdateInProgress());
    tsWrapper.deallocateTask(task1, true, TaskAttemptEndReason.CONTAINER_EXITED);
    tsWrapper.ts.handleUpdateResult(ti1, false);
    // We must have the duck still; it should just go to the other task.
    assertTrue(ti2.isGuaranteed());
    assertTrue(ti2.isUpdateInProgress());
    tsWrapper.ts.handleUpdateResult(ti2, false);
    tsWrapper.deallocateTask(task2, true, TaskAttemptEndReason.CONTAINER_EXITED);
    // Same; with the termination after the failed update, we should maintain the correct count.
    assertEquals(1, tsWrapper.ts.getUnusedGuaranteedCount());
  } finally {
    tsWrapper.shutdown();
  }
}
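generateTaskAttemptId in the test wrapper only needs to mint distinct TezTaskAttemptID values for tasks within the same DAG. Its actual implementation is not shown here; the sketch below is one hypothetical way to do it, reusing a fixed, made-up vertex id and bumping only the task index on each call.

import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.tez.dag.records.TezDAGID;
import org.apache.tez.dag.records.TezTaskAttemptID;
import org.apache.tez.dag.records.TezTaskID;
import org.apache.tez.dag.records.TezVertexID;

public class TaskAttemptIdGenerator {
  private static final AtomicInteger TASK_INDEX = new AtomicInteger(0);
  // Fixed, made-up application/DAG/vertex ids; only the task index varies between calls.
  private static final TezVertexID VERTEX =
      TezVertexID.getInstance(TezDAGID.getInstance(ApplicationId.newInstance(1000L, 1), 1), 0);

  public static TezTaskAttemptID next() {
    TezTaskID taskId = TezTaskID.getInstance(VERTEX, TASK_INDEX.getAndIncrement());
    return TezTaskAttemptID.getInstance(taskId, 0);
  }
}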
Use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
The class TestLlapTaskSchedulerService, method testAdjustLocalityDelay.
@Test(timeout = 10000)
public void testAdjustLocalityDelay() throws IOException, InterruptedException {
  Priority priority1 = Priority.newInstance(1);
  String[] host = new String[] { HOST1 };
  TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper(2000, host, 1, 0, 1000l);
  try {
    TezTaskAttemptID task1 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie1 = "cookie1";
    TezTaskAttemptID task2 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie2 = "cookie2";
    tsWrapper.controlScheduler(true);
    tsWrapper.allocateTask(task1, host, priority1, clientCookie1);
    tsWrapper.allocateTask(task2, host, priority1, clientCookie2);
    // There are enough resources for 1 task, the second one should just adjustLocalityDelay
    assertFalse(tsWrapper.ts.getTaskInfo(task2).adjustedLocalityDelay);
    while (true) {
      tsWrapper.signalSchedulerRun();
      tsWrapper.awaitSchedulerRun();
      if (tsWrapper.ts.dagStats.getNumTotalAllocations() == 1) {
        break;
      }
    }
    // Active node instances do exist so delay should be adjusted
    assertTrue(tsWrapper.ts.getTaskInfo(task2).adjustedLocalityDelay);
  } finally {
    tsWrapper.shutdown();
  }
}
Use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.
The class TestLlapTaskSchedulerService, method testNodeReEnabled.
@Test(timeout = 10000)
public void testNodeReEnabled() throws InterruptedException, IOException {
  // Based on actual timing.
  TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper(1000l);
  try {
    Priority priority1 = Priority.newInstance(1);
    String[] hosts1 = new String[] { HOST1 };
    String[] hosts2 = new String[] { HOST2 };
    String[] hosts3 = new String[] { HOST3 };
    TezTaskAttemptID task1 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie1 = new Object();
    TezTaskAttemptID task2 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie2 = new Object();
    TezTaskAttemptID task3 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie3 = new Object();
    tsWrapper.controlScheduler(true);
    tsWrapper.allocateTask(task1, hosts1, priority1, clientCookie1);
    tsWrapper.allocateTask(task2, hosts2, priority1, clientCookie2);
    tsWrapper.allocateTask(task3, hosts3, priority1, clientCookie3);
    while (true) {
      tsWrapper.signalSchedulerRun();
      tsWrapper.awaitSchedulerRun();
      if (tsWrapper.ts.dagStats.getNumTotalAllocations() == 3) {
        break;
      }
    }
    verify(tsWrapper.mockAppCallback, times(3)).taskAllocated(any(Object.class), any(Object.class), any(Container.class));
    assertEquals(3, tsWrapper.ts.dagStats.getNumLocalAllocations());
    assertEquals(0, tsWrapper.ts.dagStats.getNumAllocationsNoLocalityRequest());
    assertEquals(3, tsWrapper.ts.dagStats.getNumTotalAllocations());
    tsWrapper.resetAppCallback();
    tsWrapper.rejectExecution(task1);
    tsWrapper.rejectExecution(task2);
    tsWrapper.rejectExecution(task3);
    // Verify that the node is blacklisted
    assertEquals(3, tsWrapper.ts.dagStats.getNumRejectedTasks());
    assertEquals(3, tsWrapper.ts.instanceToNodeMap.size());
    assertEquals(3, tsWrapper.ts.disabledNodesQueue.size());
    TezTaskAttemptID task4 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie4 = new Object();
    TezTaskAttemptID task5 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie5 = new Object();
    TezTaskAttemptID task6 = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
    Object clientCookie6 = new Object();
    tsWrapper.allocateTask(task4, hosts1, priority1, clientCookie4);
    tsWrapper.allocateTask(task5, hosts2, priority1, clientCookie5);
    tsWrapper.allocateTask(task6, hosts3, priority1, clientCookie6);
    while (true) {
      tsWrapper.signalSchedulerRun();
      tsWrapper.awaitSchedulerRun();
      if (tsWrapper.ts.dagStats.getNumTotalAllocations() == 6) {
        break;
      }
    }
    ArgumentCaptor<Container> argumentCaptor = ArgumentCaptor.forClass(Container.class);
    verify(tsWrapper.mockAppCallback, times(3)).taskAllocated(any(Object.class), any(Object.class), argumentCaptor.capture());
    // which affects the locality matching
    assertEquals(0, tsWrapper.ts.dagStats.getNumAllocationsNoLocalityRequest());
    assertEquals(6, tsWrapper.ts.dagStats.getNumTotalAllocations());
  } finally {
    tsWrapper.shutdown();
  }
}
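The second verification above uses an ArgumentCaptor so the Container handed to each taskAllocated callback can be inspected after the interactions are verified. Below is a small, generic sketch of that capture-then-assert Mockito pattern; the Callback interface and the string arguments are hypothetical stand-ins, not Tez or Hive types.

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.util.List;
import org.junit.Test;
import org.mockito.ArgumentCaptor;

public class CaptorPatternTest {
  // Hypothetical callback standing in for the scheduler's taskAllocated() callback.
  interface Callback {
    void taskAllocated(Object task, Object cookie, String container);
  }

  @Test
  public void capturesAllContainers() {
    Callback cb = mock(Callback.class);
    cb.taskAllocated("task1", "cookie1", "container-1");
    cb.taskAllocated("task2", "cookie2", "container-2");

    // Capture the third argument of every recorded invocation, then assert on the captured values.
    ArgumentCaptor<String> captor = ArgumentCaptor.forClass(String.class);
    verify(cb, times(2)).taskAllocated(any(Object.class), any(Object.class), captor.capture());
    List<String> containers = captor.getAllValues();
    assertEquals(2, containers.size());
    assertEquals("container-2", containers.get(1));
  }
}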