Search in sources:

Example 96 with Priority

use of org.apache.hadoop.yarn.api.records.Priority in project hive by apache.

the class LlapTaskSchedulerService method dagComplete.

/**
 * Resets the scheduler's per-DAG bookkeeping after a DAG finishes.
 *
 * <p>Logs the stats of the DAG that just completed, advances {@code dagCounter}, and then,
 * while holding {@code writeLock}: installs a fresh {@code StatsPerDag}, moves every task held
 * in {@code guaranteedTasks} into {@code speculativeTasks} (clearing its {@code isGuaranteed}
 * flag), zeroes the guaranteed counters and bumps {@code totalGuaranteedVersion}. Once the lock
 * is released, the new version is passed to {@code updateGuaranteedInRegistry} if
 * {@code workloadManagementEnabled} is set.
 */
@Override
public void dagComplete() {
    // DAG completion is the natural point at which tracked statistics can be reset.
    LOG.info("DAG: " + dagCounter.get() + " completed. Scheduling stats: " + dagStats);
    dagCounter.incrementAndGet();
    if (metrics != null) {
        metrics.incrCompletedDagCount();
    }

    // Assigned under the lock, consumed after it is released.
    long tgVersionForZk;
    writeLock.lock();
    try {
        dagRunning = false;
        dagStats = new StatsPerDag();

        // Tally the pending tasks; this is only used for the log line below.
        int pendingTotal = 0;
        for (List<TaskInfo> queued : pendingTasks.values()) {
            if (queued != null) {
                pendingTotal += queued.size();
            }
        }

        // Pending tasks are not sent messages with the flags; they should be killed elsewhere.
        for (Entry<Integer, TreeSet<TaskInfo>> demoted : guaranteedTasks.entrySet()) {
            Integer key = demoted.getKey();
            TreeSet<TaskInfo> target = speculativeTasks.get(key);
            if (target == null) {
                target = new TreeSet<>();
                speculativeTasks.put(key, target);
            }
            for (TaskInfo task : demoted.getValue()) {
                // Clear the flag before inserting the task into the speculative set.
                synchronized (task) {
                    task.isGuaranteed = false;
                }
                target.add(task);
            }
        }
        guaranteedTasks.clear();

        // Everything still tracked now lives in speculativeTasks; count it for the log line.
        int runningTotal = 0;
        for (TreeSet<TaskInfo> bucket : speculativeTasks.values()) {
            if (bucket != null) {
                runningTotal += bucket.size();
            }
        }

        unusedGuaranteed = 0;
        totalGuaranteed = 0;
        tgVersionForZk = ++totalGuaranteedVersion;

        if (metrics != null) {
            metrics.setDagId(null);
            // Tasks were moved above without state checks, so simply reset all metrics to 0.
            metrics.resetWmMetrics();
        }
        LOG.info("DAG reset. Current knownTaskCount={}, pendingTaskCount={}, runningTaskCount={}", knownTasks.size(), pendingTotal, runningTotal);
    } finally {
        writeLock.unlock();
    }

    if (workloadManagementEnabled) {
        updateGuaranteedInRegistry(tgVersionForZk, 0);
    }
    // TODO Cleanup pending tasks etc, so that the next dag is not affected.
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StatsPerDag(org.apache.hadoop.hive.llap.tezplugins.scheduler.StatsPerDag) Priority(org.apache.hadoop.yarn.api.records.Priority) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList)

Example 97 with Priority

use of org.apache.hadoop.yarn.api.records.Priority in project hive by apache.

the class TestLlapTaskSchedulerService method testNotInQueue.

/**
 * Shared test body: submits three same-priority task requests against {@code hosts}, waits for
 * two local allocations, and asserts that nothing was placed on the scheduler's
 * {@code delayedTaskQueue}. The wrapper is always shut down afterwards.
 *
 * @param tsWrapper scheduler wrapper under test; shut down by this method
 * @param hosts     hosts requested for every task
 * @throws InterruptedException if interrupted while waiting on the scheduler
 */
private void testNotInQueue(TestTaskSchedulerServiceWrapper tsWrapper, String[] hosts) throws InterruptedException {
    final Priority prio = Priority.newInstance(1);
    try {
        tsWrapper.controlScheduler(true);
        // Three requests in total: 1 more than capacity.
        for (int request = 0; request < 3; request++) {
            tsWrapper.allocateTask(hosts, prio);
        }
        tsWrapper.awaitLocalTaskAllocations(2);
        assertEquals(0, tsWrapper.ts.delayedTaskQueue.size());
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : Priority(org.apache.hadoop.yarn.api.records.Priority)

Example 98 with Priority

use of org.apache.hadoop.yarn.api.records.Priority in project hive by apache.

the class TestLlapTaskSchedulerService method testUpdateOnFinishingTask.

/**
 * Exercises a guaranteed-count update that fails because the target task has already
 * terminated on the node, and checks that the guaranteed slot is handed to the other task and
 * finally accounted for as unused.
 */
@Test(timeout = 10000)
public void testUpdateOnFinishingTask() throws IOException, InterruptedException {
    final TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper();
    try {
        Priority highPri = Priority.newInstance(1);
        Priority lowPri = Priority.newInstance(2);
        TezTaskAttemptID firstAttempt = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        TezTaskAttemptID secondAttempt = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();

        // Start with no guaranteed slots and get both tasks allocated.
        tsWrapper.ts.updateGuaranteedCount(0);
        tsWrapper.controlScheduler(true);
        tsWrapper.allocateTask(firstAttempt, null, highPri, new Object());
        tsWrapper.allocateTask(secondAttempt, null, lowPri, new Object());
        tsWrapper.awaitTotalTaskAllocations(2);
        TaskInfo firstInfo = tsWrapper.ts.getTaskInfo(firstAttempt);
        TaskInfo secondInfo = tsWrapper.ts.getTaskInfo(secondAttempt);

        // Increase the count concurrently with a termination; the increase fails.
        tsWrapper.ts.updateGuaranteedCount(1);
        tsWrapper.ts.waitForMessagesSent(1);
        assertTrue(firstInfo.isGuaranteed());
        // The last-set value has not been updated yet.
        assertFalse(firstInfo.getLastSetGuaranteed());
        assertTrue(firstInfo.isUpdateInProgress());
        tsWrapper.deallocateTask(firstAttempt, true, TaskAttemptEndReason.CONTAINER_EXITED);
        tsWrapper.ts.handleUpdateResult(firstInfo, false);

        // The guaranteed slot (the "duck") must still be held; it should just go to the other task.
        assertTrue(secondInfo.isGuaranteed());
        assertTrue(secondInfo.isUpdateInProgress());
        tsWrapper.ts.handleUpdateResult(secondInfo, false);
        tsWrapper.deallocateTask(secondAttempt, true, TaskAttemptEndReason.CONTAINER_EXITED);

        // Same again, but terminating after the failed update; the count must stay correct.
        assertEquals(1, tsWrapper.ts.getUnusedGuaranteedCount());
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : TaskInfo(org.apache.hadoop.hive.llap.tezplugins.LlapTaskSchedulerService.TaskInfo) Priority(org.apache.hadoop.yarn.api.records.Priority) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 99 with Priority

use of org.apache.hadoop.yarn.api.records.Priority in project hive by apache.

the class TestLlapTaskSchedulerService method testAdjustLocalityDelay.

/**
 * Requests two tasks on a single host and checks that the second task's
 * {@code adjustedLocalityDelay} flag flips from false to true once the scheduler has made its
 * first allocation.
 */
@Test(timeout = 10000)
public void testAdjustLocalityDelay() throws IOException, InterruptedException {
    Priority prio = Priority.newInstance(1);
    String[] singleHost = new String[] { HOST1 };
    TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper(2000, singleHost, 1, 0, 1000L);
    try {
        TezTaskAttemptID firstAttempt = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object firstCookie = "cookie1";
        TezTaskAttemptID secondAttempt = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object secondCookie = "cookie2";

        tsWrapper.controlScheduler(true);
        tsWrapper.allocateTask(firstAttempt, singleHost, prio, firstCookie);
        tsWrapper.allocateTask(secondAttempt, singleHost, prio, secondCookie);

        // Only one task fits; the second should merely have its locality delay adjusted later.
        assertFalse(tsWrapper.ts.getTaskInfo(secondAttempt).adjustedLocalityDelay);

        // Drive the scheduler until exactly one allocation has been recorded.
        do {
            tsWrapper.signalSchedulerRun();
            tsWrapper.awaitSchedulerRun();
        } while (tsWrapper.ts.dagStats.getNumTotalAllocations() != 1);

        // Active node instances exist, so the delay should now be adjusted.
        assertTrue(tsWrapper.ts.getTaskInfo(secondAttempt).adjustedLocalityDelay);
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : Priority(org.apache.hadoop.yarn.api.records.Priority) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 100 with Priority

use of org.apache.hadoop.yarn.api.records.Priority in project hive by apache.

the class TestLlapTaskSchedulerService method testNodeReEnabled.

/**
 * Allocates one task on each of three hosts, rejects all three executions so that every node
 * lands in {@code disabledNodesQueue}, then requests three more tasks on the same hosts and
 * checks that they are all allocated with no "no locality request" allocations recorded.
 */
@Test(timeout = 10000)
public void testNodeReEnabled() throws InterruptedException, IOException {
    // The wrapper's timing argument is based on actual (wall-clock) timing.
    TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper(1000L);
    try {
        Priority prio = Priority.newInstance(1);
        String[] onHost1 = new String[] { HOST1 };
        String[] onHost2 = new String[] { HOST2 };
        String[] onHost3 = new String[] { HOST3 };

        // First wave: one task per host.
        TezTaskAttemptID attemptA = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieA = new Object();
        TezTaskAttemptID attemptB = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieB = new Object();
        TezTaskAttemptID attemptC = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieC = new Object();
        tsWrapper.controlScheduler(true);
        tsWrapper.allocateTask(attemptA, onHost1, prio, cookieA);
        tsWrapper.allocateTask(attemptB, onHost2, prio, cookieB);
        tsWrapper.allocateTask(attemptC, onHost3, prio, cookieC);
        do {
            tsWrapper.signalSchedulerRun();
            tsWrapper.awaitSchedulerRun();
        } while (tsWrapper.ts.dagStats.getNumTotalAllocations() != 3);

        verify(tsWrapper.mockAppCallback, times(3)).taskAllocated(any(Object.class), any(Object.class), any(Container.class));
        assertEquals(3, tsWrapper.ts.dagStats.getNumLocalAllocations());
        assertEquals(0, tsWrapper.ts.dagStats.getNumAllocationsNoLocalityRequest());
        assertEquals(3, tsWrapper.ts.dagStats.getNumTotalAllocations());

        // Reject every first-wave task.
        tsWrapper.resetAppCallback();
        tsWrapper.rejectExecution(attemptA);
        tsWrapper.rejectExecution(attemptB);
        tsWrapper.rejectExecution(attemptC);

        // Verify that the nodes are blacklisted.
        assertEquals(3, tsWrapper.ts.dagStats.getNumRejectedTasks());
        assertEquals(3, tsWrapper.ts.instanceToNodeMap.size());
        assertEquals(3, tsWrapper.ts.disabledNodesQueue.size());

        // Second wave: three more tasks on the same hosts.
        TezTaskAttemptID attemptD = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieD = new Object();
        TezTaskAttemptID attemptE = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieE = new Object();
        TezTaskAttemptID attemptF = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieF = new Object();
        tsWrapper.allocateTask(attemptD, onHost1, prio, cookieD);
        tsWrapper.allocateTask(attemptE, onHost2, prio, cookieE);
        tsWrapper.allocateTask(attemptF, onHost3, prio, cookieF);
        do {
            tsWrapper.signalSchedulerRun();
            tsWrapper.awaitSchedulerRun();
        } while (tsWrapper.ts.dagStats.getNumTotalAllocations() != 6);

        ArgumentCaptor<Container> allocatedContainers = ArgumentCaptor.forClass(Container.class);
        verify(tsWrapper.mockAppCallback, times(3)).taskAllocated(any(Object.class), any(Object.class), allocatedContainers.capture());
        // NOTE(review): the original comment here was only the fragment
        // "which affects the locality matching"; its first half is not visible - confirm intent.
        assertEquals(0, tsWrapper.ts.dagStats.getNumAllocationsNoLocalityRequest());
        assertEquals(6, tsWrapper.ts.dagStats.getNumTotalAllocations());
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : Container(org.apache.hadoop.yarn.api.records.Container) Priority(org.apache.hadoop.yarn.api.records.Priority) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Aggregations

Priority (org.apache.hadoop.yarn.api.records.Priority): 216; Test (org.junit.Test): 139; Resource (org.apache.hadoop.yarn.api.records.Resource): 109; Container (org.apache.hadoop.yarn.api.records.Container): 64; ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 62; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 49; NodeId (org.apache.hadoop.yarn.api.records.NodeId): 43; FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 40; TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 40; Configuration (org.apache.hadoop.conf.Configuration): 37; FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 34; ArrayList (java.util.ArrayList): 33; ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 31; YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 30; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 29; TaskSchedulerContextDrainable (org.apache.tez.dag.app.rm.TestTaskSchedulerHelpers.TaskSchedulerContextDrainable): 27; ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest): 26; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 25; RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 24; LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 23