Search in sources :

Example 71 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class LlapTaskSchedulerService method allocateTask.

/**
 * Accepts an allocation request for {@code task}.
 *
 * <p>The request is wrapped in a {@link TaskInfo}, counted in {@code dagStats}, added to the
 * pending tasks and followed by an immediate scheduling attempt. When no DAG is currently
 * flagged as running, the DAG id derived from the task attempt id is pushed to the metrics
 * (if both are available) and the DAG is then flagged as running.
 *
 * <p>{@code containerId} is only logged here and {@code containerSignature} is not used.
 */
@Override
public void allocateTask(Object task, Resource capability, ContainerId containerId, Priority priority, Object containerSignature, Object clientCookie) {
    // Container affinity can be implemented as Host affinity for LLAP. Not required until
    // 1:1 edges are used in Hive.
    final TezTaskAttemptID attemptId = getTaskAttemptId(task);
    final long requestTime = clock.getTime();
    final TaskInfo pendingTask = new TaskInfo(
            localityDelayConf, clock, task, clientCookie, priority, capability, null, null, requestTime, attemptId);

    LOG.info("Received allocateRequest. task={}, priority={}, capability={}, containerId={}",
            task, priority, capability, containerId);

    if (!dagRunning) {
        // First request seen while no DAG is marked as running: publish the DAG id when possible.
        final boolean canPublishDagId = (metrics != null) && (attemptId != null);
        if (canPublishDagId) {
            metrics.setDagId(attemptId.getTaskID().getVertexID().getDAGId().toString());
        }
        dagRunning = true;
    }

    dagStats.registerTaskRequest(null, null);
    addPendingTask(pendingTask);
    trySchedulingPendingTasks();
}
Also used : TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 72 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class LlapTaskCommunicator method nodePinged.

/**
 * Processes a ping received from a node.
 *
 * <p>The node is registered as pinging, then each reported task attempt that is present in the
 * container/attempt map tracked for that node is examined:
 * <ul>
 *   <li>if the attempt's recorded unique node id equals {@code uniqueId}, the task and its
 *       container are reported alive and the scheduler is told the attempt's guaranteed flag;</li>
 *   <li>otherwise the attempt is collected and afterwards reported as killed with
 *       {@code TaskAttemptEndReason.NODE_FAILED}.</li>
 * </ul>
 * When nothing is tracked for the node, a warning is logged, but only if more than 5000 ms
 * have passed since the last recorded warning time.
 *
 * @param hostname   host name of the pinging node
 * @param uniqueId   unique id the pinging node reported
 * @param port       port of the pinging node
 * @param tasks      task attempts the node reported
 * @param guaranteed per-attempt guaranteed flags, indexed like {@code tasks}; may be
 *                   {@code null}, in which case every attempt is treated as not guaranteed
 */
void nodePinged(String hostname, String uniqueId, int port, TezAttemptArray tasks, BooleanArray guaranteed) {
    // TODO: do we ever need the port? we could just do away with nodeId altogether.
    final LlapNodeId pingingNode = LlapNodeId.getInstance(hostname, port);
    registerPingingNode(pingingNode, uniqueId);
    final BiMap<ContainerId, TezTaskAttemptID> containerToAttempt = entityTracker.getContainerAttemptMapForNode(pingingNode);

    if (containerToAttempt == null) {
        // Nothing tracked for this node: warn, throttled by the last recorded warning time.
        final long nowMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime());
        if (nowMs > nodeNotFoundLogTime.get() + 5000L) {
            LOG.warn("Received ping from node without any registered tasks or containers: " + hostname + ":" + port + ". Could be caused by pre-emption by the AM," + " or a mismatched hostname. Enable debug logging for mismatched host names");
            nodeNotFoundLogTime.set(nowMs);
        }
        return;
    }

    final Set<TezTaskAttemptID> misplacedAttempts = new HashSet<>();
    synchronized (containerToAttempt) {
        for (int idx = 0; idx < tasks.get().length; ++idx) {
            final boolean isGuaranteed = guaranteed != null && ((BooleanWritable) guaranteed.get()[idx]).get();
            final TezTaskAttemptID attemptID = (TezTaskAttemptID) tasks.get()[idx];

            // Only attempts present in the AM view for this node are of interest.
            if (!containerToAttempt.containsValue(attemptID)) {
                continue;
            }

            final String owningNodeId = entityTracker.getUniqueNodeId(attemptID);
            if (owningNodeId == null || !owningNodeId.equals(uniqueId)) {
                // Recorded against a different (or unknown) unique id than the pinging node.
                misplacedAttempts.add(attemptID);
                continue;
            }

            getContext().taskAlive(attemptID);
            scheduler.taskInfoUpdated(attemptID, isGuaranteed);
            getContext().containerAlive(containerToAttempt.inverse().get(attemptID));
        }
    }

    if (misplacedAttempts.isEmpty()) {
        return;
    }

    LOG.info("The tasks we expected to be on the node are not there: " + misplacedAttempts);
    for (TezTaskAttemptID staleAttempt : misplacedAttempts) {
        LOG.info("Sending a kill for attempt {}, due to a ping from node with same host and same port but registered with different unique ID", staleAttempt);
        getContext().taskKilled(staleAttempt, TaskAttemptEndReason.NODE_FAILED, "Node with same host and port but with new unique ID pinged");
    }
}
Also used : LlapNodeId(org.apache.hadoop.hive.llap.LlapNodeId) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ByteString(com.google.protobuf.ByteString) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) HashSet(java.util.HashSet)

Example 73 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TestLlapTaskSchedulerService method testUpdateOnFinishingTask.

/**
 * Exercises guaranteed-state updates that fail because the target task finishes around the
 * same time, and checks that the guaranteed unit is handed on and finally counted as unused.
 */
@Test(timeout = 10000)
public void testUpdateOnFinishingTask() throws IOException, InterruptedException {
    final TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper();
    // The update fails because the task has terminated on the node.
    try {
        Priority highPriority = Priority.newInstance(1);
        Priority lowPriority = Priority.newInstance(2);
        TezTaskAttemptID highPriTask = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        TezTaskAttemptID lowPriTask = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();

        // Start with no guaranteed units and get both tasks allocated.
        tsWrapper.ts.updateGuaranteedCount(0);
        tsWrapper.controlScheduler(true);
        tsWrapper.allocateTask(highPriTask, null, highPriority, new Object());
        tsWrapper.allocateTask(lowPriTask, null, lowPriority, new Object());
        tsWrapper.awaitTotalTaskAllocations(2);
        TaskInfo highPriInfo = tsWrapper.ts.getTaskInfo(highPriTask);
        TaskInfo lowPriInfo = tsWrapper.ts.getTaskInfo(lowPriTask);

        // Raise the guaranteed count while the first task is about to terminate; the raise fails.
        tsWrapper.ts.updateGuaranteedCount(1);
        tsWrapper.ts.waitForMessagesSent(1);
        assertTrue(highPriInfo.isGuaranteed());
        // The update has been sent but not acknowledged yet.
        assertFalse(highPriInfo.getLastSetGuaranteed());
        assertTrue(highPriInfo.isUpdateInProgress());

        // Terminate the first task, then deliver the failed update result for it.
        tsWrapper.deallocateTask(highPriTask, true, TaskAttemptEndReason.CONTAINER_EXITED);
        tsWrapper.ts.handleUpdateResult(highPriInfo, false);

        // The guaranteed unit must not be lost; it should now be assigned to the other task.
        assertTrue(lowPriInfo.isGuaranteed());
        assertTrue(lowPriInfo.isUpdateInProgress());

        // This time the update fails first and the termination follows.
        tsWrapper.ts.handleUpdateResult(lowPriInfo, false);
        tsWrapper.deallocateTask(lowPriTask, true, TaskAttemptEndReason.CONTAINER_EXITED);

        // With both tasks gone, the single guaranteed unit must be accounted for as unused.
        assertEquals(1, tsWrapper.ts.getUnusedGuaranteedCount());
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : TaskInfo(org.apache.hadoop.hive.llap.tezplugins.LlapTaskSchedulerService.TaskInfo) Priority(org.apache.hadoop.yarn.api.records.Priority) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 74 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TestLlapTaskSchedulerService method testAdjustLocalityDelay.

/**
 * Requests two tasks on the same host and checks that the second task's
 * {@code adjustedLocalityDelay} flag flips from false to true once exactly one
 * allocation has been made.
 */
@Test(timeout = 10000)
public void testAdjustLocalityDelay() throws IOException, InterruptedException {
    Priority samePriority = Priority.newInstance(1);
    String[] singleHost = new String[] { HOST1 };
    TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper(2000, singleHost, 1, 0, 1000L);
    try {
        TezTaskAttemptID firstTask = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object firstCookie = "cookie1";
        TezTaskAttemptID secondTask = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object secondCookie = "cookie2";

        tsWrapper.controlScheduler(true);
        tsWrapper.allocateTask(firstTask, singleHost, samePriority, firstCookie);
        tsWrapper.allocateTask(secondTask, singleHost, samePriority, secondCookie);

        // Only one task fits; before any scheduling run the second task's delay is untouched.
        assertFalse(tsWrapper.ts.getTaskInfo(secondTask).adjustedLocalityDelay);

        // Drive the scheduler until exactly one allocation has been recorded.
        do {
            tsWrapper.signalSchedulerRun();
            tsWrapper.awaitSchedulerRun();
        } while (tsWrapper.ts.dagStats.getNumTotalAllocations() != 1);

        // Active node instances exist, so the waiting task's locality delay should be adjusted.
        assertTrue(tsWrapper.ts.getTaskInfo(secondTask).adjustedLocalityDelay);
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : Priority(org.apache.hadoop.yarn.api.records.Priority) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 75 with TezTaskAttemptID

use of org.apache.tez.dag.records.TezTaskAttemptID in project hive by apache.

the class TestLlapTaskSchedulerService method testNodeReEnabled.

/**
 * Allocates one task on each of three hosts, rejects all three executions so that every node
 * ends up in the disabled queue, then requests three more tasks on the same hosts and waits
 * until they are allocated as well.
 */
@Test(timeout = 10000)
public void testNodeReEnabled() throws InterruptedException, IOException {
    // Relies on real timing (per the original author's note).
    TestTaskSchedulerServiceWrapper tsWrapper = new TestTaskSchedulerServiceWrapper(1000L);
    try {
        Priority samePriority = Priority.newInstance(1);
        String[] firstHost = new String[] { HOST1 };
        String[] secondHost = new String[] { HOST2 };
        String[] thirdHost = new String[] { HOST3 };

        // Round one: one task per host.
        TezTaskAttemptID taskA = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieA = new Object();
        TezTaskAttemptID taskB = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieB = new Object();
        TezTaskAttemptID taskC = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieC = new Object();
        tsWrapper.controlScheduler(true);
        tsWrapper.allocateTask(taskA, firstHost, samePriority, cookieA);
        tsWrapper.allocateTask(taskB, secondHost, samePriority, cookieB);
        tsWrapper.allocateTask(taskC, thirdHost, samePriority, cookieC);
        do {
            tsWrapper.signalSchedulerRun();
            tsWrapper.awaitSchedulerRun();
        } while (tsWrapper.ts.dagStats.getNumTotalAllocations() != 3);

        verify(tsWrapper.mockAppCallback, times(3)).taskAllocated(any(Object.class), any(Object.class), any(Container.class));
        assertEquals(3, tsWrapper.ts.dagStats.getNumLocalAllocations());
        assertEquals(0, tsWrapper.ts.dagStats.getNumAllocationsNoLocalityRequest());
        assertEquals(3, tsWrapper.ts.dagStats.getNumTotalAllocations());

        // Reject every execution; each node is then expected in the disabled queue.
        tsWrapper.resetAppCallback();
        tsWrapper.rejectExecution(taskA);
        tsWrapper.rejectExecution(taskB);
        tsWrapper.rejectExecution(taskC);
        assertEquals(3, tsWrapper.ts.dagStats.getNumRejectedTasks());
        assertEquals(3, tsWrapper.ts.instanceToNodeMap.size());
        assertEquals(3, tsWrapper.ts.disabledNodesQueue.size());

        // Round two: three more tasks on the same hosts must eventually be allocated too.
        TezTaskAttemptID taskD = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieD = new Object();
        TezTaskAttemptID taskE = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieE = new Object();
        TezTaskAttemptID taskF = TestTaskSchedulerServiceWrapper.generateTaskAttemptId();
        Object cookieF = new Object();
        tsWrapper.allocateTask(taskD, firstHost, samePriority, cookieD);
        tsWrapper.allocateTask(taskE, secondHost, samePriority, cookieE);
        tsWrapper.allocateTask(taskF, thirdHost, samePriority, cookieF);
        do {
            tsWrapper.signalSchedulerRun();
            tsWrapper.awaitSchedulerRun();
        } while (tsWrapper.ts.dagStats.getNumTotalAllocations() != 6);

        ArgumentCaptor<Container> allocatedContainers = ArgumentCaptor.forClass(Container.class);
        verify(tsWrapper.mockAppCallback, times(3)).taskAllocated(any(Object.class), any(Object.class), allocatedContainers.capture());

        // NOTE(review): local-allocation counts are not asserted for round two; presumably the
        // re-enable timing affects locality matching. Every request named a host, so none is
        // expected to be counted as lacking a locality request.
        assertEquals(0, tsWrapper.ts.dagStats.getNumAllocationsNoLocalityRequest());
        assertEquals(6, tsWrapper.ts.dagStats.getNumTotalAllocations());
    } finally {
        tsWrapper.shutdown();
    }
}
Also used : Container(org.apache.hadoop.yarn.api.records.Container) Priority(org.apache.hadoop.yarn.api.records.Priority) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Aggregations

TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 167
Test (org.junit.Test): 124
TezTaskID (org.apache.tez.dag.records.TezTaskID): 61
TezVertexID (org.apache.tez.dag.records.TezVertexID): 54
Container (org.apache.hadoop.yarn.api.records.Container): 48
TezEvent (org.apache.tez.runtime.api.impl.TezEvent): 46
TezDAGID (org.apache.tez.dag.records.TezDAGID): 43
Configuration (org.apache.hadoop.conf.Configuration): 42
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 41
Priority (org.apache.hadoop.yarn.api.records.Priority): 41
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 41
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 33
Resource (org.apache.hadoop.yarn.api.records.Resource): 30
TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface): 28
EventMetaData (org.apache.tez.runtime.api.impl.EventMetaData): 28
ClusterInfo (org.apache.tez.dag.app.ClusterInfo): 27
ContainerHeartbeatHandler (org.apache.tez.dag.app.ContainerHeartbeatHandler): 27
AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap): 27
ContainerContextMatcher (org.apache.tez.dag.app.rm.container.ContainerContextMatcher): 27
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 25