Search in sources :

Example 1 with TaskAttemptEventKillRequest

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest in project tez by apache.

the class TaskImpl method killUnfinishedAttempt.

/**
 * Asks the given attempt to terminate if it has not finished yet.
 *
 * <p>If the attempt is the one currently recorded in {@code commitAttempt},
 * that field is cleared first, since the attempt is about to be killed.
 * A {@code null} attempt is ignored.
 *
 * @param attempt    the attempt to kill; may be {@code null}, in which case nothing happens
 * @param logMsg     message passed along with the kill request
 * @param errorCause termination cause passed along with the kill request
 */
private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg, TaskAttemptTerminationCause errorCause) {
    // Guard first: the original dereferenced attempt.getID() before its own null check,
    // which threw a NullPointerException whenever commitAttempt was set and attempt was null.
    if (attempt == null) {
        return;
    }
    if (commitAttempt != null && commitAttempt.equals(attempt.getID())) {
        LOG.info("Unsetting commit attempt: " + commitAttempt + " since attempt is being killed");
        commitAttempt = null;
    }
    if (!attempt.isFinished()) {
        eventHandler.handle(new TaskAttemptEventKillRequest(attempt.getID(), logMsg, errorCause));
    }
}
Also used : TaskAttemptEventKillRequest(org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest)

Example 2 with TaskAttemptEventKillRequest

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest in project tez by apache.

the class TestTaskAttempt method testLaunchFailedWhileKilling.

/**
 * Checks that an attempt which receives a kill request right after being scheduled
 * moves to {@code KILL_IN_PROGRESS}, and that a subsequent TezEvent update and a
 * repeated kill request do not flag an internal error on the event handler.
 */
@Test(timeout = 5000)
public void testLaunchFailedWhileKilling() throws Exception {
    ApplicationId appId = ApplicationId.newInstance(1, 2);
    TezDAGID dagID = TezDAGID.getInstance(appId, 1);
    TezVertexID vertexID = TezVertexID.getInstance(dagID, 1);
    TezTaskID taskID = TezTaskID.getInstance(vertexID, 1);
    TezTaskAttemptID taskAttemptID = TezTaskAttemptID.getInstance(taskID, 0);
    MockEventHandler eventHandler = new MockEventHandler();
    TaskCommunicatorManagerInterface taListener = createMockTaskAttemptListener();
    Configuration taskConf = new Configuration();
    taskConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
    taskConf.setBoolean("fs.file.impl.disable.cache", true);
    locationHint = TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(new String[] { "127.0.0.1" })), null);
    Resource resource = Resource.newInstance(1024, 1);
    AppContext mockAppContext = appCtx;
    doReturn(new ClusterInfo()).when(mockAppContext).getClusterInfo();
    TaskAttemptImpl taImpl = new MockTaskAttemptImpl(taskID, 1, eventHandler, taListener, taskConf, new SystemClock(), mock(TaskHeartbeatHandler.class), mockAppContext, false, resource, createFakeContainerContext(), false);
    // No container is ever assigned in this test, so no Container/NodeId/ContainerId
    // fixtures are needed: the attempt is scheduled and then killed straight away.
    taImpl.handle(new TaskAttemptEventSchedule(taskAttemptID, 0, 0));
    // Kill request arrives immediately after scheduling.
    taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT));
    assertEquals(TaskAttemptStateInternal.KILL_IN_PROGRESS, taImpl.getInternalState());
    // A TezEvent update while the kill is in progress must be tolerated.
    taImpl.handle(new TaskAttemptEventTezEventUpdate(taImpl.getID(), Collections.EMPTY_LIST));
    assertFalse("InternalError occurred trying to handle TA_TEZ_EVENT_UPDATE in KILL_IN_PROGRESS state", eventHandler.internalError);
    // A second kill request while already being killed must be tolerated as well.
    taImpl.handle(new TaskAttemptEventKillRequest(taskAttemptID, null, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT));
    assertFalse(eventHandler.internalError);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Container(org.apache.hadoop.yarn.api.records.Container) TaskAttemptEventTezEventUpdate(org.apache.tez.dag.app.dag.event.TaskAttemptEventTezEventUpdate) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) TaskHeartbeatHandler(org.apache.tez.dag.app.TaskHeartbeatHandler) TaskAttemptEventSchedule(org.apache.tez.dag.app.dag.event.TaskAttemptEventSchedule) TezVertexID(org.apache.tez.dag.records.TezVertexID) HashSet(java.util.HashSet) TaskAttemptEventKillRequest(org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest) SystemClock(org.apache.hadoop.yarn.util.SystemClock) AppContext(org.apache.tez.dag.app.AppContext) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) TezTaskID(org.apache.tez.dag.records.TezTaskID) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test)

Example 3 with TaskAttemptEventKillRequest

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest in project tez by apache.

the class TestTaskAttempt method testKilledInNew.

/**
 * Checks that a kill request delivered to an attempt still in {@code NEW} moves it
 * directly to {@code KILLED}, logging a finished event but no started event.
 */
@SuppressWarnings("deprecation")
@Test(timeout = 5000)
public void testKilledInNew() throws ServicePluginException {
    // Identifier chain: application -> attempt / DAG -> vertex -> task.
    ApplicationId applicationId = ApplicationId.newInstance(1, 2);
    ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 0);
    TezDAGID dagId = TezDAGID.getInstance(applicationId, 1);
    TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
    TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);

    MockEventHandler handlerSpy = spy(new MockEventHandler());
    TaskCommunicatorManagerInterface attemptListener = createMockTaskAttemptListener();

    // Configuration pointing the local file system at the stubbed implementation.
    Configuration conf = new Configuration();
    conf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
    conf.setBoolean("fs.file.impl.disable.cache", true);

    HashSet<String> hintedHosts = new HashSet<String>(Arrays.asList("127.0.0.1"));
    locationHint = TaskLocationHint.createTaskLocationHint(hintedHosts, null);
    Resource taskResource = Resource.newInstance(1024, 1);

    // Mocked container registered with the container map exposed by the app context.
    NodeId nodeId = NodeId.newInstance("127.0.0.1", 0);
    ContainerId containerId = ContainerId.newInstance(applicationAttemptId, 3);
    Container mockContainer = mock(Container.class);
    when(mockContainer.getId()).thenReturn(containerId);
    when(mockContainer.getNodeId()).thenReturn(nodeId);
    when(mockContainer.getNodeHttpAddress()).thenReturn("localhost:0");
    AMContainerMap containerMap = new AMContainerMap(mock(ContainerHeartbeatHandler.class), mock(TaskCommunicatorManagerInterface.class), new ContainerContextMatcher(), appCtx);
    containerMap.addContainerIfNew(mockContainer, 0, 0, 0);
    doReturn(new ClusterInfo()).when(appCtx).getClusterInfo();
    doReturn(containerMap).when(appCtx).getAllContainers();

    TaskHeartbeatHandler heartbeatHandler = mock(TaskHeartbeatHandler.class);
    MockTaskAttemptImpl attempt = new MockTaskAttemptImpl(taskId, 1, handlerSpy, attemptListener, conf, new SystemClock(), heartbeatHandler, appCtx, false, taskResource, createFakeContainerContext(), true);

    // Freshly constructed attempt starts in NEW; a kill takes it straight to KILLED.
    Assert.assertEquals(TaskAttemptStateInternal.NEW, attempt.getInternalState());
    TaskAttemptEventKillRequest killRequest = new TaskAttemptEventKillRequest(attempt.getID(), "kill it", TaskAttemptTerminationCause.TERMINATED_BY_CLIENT);
    attempt.handle(killRequest);
    Assert.assertEquals(TaskAttemptStateInternal.KILLED, attempt.getInternalState());
    Assert.assertEquals(0, attempt.taskAttemptStartedEventLogged);
    Assert.assertEquals(1, attempt.taskAttemptFinishedEventLogged);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) SystemClock(org.apache.hadoop.yarn.util.SystemClock) Resource(org.apache.hadoop.yarn.api.records.Resource) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskCommunicatorManagerInterface(org.apache.tez.dag.app.TaskCommunicatorManagerInterface) AMContainerMap(org.apache.tez.dag.app.rm.container.AMContainerMap) ContainerContextMatcher(org.apache.tez.dag.app.rm.container.ContainerContextMatcher) TezTaskID(org.apache.tez.dag.records.TezTaskID) Container(org.apache.hadoop.yarn.api.records.Container) ClusterInfo(org.apache.tez.dag.app.ClusterInfo) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TezDAGID(org.apache.tez.dag.records.TezDAGID) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskHeartbeatHandler(org.apache.tez.dag.app.TaskHeartbeatHandler) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) TezVertexID(org.apache.tez.dag.records.TezVertexID) ContainerHeartbeatHandler(org.apache.tez.dag.app.ContainerHeartbeatHandler) HashSet(java.util.HashSet) TaskAttemptEventKillRequest(org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest) Test(org.junit.Test)

Example 4 with TaskAttemptEventKillRequest

use of org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest in project tez by apache.

the class TaskImpl method canCommit.

/**
 * Decides whether the given attempt may commit the task output.
 *
 * <p>Runs entirely under {@code writeLock}. The outcome is:
 * <ul>
 *   <li>{@code false} while the task is still {@code SCHEDULED} (the attempt is expected to ask again);</li>
 *   <li>{@code false}, plus a kill request for the attempt, when the task is in any other non-{@code RUNNING} state;</li>
 *   <li>when no committer is recorded yet: {@code true} (and the attempt becomes the committer)
 *       if the attempt is {@code RUNNING}, otherwise {@code false};</li>
 *   <li>when a committer is already recorded: {@code true} only for that same attempt.</li>
 * </ul>
 *
 * @param taskAttemptID the attempt asking for permission to commit
 * @return {@code true} if the attempt is (or already was) granted the commit
 * @throws TezUncheckedException if no attempt with the given id is known to this task
 */
@Override
public boolean canCommit(TezTaskAttemptID taskAttemptID) {
    writeLock.lock();
    try {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Commit go/no-go request from " + taskAttemptID);
        }

        final TaskState taskState = getState();
        if (taskState == TaskState.SCHEDULED) {
            // The attempt already ran and wants to commit, but this task has not yet left
            // SCHEDULED, i.e. event processing is lagging. Answering false makes the
            // attempt ask again later.
            LOG.info("Event processing delay. " + "Attempt committing before state machine transitioned to running : Task {}", taskId);
            return false;
        }
        if (taskState != TaskState.RUNNING) {
            // Neither scheduled nor running: the requesting attempt is killed as orphaned.
            LOG.info("Task not running. Issuing kill to bad commit attempt " + taskAttemptID);
            eventHandler.handle(new TaskAttemptEventKillRequest(taskAttemptID, "Task not running. Bad attempt.", TaskAttemptTerminationCause.TERMINATED_ORPHANED));
            return false;
        }

        if (commitAttempt != null) {
            // A committer is already recorded; only that attempt gets a go. Other
            // attempts are merely told no here, they are not killed by this method.
            final boolean isCurrentCommitter = commitAttempt.equals(taskAttemptID);
            if (LOG.isDebugEnabled()) {
                if (isCurrentCommitter) {
                    LOG.debug(taskAttemptID + " already given a go for committing the task output.");
                } else {
                    LOG.debug(commitAttempt + " is current committer. Commit waiting for:  " + taskAttemptID);
                }
            }
            return isCurrentCommitter;
        }

        // No committer yet: consider the requesting attempt.
        final TaskAttempt requester = getAttempt(taskAttemptID);
        if (requester == null) {
            throw new TezUncheckedException("Unknown task for commit: " + taskAttemptID);
        }
        // A non-locked state snapshot is sufficient because state changes are handled in
        // the task attempt itself; taking the attempt's lock here could deadlock.
        final TaskAttemptState requesterState = requester.getStateNoLock();
        if (requesterState != TaskAttemptState.RUNNING) {
            LOG.info(taskAttemptID + " with state: " + requesterState + " given a no-go for commit because its not running.");
            return false;
        }
        commitAttempt = taskAttemptID;
        LOG.info(taskAttemptID + " given a go for committing the task output.");
        return true;
    } finally {
        writeLock.unlock();
    }
}
Also used : TezUncheckedException(org.apache.tez.dag.api.TezUncheckedException) TaskAttemptState(org.apache.tez.dag.api.oldrecords.TaskAttemptState) TaskAttempt(org.apache.tez.dag.app.dag.TaskAttempt) TaskState(org.apache.tez.dag.api.oldrecords.TaskState) TaskAttemptEventKillRequest(org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest)

Aggregations

TaskAttemptEventKillRequest (org.apache.tez.dag.app.dag.event.TaskAttemptEventKillRequest): 4; HashSet (java.util.HashSet): 2; Configuration (org.apache.hadoop.conf.Configuration): 2; ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 2; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 2; Container (org.apache.hadoop.yarn.api.records.Container): 2; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 2; LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 2; NodeId (org.apache.hadoop.yarn.api.records.NodeId): 2; Resource (org.apache.hadoop.yarn.api.records.Resource): 2; SystemClock (org.apache.hadoop.yarn.util.SystemClock): 2; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 2; ClusterInfo (org.apache.tez.dag.app.ClusterInfo): 2; TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface): 2; TaskHeartbeatHandler (org.apache.tez.dag.app.TaskHeartbeatHandler): 2; TezDAGID (org.apache.tez.dag.records.TezDAGID): 2; TezTaskID (org.apache.tez.dag.records.TezTaskID): 2; TezVertexID (org.apache.tez.dag.records.TezVertexID): 2; Test (org.junit.Test): 2; TezUncheckedException (org.apache.tez.dag.api.TezUncheckedException): 1