Search in sources :

Example 46 with Task

use of org.apache.hadoop.mapreduce.v2.app.job.Task in project hadoop by apache.

the class TestTaskImpl method testSpeculativeMapFetchFailure.

/**
 * Checks that a map task is rescheduled with a fresh attempt when the
 * speculative attempt that previously succeeded is later reported as failed.
 */
@Test
public void testSpeculativeMapFetchFailure() {
    mockTask = createMockTask(TaskType.MAP);

    // Scenario setup: the speculative attempt wins and the first attempt is killed.
    runSpeculativeTaskAttemptSucceeds(TaskEventType.T_ATTEMPT_KILLED);
    assertEquals(2, taskAttempts.size());

    // The speculative attempt (index 1) retroactively fails from fetch failures.
    TaskTAttemptEvent retroactiveFailure = new TaskTAttemptEvent(
        taskAttempts.get(1).getAttemptId(),
        TaskEventType.T_ATTEMPT_FAILED);
    mockTask.handle(retroactiveFailure);

    // The task must be back in the scheduled state with a third attempt created.
    assertTaskScheduledState();
    assertEquals(3, taskAttempts.size());
}
Also used : TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Test(org.junit.Test)

Example 47 with Task

use of org.apache.hadoop.mapreduce.v2.app.job.Task in project hadoop by apache.

the class TestTaskImpl method testTaskProgress.

/**
 * Verifies that the progress reported by the task follows the progress of
 * its latest attempt, drops back to zero once that attempt is killed, and
 * follows the replacement attempt afterwards.
 *
 * <p>JUnit assertions are used instead of the Java {@code assert} keyword so
 * the checks run even when the JVM is started without {@code -ea}, and so a
 * failure reports the expected and actual values.
 */
@Test
public void testTaskProgress() {
    LOG.info("--- START: testTaskProgress ---");
    // Zero tolerance keeps the float comparisons as strict as the original
    // '==' checks.
    final float delta = 0f;
    mockTask = createMockTask(TaskType.MAP);
    // launch task
    TaskId taskId = getNewTaskID();
    scheduleTaskAttempt(taskId);
    float progress = 0f;
    assertEquals(progress, mockTask.getProgress(), delta);
    launchTaskAttempt(getLastAttempt().getAttemptId());
    // update attempt1
    progress = 50f;
    updateLastAttemptProgress(progress);
    assertEquals(progress, mockTask.getProgress(), delta);
    progress = 100f;
    updateLastAttemptProgress(progress);
    assertEquals(progress, mockTask.getProgress(), delta);
    progress = 0f;
    // mark first attempt as killed; the task is expected to report zero
    // progress from here on
    updateLastAttemptState(TaskAttemptState.KILLED);
    assertEquals(progress, mockTask.getProgress(), delta);
    // kill first attempt
    // should trigger a new attempt
    // as no successful attempts
    killRunningTaskAttempt(getLastAttempt().getAttemptId());
    assertEquals(2, taskAttempts.size());
    assertEquals(0f, mockTask.getProgress(), delta);
    // progress of the replacement attempt must be reflected by the task
    launchTaskAttempt(getLastAttempt().getAttemptId());
    progress = 50f;
    updateLastAttemptProgress(progress);
    assertEquals(progress, mockTask.getProgress(), delta);
}
Also used : TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Test(org.junit.Test)

Example 48 with Task

use of org.apache.hadoop.mapreduce.v2.app.job.Task in project hadoop by apache.

the class TestTaskImpl method testFailureDuringTaskAttemptCommit.

/**
 * Exercises a failure that happens while the first attempt is committing:
 * a second attempt is created, commits and succeeds, and only that second
 * attempt is allowed to commit.
 */
@Test
public void testFailureDuringTaskAttemptCommit() {
    mockTask = createMockTask(TaskType.MAP);
    TaskId newTaskId = getNewTaskID();
    scheduleTaskAttempt(newTaskId);

    // First attempt: launch it and move it into the commit phase.
    launchTaskAttempt(getLastAttempt().getAttemptId());
    updateLastAttemptState(TaskAttemptState.COMMIT_PENDING);
    commitTaskAttempt(getLastAttempt().getAttemptId());

    // An exception during the commit makes the first attempt fail.
    updateLastAttemptState(TaskAttemptState.FAILED);
    failRunningTaskAttempt(getLastAttempt().getAttemptId());
    assertEquals(2, taskAttempts.size());

    // Second attempt: commit it and report it as succeeded.
    updateLastAttemptState(TaskAttemptState.SUCCEEDED);
    commitTaskAttempt(getLastAttempt().getAttemptId());
    TaskTAttemptEvent succeeded = new TaskTAttemptEvent(
        getLastAttempt().getAttemptId(),
        TaskEventType.T_ATTEMPT_SUCCEEDED);
    mockTask.handle(succeeded);

    // Only the second attempt may commit.
    boolean firstMayCommit = mockTask.canCommit(taskAttempts.get(0).getAttemptId());
    assertFalse("First attempt should not commit", firstMayCommit);
    boolean secondMayCommit = mockTask.canCommit(getLastAttempt().getAttemptId());
    assertTrue("Second attempt should commit", secondMayCommit);

    assertTaskSucceededState();
}
Also used : TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskTAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent) Test(org.junit.Test)

Example 49 with Task

use of org.apache.hadoop.mapreduce.v2.app.job.Task in project hadoop by apache.

the class TestContainerLauncher method testSlowNM.

/**
 * Starts a container-management RPC server backed by
 * {@code DummyContainerManager}, runs a single-attempt job against it with a
 * 3000 ms NM command timeout, and expects the job to fail with a container
 * launch failure caused by a {@code SocketTimeoutException}.
 *
 * @throws Exception if the server or the application cannot be set up
 */
@Test(timeout = 15000)
public void testSlowNM() throws Exception {
    conf = new Configuration();
    int maxAttempts = 1;
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    // set timeout low for the test
    conf.setInt("yarn.rpc.nm-command-timeout", 3000);
    conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class.getName());
    YarnRPC rpc = YarnRPC.create(conf);
    String bindAddr = "localhost:0";
    InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr);
    NMTokenSecretManagerInNM tokenSecretManager = new NMTokenSecretManagerInNM();
    MasterKey masterKey = Records.newRecord(MasterKey.class);
    // Explicit charset so the key bytes do not depend on the platform default.
    masterKey.setBytes(ByteBuffer.wrap("key".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    tokenSecretManager.setMasterKey(masterKey);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "token");
    server = rpc.getServer(ContainerManagementProtocol.class, new DummyContainerManager(), addr, conf, tokenSecretManager, 1);
    server.start();
    MRApp app = new MRAppWithSlowNM(tokenSecretManager);
    try {
        Job job = app.submit(conf);
        app.waitForState(job, JobState.RUNNING);
        Map<TaskId, Task> tasks = job.getTasks();
        Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
        Task task = tasks.values().iterator().next();
        app.waitForState(task, TaskState.SCHEDULED);
        // Reuse the task fetched above instead of walking the map a second time.
        Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts();
        Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size());
        TaskAttempt attempt = attempts.values().iterator().next();
        app.waitForInternalState((TaskAttemptImpl) attempt, TaskAttemptStateInternal.ASSIGNED);
        // With a single allowed attempt, the launch timeout fails the whole job.
        app.waitForState(job, JobState.FAILED);
        String diagnostics = attempt.getDiagnostics().toString();
        LOG.info("attempt.getDiagnostics: " + diagnostics);
        Assert.assertTrue(diagnostics.contains("Container launch failed for " + "container_0_0000_01_000000 : "));
        Assert.assertTrue(diagnostics.contains("java.net.SocketTimeoutException: 3000 millis timeout while waiting for channel"));
    } finally {
        // Stop the app even if stopping the server throws, so neither leaks.
        try {
            server.stop();
        } finally {
            app.stop();
        }
    }
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) InetSocketAddress(java.net.InetSocketAddress) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) NMTokenSecretManagerInNM(org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM) YarnRPC(org.apache.hadoop.yarn.ipc.YarnRPC) HadoopYarnProtoRPC(org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC) ContainerManagementProtocol(org.apache.hadoop.yarn.api.ContainerManagementProtocol) MasterKey(org.apache.hadoop.yarn.server.api.records.MasterKey) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) MRApp(org.apache.hadoop.mapreduce.v2.app.MRApp) Test(org.junit.Test)

Example 50 with Task

use of org.apache.hadoop.mapreduce.v2.app.job.Task in project hadoop by apache.

the class TestRMContainerAllocator method testExcludeSchedReducesFromHeadroom.

/**
   * Tests whether scheduled reducers are excluded from headroom while
   * calculating headroom.
   *
   * <p>The test drives a {@code MyContainerAllocator} against a
   * {@code MyResourceManager}, forcing the scheduler's resource limit at
   * several points, and finally expects exactly one task attempt kill event
   * carrying {@code RMContainerAllocator.RAMPDOWN_DIAGNOSTIC} and one pending
   * reduce.
   *
   * @throws Exception if any of the resource manager, node manager or
   *         allocator calls fails
   */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
    LOG.info("Running testExcludeSchedReducesFromHeadroom");
    Configuration conf = new Configuration();
    // NOTE(review): -1 presumably disables the unconditional reducer
    // preemption delay - confirm against MRJobConfig.
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    // Register the node used for the AM and send its first heartbeat.
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    // Mock a running job whose task attempts all report 1% progress.
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    Task mockTask = mock(Task.class);
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
    when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
    when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    MockNM nodeManager = rm.registerNode("h1:1234", 4096);
    dispatcher.await();
    // Register nodes to RM.
    MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer (some on nodes which are not registered).
    // NOTE(review): the trailing (false, true) arguments of createReq appear
    // to mark the request as a reduce - confirm against createReq.
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    // Force the resource limit reported by the scheduler before scheduling.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
    allocator.schedule();
    dispatcher.await();
    // Two maps are assigned.
    Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // No map should be assigned.
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // h2 heartbeats.
    nodeManager2.nodeHeartbeat(true);
    dispatcher.await();
    // Send request for one more mapper.
    ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
    allocator.sendRequest(event5);
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
    allocator.schedule();
    dispatcher.await();
    // One reducer is assigned and one map is scheduled
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
    // Headroom is enough to run a mapper if it is taken as is, but it won't be
    // enough if the resources of scheduled reducers are deducted.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
    allocator.schedule();
    dispatcher.await();
    // After allocate response, the one assigned reducer is preempted and killed
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Aggregations

Task (org.apache.hadoop.mapreduce.v2.app.job.Task)157 Test (org.junit.Test)153 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)150 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)107 TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)94 TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)79 TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId)73 Configuration (org.apache.hadoop.conf.Configuration)68 ClientResponse (com.sun.jersey.api.client.ClientResponse)56 WebResource (com.sun.jersey.api.client.WebResource)56 TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent)52 JSONObject (org.codehaus.jettison.json.JSONObject)46 AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext)25 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)23 Path (org.apache.hadoop.fs.Path)22 MapTaskAttemptImpl (org.apache.hadoop.mapred.MapTaskAttemptImpl)20 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 HashMap (java.util.HashMap)19 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)19 JobConf (org.apache.hadoop.mapred.JobConf)16