Search in sources :

Example 51 with TaskId

use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in project hadoop by apache.

the class TestContainerLauncher method testSlowNM.

@Test(timeout = 15000)
public void testSlowNM() throws Exception {
    conf = new Configuration();
    int maxAttempts = 1;
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts);
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    // set timeout low for the test
    conf.setInt("yarn.rpc.nm-command-timeout", 3000);
    conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class.getName());
    YarnRPC rpc = YarnRPC.create(conf);
    String bindAddr = "localhost:0";
    InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr);
    NMTokenSecretManagerInNM tokenSecretManager = new NMTokenSecretManagerInNM();
    MasterKey masterKey = Records.newRecord(MasterKey.class);
    masterKey.setBytes(ByteBuffer.wrap("key".getBytes()));
    tokenSecretManager.setMasterKey(masterKey);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "token");
    server = rpc.getServer(ContainerManagementProtocol.class, new DummyContainerManager(), addr, conf, tokenSecretManager, 1);
    server.start();
    MRApp app = new MRAppWithSlowNM(tokenSecretManager);
    try {
        Job job = app.submit(conf);
        app.waitForState(job, JobState.RUNNING);
        Map<TaskId, Task> tasks = job.getTasks();
        Assert.assertEquals("Num tasks is not correct", 1, tasks.size());
        Task task = tasks.values().iterator().next();
        app.waitForState(task, TaskState.SCHEDULED);
        Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
        Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size());
        TaskAttempt attempt = attempts.values().iterator().next();
        app.waitForInternalState((TaskAttemptImpl) attempt, TaskAttemptStateInternal.ASSIGNED);
        app.waitForState(job, JobState.FAILED);
        String diagnostics = attempt.getDiagnostics().toString();
        LOG.info("attempt.getDiagnostics: " + diagnostics);
        Assert.assertTrue(diagnostics.contains("Container launch failed for " + "container_0_0000_01_000000 : "));
        Assert.assertTrue(diagnostics.contains("java.net.SocketTimeoutException: 3000 millis timeout while waiting for channel"));
    } finally {
        server.stop();
        app.stop();
    }
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) InetSocketAddress(java.net.InetSocketAddress) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) NMTokenSecretManagerInNM(org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM) YarnRPC(org.apache.hadoop.yarn.ipc.YarnRPC) HadoopYarnProtoRPC(org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC) ContainerManagementProtocol(org.apache.hadoop.yarn.api.ContainerManagementProtocol) MasterKey(org.apache.hadoop.yarn.server.api.records.MasterKey) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) MRApp(org.apache.hadoop.mapreduce.v2.app.MRApp) Test(org.junit.Test)

Example 52 with TaskId

use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in project hadoop by apache.

the class TestRMContainerAllocator method testExcludeSchedReducesFromHeadroom.

/**
   * Tests whether scheduled reducers are excluded from headroom while
   * calculating headroom.
   */
@Test
public void testExcludeSchedReducesFromHeadroom() throws Exception {
    LOG.info("Running testExcludeSchedReducesFromHeadroom");
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.MR_JOB_REDUCER_UNCONDITIONAL_PREEMPT_DELAY_SEC, -1);
    MyResourceManager rm = new MyResourceManager(conf);
    rm.start();
    DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext().getDispatcher();
    // Submit the application
    RMApp app = rm.submitApp(1024);
    dispatcher.await();
    MockNM amNodeManager = rm.registerNode("amNM:1234", 1260);
    amNodeManager.nodeHeartbeat(true);
    dispatcher.await();
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    rm.sendAMLaunched(appAttemptId);
    dispatcher.await();
    JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
    Job mockJob = mock(Job.class);
    when(mockJob.getReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "jobfile", null, false, ""));
    Task mockTask = mock(Task.class);
    TaskAttempt mockTaskAttempt = mock(TaskAttempt.class);
    when(mockJob.getTask((TaskId) any())).thenReturn(mockTask);
    when(mockTask.getAttempt((TaskAttemptId) any())).thenReturn(mockTaskAttempt);
    when(mockTaskAttempt.getProgress()).thenReturn(0.01f);
    MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob);
    MockNM nodeManager = rm.registerNode("h1:1234", 4096);
    dispatcher.await();
    // Register nodes to RM.
    MockNM nodeManager2 = rm.registerNode("h2:1234", 1024);
    dispatcher.await();
    // Request 2 maps and 1 reducer(sone on nodes which are not registered).
    ContainerRequestEvent event1 = createReq(jobId, 1, 1024, new String[] { "h1" });
    allocator.sendRequest(event1);
    ContainerRequestEvent event2 = createReq(jobId, 2, 1024, new String[] { "h2" });
    allocator.sendRequest(event2);
    ContainerRequestEvent event3 = createReq(jobId, 3, 1024, new String[] { "h1" }, false, true);
    allocator.sendRequest(event3);
    // This will tell the scheduler about the requests but there will be no
    // allocations as nodes are not added.
    allocator.schedule();
    dispatcher.await();
    // Request for another reducer on h3 which has not registered.
    ContainerRequestEvent event4 = createReq(jobId, 4, 1024, new String[] { "h3" }, false, true);
    allocator.sendRequest(event4);
    allocator.schedule();
    dispatcher.await();
    // Update resources in scheduler through node heartbeat from h1.
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(3072, 3));
    allocator.schedule();
    dispatcher.await();
    // Two maps are assigned.
    Assert.assertEquals(2, allocator.getAssignedRequests().maps.size());
    // Send deallocate request for map so that no maps are assigned after this.
    ContainerAllocatorEvent deallocate1 = createDeallocateEvent(jobId, 1, false);
    allocator.sendDeallocate(deallocate1);
    ContainerAllocatorEvent deallocate2 = createDeallocateEvent(jobId, 2, false);
    allocator.sendDeallocate(deallocate2);
    // No map should be assigned.
    Assert.assertEquals(0, allocator.getAssignedRequests().maps.size());
    nodeManager.nodeHeartbeat(true);
    dispatcher.await();
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1024, 1));
    allocator.schedule();
    dispatcher.await();
    // h2 heartbeats.
    nodeManager2.nodeHeartbeat(true);
    dispatcher.await();
    // Send request for one more mapper.
    ContainerRequestEvent event5 = createReq(jobId, 5, 1024, new String[] { "h1" });
    allocator.sendRequest(event5);
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(2048, 2));
    allocator.schedule();
    dispatcher.await();
    // One reducer is assigned and one map is scheduled
    Assert.assertEquals(1, allocator.getScheduledRequests().maps.size());
    Assert.assertEquals(1, allocator.getAssignedRequests().reduces.size());
    // Headroom enough to run a mapper if headroom is taken as it is but wont be
    // enough if scheduled reducers resources are deducted.
    rm.getMyFifoScheduler().forceResourceLimit(Resource.newInstance(1260, 2));
    allocator.schedule();
    dispatcher.await();
    // After allocate response, the one assigned reducer is preempted and killed
    Assert.assertEquals(1, MyContainerAllocator.getTaskAttemptKillEvents().size());
    Assert.assertEquals(RMContainerAllocator.RAMPDOWN_DIAGNOSTIC, MyContainerAllocator.getTaskAttemptKillEvents().get(0).getMessage());
    Assert.assertEquals(1, allocator.getNumOfPendingReduces());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 53 with TaskId

use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in project hadoop by apache.

the class TestTaskAttempt method testAppDiognosticEventOnNewTask.

@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
    ApplicationId appId = ApplicationId.newInstance(1, 2);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
    JobId jobId = MRBuilderUtils.newJobId(appId, 1);
    TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
    TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
    Path jobFile = mock(Path.class);
    MockEventHandler eventHandler = new MockEventHandler();
    TaskAttemptListener taListener = mock(TaskAttemptListener.class);
    when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
    JobConf jobConf = new JobConf();
    jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
    jobConf.setBoolean("fs.file.impl.disable.cache", true);
    jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
    jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");
    TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
    when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });
    AppContext appCtx = mock(AppContext.class);
    ClusterInfo clusterInfo = mock(ClusterInfo.class);
    Resource resource = mock(Resource.class);
    when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
    when(resource.getMemorySize()).thenReturn(1024L);
    setupTaskAttemptFinishingMonitor(eventHandler, jobConf, appCtx);
    TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, splits, jobConf, taListener, new Token(), new Credentials(), SystemClock.getInstance(), appCtx);
    NodeId nid = NodeId.newInstance("127.0.0.1", 0);
    ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId, "Task got killed"));
    assertFalse("InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task", eventHandler.internalError);
}
Also used : Path(org.apache.hadoop.fs.Path) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptListener(org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener) InetSocketAddress(java.net.InetSocketAddress) TaskAttemptDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent) AppContext(org.apache.hadoop.mapreduce.v2.app.AppContext) Resource(org.apache.hadoop.yarn.api.records.Resource) MapTaskAttemptImpl(org.apache.hadoop.mapred.MapTaskAttemptImpl) Token(org.apache.hadoop.security.token.Token) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ClusterInfo(org.apache.hadoop.mapreduce.v2.app.ClusterInfo) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskSplitMetaInfo(org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) MapTaskAttemptImpl(org.apache.hadoop.mapred.MapTaskAttemptImpl) JobConf(org.apache.hadoop.mapred.JobConf) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 54 with TaskId

use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in project hadoop by apache.

the class TestTaskAttempt method testContainerKillWhileRunning.

@Test
public void testContainerKillWhileRunning() throws Exception {
    ApplicationId appId = ApplicationId.newInstance(1, 2);
    ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
    JobId jobId = MRBuilderUtils.newJobId(appId, 1);
    TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
    TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
    Path jobFile = mock(Path.class);
    MockEventHandler eventHandler = new MockEventHandler();
    TaskAttemptListener taListener = mock(TaskAttemptListener.class);
    when(taListener.getAddress()).thenReturn(new InetSocketAddress("localhost", 0));
    JobConf jobConf = new JobConf();
    jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
    jobConf.setBoolean("fs.file.impl.disable.cache", true);
    jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
    jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");
    TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
    when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });
    AppContext appCtx = mock(AppContext.class);
    ClusterInfo clusterInfo = mock(ClusterInfo.class);
    Resource resource = mock(Resource.class);
    when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
    when(resource.getMemorySize()).thenReturn(1024L);
    TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1, splits, jobConf, taListener, new Token(), new Credentials(), SystemClock.getInstance(), appCtx);
    NodeId nid = NodeId.newInstance("127.0.0.2", 0);
    ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
    when(container.getNodeId()).thenReturn(nid);
    when(container.getNodeHttpAddress()).thenReturn("localhost:0");
    taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE));
    taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class)));
    taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0));
    assertEquals("Task attempt is not in running state", taImpl.getState(), TaskAttemptState.RUNNING);
    taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_KILL));
    assertFalse("InternalError occurred trying to handle TA_KILL", eventHandler.internalError);
    assertEquals("Task should be in KILL_CONTAINER_CLEANUP state", TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, taImpl.getInternalState());
}
Also used : Path(org.apache.hadoop.fs.Path) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) TaskAttemptListener(org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener) InetSocketAddress(java.net.InetSocketAddress) AppContext(org.apache.hadoop.mapreduce.v2.app.AppContext) Resource(org.apache.hadoop.yarn.api.records.Resource) MapTaskAttemptImpl(org.apache.hadoop.mapred.MapTaskAttemptImpl) Token(org.apache.hadoop.security.token.Token) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) TaskAttemptContainerAssignedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent) TaskAttemptContainerLaunchedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent) ClusterInfo(org.apache.hadoop.mapreduce.v2.app.ClusterInfo) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) TaskSplitMetaInfo(org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) MapTaskAttemptImpl(org.apache.hadoop.mapred.MapTaskAttemptImpl) JobConf(org.apache.hadoop.mapred.JobConf) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 55 with TaskId

use of org.apache.hadoop.mapreduce.v2.api.records.TaskId in project hadoop by apache.

the class TestTaskAttempt method testTaskAttemptAssignedFailHistory.

private void testTaskAttemptAssignedFailHistory(FailingAttemptsDuringAssignedMRApp app) throws Exception {
    Configuration conf = new Configuration();
    Job job = app.submit(conf);
    app.waitForState(job, JobState.FAILED);
    Map<TaskId, Task> tasks = job.getTasks();
    Assert.assertTrue("No Ta Started JH Event", app.getTaStartJHEvent());
    Assert.assertTrue("No Ta Failed JH Event", app.getTaFailedJHEvent());
}
Also used : Task(org.apache.hadoop.mapreduce.v2.app.job.Task) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Job(org.apache.hadoop.mapreduce.v2.app.job.Job)

Aggregations

TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)113 Test (org.junit.Test)75 Task (org.apache.hadoop.mapreduce.v2.app.job.Task)69 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)60 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)58 TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId)56 TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt)42 Configuration (org.apache.hadoop.conf.Configuration)29 AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext)24 TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent)24 Path (org.apache.hadoop.fs.Path)23 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)22 HashMap (java.util.HashMap)20 JobConf (org.apache.hadoop.mapred.JobConf)17 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)17 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)17 MapTaskAttemptImpl (org.apache.hadoop.mapred.MapTaskAttemptImpl)16 TaskAttemptListener (org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener)16 InetSocketAddress (java.net.InetSocketAddress)15 TaskSplitMetaInfo (org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo)15