Search in sources:

Example 6 with RMAppMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in the Apache Hadoop project.

From the class TestCapacityScheduler, method waitForAppPreemptionInfo.

/**
 * Blocks until the preemption metrics reported for {@code app} and its
 * current attempt all equal the expected values, re-checking every 500 ms.
 * The method itself has no timeout: it keeps polling until every value
 * matches or the thread is interrupted.
 *
 * @param app the application whose metrics are polled
 * @param preempted expected application-level preempted resource
 * @param numAMPreempted expected application-level count of preempted AM
 *          containers
 * @param numTaskPreempted expected application-level count of preempted
 *          non-AM containers
 * @param currentAttemptPreempted expected preempted resource of the
 *          current attempt
 * @param currentAttemptAMPreempted expected "is preempted" flag of the
 *          current attempt
 * @param numLatestAttemptTaskPreempted expected count of preempted non-AM
 *          containers of the current attempt
 * @throws InterruptedException if interrupted while sleeping between polls
 */
private void waitForAppPreemptionInfo(RMApp app, Resource preempted, int numAMPreempted, int numTaskPreempted, Resource currentAttemptPreempted, boolean currentAttemptAMPreempted, int numLatestAttemptTaskPreempted) throws InterruptedException {
    for (;;) {
        RMAppMetrics appMetrics = app.getRMAppMetrics();
        RMAppAttemptMetrics attemptMetrics = app.getCurrentAppAttempt().getRMAppAttemptMetrics();
        // Application-level counters are checked first; the attempt-level
        // ones are only consulted once these agree.
        boolean appLevelMatches = appMetrics.getResourcePreempted().equals(preempted)
            && appMetrics.getNumAMContainersPreempted() == numAMPreempted
            && appMetrics.getNumNonAMContainersPreempted() == numTaskPreempted;
        if (appLevelMatches) {
            boolean attemptLevelMatches = attemptMetrics.getResourcePreempted().equals(currentAttemptPreempted)
                && app.getCurrentAppAttempt().getRMAppAttemptMetrics().getIsPreempted() == currentAttemptAMPreempted
                && attemptMetrics.getNumNonAMContainersPreempted() == numLatestAttemptTaskPreempted;
            if (attemptLevelMatches) {
                return;
            }
        }
        Thread.sleep(500);
    }
}
Also used : RMAppAttemptMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics) RMAppMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics)

Example 7 with RMAppMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in the Apache Hadoop project.

From the class TestContainerResourceUsage, method amRestartTests.

/**
 * Runs an application through a failed first attempt and a successful second
 * attempt, summing the memory-seconds and vcore-seconds computed per
 * container, and asserts that the totals equal the values reported by the
 * application's {@link RMAppMetrics}.
 *
 * @param keepRunningContainers passed to {@code submitApp} as its last
 *          argument; when {@code true}, only the AM container's usage is
 *          counted for the first attempt and every other captured container
 *          is asserted to still be RUNNING; when {@code false}, the usage of
 *          all captured containers is counted after waiting for each to
 *          complete
 * @throws Exception if any step of the scenario fails
 */
private void amRestartTests(boolean keepRunningContainers) throws Exception {
    MockRM rm = new MockRM(conf);
    try {
        rm.start();
        RMApp app = rm.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, keepRunningContainers);
        MockNM nm = new MockNM("127.0.0.1:1234", 10240, rm.getResourceTrackerService());
        nm.registerNode();
        MockAM am0 = MockRM.launchAndRegisterAM(app, rm, nm);
        int NUM_CONTAINERS = 1;
        // allocate NUM_CONTAINERS containers
        am0.allocate("127.0.0.1", 1024, NUM_CONTAINERS, new ArrayList<ContainerId>());
        nm.nodeHeartbeat(true);
        // wait for containers to be allocated.
        List<Container> containers = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
        while (containers.size() != NUM_CONTAINERS) {
            nm.nodeHeartbeat(true);
            containers.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
            Thread.sleep(200);
        }
        // launch the 2nd container.
        ContainerId containerId2 = ContainerId.newContainerId(am0.getApplicationAttemptId(), 2);
        nm.nodeHeartbeat(am0.getApplicationAttemptId(), containerId2.getContainerId(), ContainerState.RUNNING);
        rm.waitForState(nm, containerId2, RMContainerState.RUNNING);
        // Capture the containers here so the metrics can be calculated after the
        // app has completed.
        Collection<RMContainer> rmContainers = rm.scheduler.getSchedulerAppInfo(am0.getApplicationAttemptId()).getLiveContainers();
        // fail the first app attempt by sending CONTAINER_FINISHED event without
        // registering.
        ContainerId amContainerId = app.getCurrentAppAttempt().getMasterContainer().getId();
        nm.nodeHeartbeat(am0.getApplicationAttemptId(), amContainerId.getContainerId(), ContainerState.COMPLETE);
        rm.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
        rm.drainEvents();
        long memorySeconds = 0;
        long vcoreSeconds = 0;
        // Calculate container usage metrics for first attempt.
        if (keepRunningContainers) {
            // Only calculate the usage for the one container that has completed.
            for (RMContainer c : rmContainers) {
                if (c.getContainerId().equals(amContainerId)) {
                    AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
                    memorySeconds += ru.getMemorySeconds();
                    vcoreSeconds += ru.getVcoreSeconds();
                } else {
                    // The remaining container should be RUNNING.
                    Assert.assertTrue("After first attempt failed, remaining container " + "should still be running. ", c.getContainerState().equals(ContainerState.RUNNING));
                }
            }
        } else {
            // If keepRunningContainers is false, all live containers should now
            // be completed. Calculate the resource usage metrics for all of them.
            for (RMContainer c : rmContainers) {
                waitforContainerCompletion(rm, nm, amContainerId, c);
                AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
                memorySeconds += ru.getMemorySeconds();
                vcoreSeconds += ru.getVcoreSeconds();
            }
        }
        // wait for app to start a new attempt.
        rm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
        // assert this is a new AM.
        RMAppAttempt attempt2 = app.getCurrentAppAttempt();
        Assert.assertFalse(attempt2.getAppAttemptId().equals(am0.getApplicationAttemptId()));
        rm.waitForState(attempt2.getAppAttemptId(), RMAppAttemptState.SCHEDULED);
        nm.nodeHeartbeat(true);
        MockAM am1 = rm.sendAMLaunched(attempt2.getAppAttemptId());
        am1.registerAppAttempt();
        rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
        // allocate NUM_CONTAINERS containers
        am1.allocate("127.0.0.1", 1024, NUM_CONTAINERS, new ArrayList<ContainerId>());
        nm.nodeHeartbeat(true);
        // wait for containers to be allocated.
        containers = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
        while (containers.size() != NUM_CONTAINERS) {
            nm.nodeHeartbeat(true);
            containers.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
            Thread.sleep(200);
        }
        rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
        // Capture running containers for later use by metrics calculations.
        rmContainers = rm.scheduler.getSchedulerAppInfo(attempt2.getAppAttemptId()).getLiveContainers();
        // complete container by sending the container complete event which has
        // earlier attempt's attemptId
        amContainerId = app.getCurrentAppAttempt().getMasterContainer().getId();
        nm.nodeHeartbeat(am0.getApplicationAttemptId(), amContainerId.getContainerId(), ContainerState.COMPLETE);
        MockRM.finishAMAndVerifyAppState(app, rm, nm, am1);
        // Calculate container usage metrics for second attempt.
        for (RMContainer c : rmContainers) {
            waitforContainerCompletion(rm, nm, amContainerId, c);
            AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
            memorySeconds += ru.getMemorySeconds();
            vcoreSeconds += ru.getVcoreSeconds();
        }
        RMAppMetrics rmAppMetrics = app.getRMAppMetrics();
        Assert.assertEquals("Unexpected MemorySeconds value", memorySeconds, rmAppMetrics.getMemorySeconds());
        Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, rmAppMetrics.getVcoreSeconds());
    } finally {
        // Stop the RM even when a wait or assertion above fails, so a failing
        // run does not leave a started MockRM behind for later tests.
        rm.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMAppMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics) ArrayList(java.util.ArrayList) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) AggregateAppResourceUsage(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage)

Example 8 with RMAppMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in the Apache Hadoop project.

From the class TestContainerResourceUsage, method testUsageWithMultipleContainersAndRMRestart.

/**
 * Verifies that the memory-seconds and vcore-seconds reported for an
 * application match the per-container totals computed by the test, and that
 * the same values are reported by a second RM started from the same state
 * store.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test(timeout = 120000)
public void testUsageWithMultipleContainersAndRMRestart() throws Exception {
    // Set max attempts to 1 so that when the first attempt fails, the app
    // won't try to start a new one.
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm0 = new MockRM(conf, memStore);
    // Assigned only once the "restarted" RM is created; stays null if the
    // test fails before that point.
    MockRM rm1 = null;
    try {
        rm0.start();
        MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.getResourceTrackerService());
        nm.registerNode();
        RMApp app0 = rm0.submitApp(200);
        rm0.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
        RMAppAttempt attempt0 = app0.getCurrentAppAttempt();
        ApplicationAttemptId attemptId0 = attempt0.getAppAttemptId();
        rm0.waitForState(attemptId0, RMAppAttemptState.SCHEDULED);
        nm.nodeHeartbeat(true);
        rm0.waitForState(attemptId0, RMAppAttemptState.ALLOCATED);
        MockAM am0 = rm0.sendAMLaunched(attempt0.getAppAttemptId());
        am0.registerAppAttempt();
        int NUM_CONTAINERS = 2;
        am0.allocate("127.0.0.1", 1000, NUM_CONTAINERS, new ArrayList<ContainerId>());
        nm.nodeHeartbeat(true);
        List<Container> conts = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
        while (conts.size() != NUM_CONTAINERS) {
            nm.nodeHeartbeat(true);
            conts.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
            Thread.sleep(500);
        }
        // launch the 2nd and 3rd containers.
        for (Container c : conts) {
            nm.nodeHeartbeat(attempt0.getAppAttemptId(), c.getId().getContainerId(), ContainerState.RUNNING);
            rm0.waitForState(nm, c.getId(), RMContainerState.RUNNING);
        }
        // Get the RMContainers for all of the live containers, to be used later
        // for metrics calculations and comparisons.
        Collection<RMContainer> rmContainers = rm0.scheduler.getSchedulerAppInfo(attempt0.getAppAttemptId()).getLiveContainers();
        // Allow metrics to accumulate (give up waiting after 5 seconds).
        int sleepInterval = 1000;
        int cumulativeSleepTime = 0;
        while (app0.getRMAppMetrics().getMemorySeconds() <= 0 && cumulativeSleepTime < 5000) {
            Thread.sleep(sleepInterval);
            cumulativeSleepTime += sleepInterval;
        }
        // Stop all non-AM containers
        for (Container c : conts) {
            if (c.getId().getContainerId() == 1) {
                continue;
            }
            nm.nodeHeartbeat(attempt0.getAppAttemptId(), c.getId().getContainerId(), ContainerState.COMPLETE);
            rm0.waitForState(nm, c.getId(), RMContainerState.COMPLETED);
        }
        // After all other containers have completed, manually complete the master
        // container in order to trigger a save to the state store of the resource
        // usage metrics. This will cause the attempt to fail, and, since the max
        // attempt retries is 1, the app will also fail. This is intentional so
        // that all containers will complete prior to saving.
        ContainerId cId = ContainerId.newContainerId(attempt0.getAppAttemptId(), 1);
        nm.nodeHeartbeat(attempt0.getAppAttemptId(), cId.getContainerId(), ContainerState.COMPLETE);
        rm0.waitForState(nm, cId, RMContainerState.COMPLETED);
        // Check that the container metrics match those from the app usage report.
        long memorySeconds = 0;
        long vcoreSeconds = 0;
        for (RMContainer c : rmContainers) {
            AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
            memorySeconds += ru.getMemorySeconds();
            vcoreSeconds += ru.getVcoreSeconds();
        }
        RMAppMetrics metricsBefore = app0.getRMAppMetrics();
        Assert.assertEquals("Unexpected MemorySeconds value", memorySeconds, metricsBefore.getMemorySeconds());
        Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore.getVcoreSeconds());
        // create new RM to represent RM restart. Load up the state store.
        rm1 = new MockRM(conf, memStore);
        rm1.start();
        RMApp app0After = rm1.getRMContext().getRMApps().get(app0.getApplicationId());
        // Compare container resource usage metrics from before and after restart.
        RMAppMetrics metricsAfter = app0After.getRMAppMetrics();
        Assert.assertEquals("Vcore seconds were not the same after RM Restart", metricsBefore.getVcoreSeconds(), metricsAfter.getVcoreSeconds());
        Assert.assertEquals("Memory seconds were not the same after RM Restart", metricsBefore.getMemorySeconds(), metricsAfter.getMemorySeconds());
    } finally {
        // Shut both RMs down even when a wait or assertion above fails; the
        // nested finally ensures rm1 is still released if stopping rm0 throws.
        try {
            rm0.stop();
            rm0.close();
        } finally {
            if (rm1 != null) {
                rm1.stop();
                rm1.close();
            }
        }
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMAppMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) AggregateAppResourceUsage(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage) Test(org.junit.Test)

Example 9 with RMAppMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in the Apache Hadoop project.

From the class TestAppManager, method testEscapeApplicationSummary.

/**
 * Builds an application summary for a mocked app whose name, user and queue
 * contain raw CR/LF characters, and checks that the rendered summary has no
 * raw line breaks, carries the escaped forms instead, and includes the
 * expected metric and application-type fields.
 */
@Test(timeout = 30000)
public void testEscapeApplicationSummary() {
    RMAppMetrics appMetrics = new RMAppMetrics(Resource.newInstance(1234, 56), 10, 1, 16384, 64, 0, 0);

    RMApp mockedApp = mock(RMAppImpl.class);
    when(mockedApp.getApplicationId()).thenReturn(ApplicationId.newInstance(100L, 1));
    when(mockedApp.getName()).thenReturn("Multiline\n\n\r\rAppName");
    when(mockedApp.getUser()).thenReturn("Multiline\n\n\r\rUserName");
    when(mockedApp.getQueue()).thenReturn("Multiline\n\n\r\rQueueName");
    when(mockedApp.getState()).thenReturn(RMAppState.RUNNING);
    when(mockedApp.getApplicationType()).thenReturn("MAPREDUCE");
    when(mockedApp.getRMAppMetrics()).thenReturn(appMetrics);

    RMAppManager.ApplicationSummary.SummaryBuilder builder = new RMAppManager.ApplicationSummary().createAppSummary(mockedApp);
    String summaryText = builder.toString();
    LOG.info("summary: " + summaryText);

    // No raw line breaks may survive in the rendered summary.
    Assert.assertFalse(summaryText.contains("\n"));
    Assert.assertFalse(summaryText.contains("\r"));

    // Each fragment must appear in the summary; checked in this order.
    String escaped = "\\n\\n\\r\\r";
    String[] expectedFragments = {
        "Multiline" + escaped + "AppName",
        "Multiline" + escaped + "UserName",
        "Multiline" + escaped + "QueueName",
        "memorySeconds=16384",
        "vcoreSeconds=64",
        "preemptedAMContainers=1",
        "preemptedNonAMContainers=10",
        "preemptedResources=<memory:1234\\, vCores:56>",
        "applicationType=MAPREDUCE"
    };
    for (String fragment : expectedFragments) {
        Assert.assertTrue(summaryText.contains(fragment));
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) RMAppMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics) Test(org.junit.Test)

Example 10 with RMAppMetrics

Use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in the Apache Hadoop project.

From the class TestRMWebAppFairScheduler, method mockRMContext.

/**
 * Builds an {@link RMContext} whose application map contains one
 * {@link MockRMApp} per entry of {@code states}. Each app reports all-zero
 * {@link RMAppMetrics}, an ACCEPTED {@link YarnApplicationState} and a
 * mocked current attempt. The returned context hands out a freshly mocked
 * {@link AbstractYarnScheduler} on every {@code getScheduler()} call.
 *
 * @param states the RM application states, one per application to create
 * @return the context exposing the created applications
 */
private static RMContext mockRMContext(List<RMAppState> states) {
    final ConcurrentMap<ApplicationId, RMApp> appsById = Maps.newConcurrentMap();
    // Running index, used as both constructor arguments of each mock app.
    int nextId = 0;
    for (RMAppState appState : states) {
        MockRMApp mockApp = new MockRMApp(nextId, nextId, appState) {

            @Override
            public RMAppMetrics getRMAppMetrics() {
                return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0, 0, 0);
            }

            @Override
            public YarnApplicationState createApplicationState() {
                return YarnApplicationState.ACCEPTED;
            }
        };
        mockApp.setCurrentAppAttempt(mock(RMAppAttempt.class));
        appsById.put(mockApp.getApplicationId(), mockApp);
        nextId++;
    }
    return new RMContextImpl(null, null, null, null, null, null, null, null, null, null) {

        @Override
        public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
            return appsById;
        }

        @Override
        public ResourceScheduler getScheduler() {
            return mock(AbstractYarnScheduler.class);
        }
    };
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) RMAppState(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMAppMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMContextImpl(org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl)

Aggregations

RMAppMetrics (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics)12 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)9 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)6 Test (org.junit.Test)4 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)3 AggregateAppResourceUsage (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage)3 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)2 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)2 ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext)2 Container (org.apache.hadoop.yarn.api.records.Container)2 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)2 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)2 MockRMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp)2 RMAppAttemptMetrics (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics)2 Binder (com.google.inject.Binder)1 Injector (com.google.inject.Injector)1 Module (com.google.inject.Module)1