use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in project hadoop by apache.
the class TestCapacityScheduler method waitForAppPreemptionInfo.
private void waitForAppPreemptionInfo(RMApp app, Resource preempted, int numAMPreempted, int numTaskPreempted, Resource currentAttemptPreempted, boolean currentAttemptAMPreempted, int numLatestAttemptTaskPreempted) throws InterruptedException {
while (true) {
RMAppMetrics appPM = app.getRMAppMetrics();
RMAppAttemptMetrics attemptPM = app.getCurrentAppAttempt().getRMAppAttemptMetrics();
if (appPM.getResourcePreempted().equals(preempted) && appPM.getNumAMContainersPreempted() == numAMPreempted && appPM.getNumNonAMContainersPreempted() == numTaskPreempted && attemptPM.getResourcePreempted().equals(currentAttemptPreempted) && app.getCurrentAppAttempt().getRMAppAttemptMetrics().getIsPreempted() == currentAttemptAMPreempted && attemptPM.getNumNonAMContainersPreempted() == numLatestAttemptTaskPreempted) {
return;
}
Thread.sleep(500);
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in project hadoop by apache.
the class TestContainerResourceUsage method amRestartTests.
private void amRestartTests(boolean keepRunningContainers) throws Exception {
MockRM rm = new MockRM(conf);
rm.start();
RMApp app = rm.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, keepRunningContainers);
MockNM nm = new MockNM("127.0.0.1:1234", 10240, rm.getResourceTrackerService());
nm.registerNode();
MockAM am0 = MockRM.launchAndRegisterAM(app, rm, nm);
int NUM_CONTAINERS = 1;
// allocate NUM_CONTAINERS containers
am0.allocate("127.0.0.1", 1024, NUM_CONTAINERS, new ArrayList<ContainerId>());
nm.nodeHeartbeat(true);
// wait for containers to be allocated.
List<Container> containers = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (containers.size() != NUM_CONTAINERS) {
nm.nodeHeartbeat(true);
containers.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(200);
}
// launch the 2nd container.
ContainerId containerId2 = ContainerId.newContainerId(am0.getApplicationAttemptId(), 2);
nm.nodeHeartbeat(am0.getApplicationAttemptId(), containerId2.getContainerId(), ContainerState.RUNNING);
rm.waitForState(nm, containerId2, RMContainerState.RUNNING);
// Capture the containers here so the metrics can be calculated after the
// app has completed.
Collection<RMContainer> rmContainers = rm.scheduler.getSchedulerAppInfo(am0.getApplicationAttemptId()).getLiveContainers();
// fail the first app attempt by sending CONTAINER_FINISHED event without
// registering.
ContainerId amContainerId = app.getCurrentAppAttempt().getMasterContainer().getId();
nm.nodeHeartbeat(am0.getApplicationAttemptId(), amContainerId.getContainerId(), ContainerState.COMPLETE);
rm.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
rm.drainEvents();
long memorySeconds = 0;
long vcoreSeconds = 0;
// Calculate container usage metrics for first attempt.
if (keepRunningContainers) {
// Only calculate the usage for the one container that has completed.
for (RMContainer c : rmContainers) {
if (c.getContainerId().equals(amContainerId)) {
AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
memorySeconds += ru.getMemorySeconds();
vcoreSeconds += ru.getVcoreSeconds();
} else {
// The remaining container should be RUNNING.
Assert.assertTrue("After first attempt failed, remaining container " + "should still be running. ", c.getContainerState().equals(ContainerState.RUNNING));
}
}
} else {
// be completed. Calculate the resource usage metrics for all of them.
for (RMContainer c : rmContainers) {
waitforContainerCompletion(rm, nm, amContainerId, c);
AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
memorySeconds += ru.getMemorySeconds();
vcoreSeconds += ru.getVcoreSeconds();
}
}
// wait for app to start a new attempt.
rm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
// assert this is a new AM.
RMAppAttempt attempt2 = app.getCurrentAppAttempt();
Assert.assertFalse(attempt2.getAppAttemptId().equals(am0.getApplicationAttemptId()));
rm.waitForState(attempt2.getAppAttemptId(), RMAppAttemptState.SCHEDULED);
nm.nodeHeartbeat(true);
MockAM am1 = rm.sendAMLaunched(attempt2.getAppAttemptId());
am1.registerAppAttempt();
rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
// allocate NUM_CONTAINERS containers
am1.allocate("127.0.0.1", 1024, NUM_CONTAINERS, new ArrayList<ContainerId>());
nm.nodeHeartbeat(true);
// wait for containers to be allocated.
containers = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (containers.size() != NUM_CONTAINERS) {
nm.nodeHeartbeat(true);
containers.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(200);
}
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
// Capture running containers for later use by metrics calculations.
rmContainers = rm.scheduler.getSchedulerAppInfo(attempt2.getAppAttemptId()).getLiveContainers();
// complete container by sending the container complete event which has
// earlier attempt's attemptId
amContainerId = app.getCurrentAppAttempt().getMasterContainer().getId();
nm.nodeHeartbeat(am0.getApplicationAttemptId(), amContainerId.getContainerId(), ContainerState.COMPLETE);
MockRM.finishAMAndVerifyAppState(app, rm, nm, am1);
// Calculate container usage metrics for second attempt.
for (RMContainer c : rmContainers) {
waitforContainerCompletion(rm, nm, amContainerId, c);
AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
memorySeconds += ru.getMemorySeconds();
vcoreSeconds += ru.getVcoreSeconds();
}
RMAppMetrics rmAppMetrics = app.getRMAppMetrics();
Assert.assertEquals("Unexpected MemorySeconds value", memorySeconds, rmAppMetrics.getMemorySeconds());
Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, rmAppMetrics.getVcoreSeconds());
rm.stop();
return;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in project hadoop by apache.
the class TestContainerResourceUsage method testUsageWithMultipleContainersAndRMRestart.
@Test(timeout = 120000)
public void testUsageWithMultipleContainersAndRMRestart() throws Exception {
// Set max attempts to 1 so that when the first attempt fails, the app
// won't try to start a new one.
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
MockRM rm0 = new MockRM(conf, memStore);
rm0.start();
MockNM nm = new MockNM("127.0.0.1:1234", 65536, rm0.getResourceTrackerService());
nm.registerNode();
RMApp app0 = rm0.submitApp(200);
rm0.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
RMAppAttempt attempt0 = app0.getCurrentAppAttempt();
ApplicationAttemptId attemptId0 = attempt0.getAppAttemptId();
rm0.waitForState(attemptId0, RMAppAttemptState.SCHEDULED);
nm.nodeHeartbeat(true);
rm0.waitForState(attemptId0, RMAppAttemptState.ALLOCATED);
MockAM am0 = rm0.sendAMLaunched(attempt0.getAppAttemptId());
am0.registerAppAttempt();
int NUM_CONTAINERS = 2;
am0.allocate("127.0.0.1", 1000, NUM_CONTAINERS, new ArrayList<ContainerId>());
nm.nodeHeartbeat(true);
List<Container> conts = am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
while (conts.size() != NUM_CONTAINERS) {
nm.nodeHeartbeat(true);
conts.addAll(am0.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
Thread.sleep(500);
}
// launch the 2nd and 3rd containers.
for (Container c : conts) {
nm.nodeHeartbeat(attempt0.getAppAttemptId(), c.getId().getContainerId(), ContainerState.RUNNING);
rm0.waitForState(nm, c.getId(), RMContainerState.RUNNING);
}
// Get the RMContainers for all of the live containers, to be used later
// for metrics calculations and comparisons.
Collection<RMContainer> rmContainers = rm0.scheduler.getSchedulerAppInfo(attempt0.getAppAttemptId()).getLiveContainers();
// Allow metrics to accumulate.
int sleepInterval = 1000;
int cumulativeSleepTime = 0;
while (app0.getRMAppMetrics().getMemorySeconds() <= 0 && cumulativeSleepTime < 5000) {
Thread.sleep(sleepInterval);
cumulativeSleepTime += sleepInterval;
}
// Stop all non-AM containers
for (Container c : conts) {
if (c.getId().getContainerId() == 1)
continue;
nm.nodeHeartbeat(attempt0.getAppAttemptId(), c.getId().getContainerId(), ContainerState.COMPLETE);
rm0.waitForState(nm, c.getId(), RMContainerState.COMPLETED);
}
// After all other containers have completed, manually complete the master
// container in order to trigger a save to the state store of the resource
// usage metrics. This will cause the attempt to fail, and, since the max
// attempt retries is 1, the app will also fail. This is intentional so
// that all containers will complete prior to saving.
ContainerId cId = ContainerId.newContainerId(attempt0.getAppAttemptId(), 1);
nm.nodeHeartbeat(attempt0.getAppAttemptId(), cId.getContainerId(), ContainerState.COMPLETE);
rm0.waitForState(nm, cId, RMContainerState.COMPLETED);
// Check that the container metrics match those from the app usage report.
long memorySeconds = 0;
long vcoreSeconds = 0;
for (RMContainer c : rmContainers) {
AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c);
memorySeconds += ru.getMemorySeconds();
vcoreSeconds += ru.getVcoreSeconds();
}
RMAppMetrics metricsBefore = app0.getRMAppMetrics();
Assert.assertEquals("Unexpected MemorySeconds value", memorySeconds, metricsBefore.getMemorySeconds());
Assert.assertEquals("Unexpected VcoreSeconds value", vcoreSeconds, metricsBefore.getVcoreSeconds());
// create new RM to represent RM restart. Load up the state store.
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
RMApp app0After = rm1.getRMContext().getRMApps().get(app0.getApplicationId());
// Compare container resource usage metrics from before and after restart.
RMAppMetrics metricsAfter = app0After.getRMAppMetrics();
Assert.assertEquals("Vcore seconds were not the same after RM Restart", metricsBefore.getVcoreSeconds(), metricsAfter.getVcoreSeconds());
Assert.assertEquals("Memory seconds were not the same after RM Restart", metricsBefore.getMemorySeconds(), metricsAfter.getMemorySeconds());
rm0.stop();
rm0.close();
rm1.stop();
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in project hadoop by apache.
the class TestAppManager method testEscapeApplicationSummary.
@Test(timeout = 30000)
public void testEscapeApplicationSummary() {
RMApp app = mock(RMAppImpl.class);
when(app.getApplicationId()).thenReturn(ApplicationId.newInstance(100L, 1));
when(app.getName()).thenReturn("Multiline\n\n\r\rAppName");
when(app.getUser()).thenReturn("Multiline\n\n\r\rUserName");
when(app.getQueue()).thenReturn("Multiline\n\n\r\rQueueName");
when(app.getState()).thenReturn(RMAppState.RUNNING);
when(app.getApplicationType()).thenReturn("MAPREDUCE");
RMAppMetrics metrics = new RMAppMetrics(Resource.newInstance(1234, 56), 10, 1, 16384, 64, 0, 0);
when(app.getRMAppMetrics()).thenReturn(metrics);
RMAppManager.ApplicationSummary.SummaryBuilder summary = new RMAppManager.ApplicationSummary().createAppSummary(app);
String msg = summary.toString();
LOG.info("summary: " + msg);
Assert.assertFalse(msg.contains("\n"));
Assert.assertFalse(msg.contains("\r"));
String escaped = "\\n\\n\\r\\r";
Assert.assertTrue(msg.contains("Multiline" + escaped + "AppName"));
Assert.assertTrue(msg.contains("Multiline" + escaped + "UserName"));
Assert.assertTrue(msg.contains("Multiline" + escaped + "QueueName"));
Assert.assertTrue(msg.contains("memorySeconds=16384"));
Assert.assertTrue(msg.contains("vcoreSeconds=64"));
Assert.assertTrue(msg.contains("preemptedAMContainers=1"));
Assert.assertTrue(msg.contains("preemptedNonAMContainers=10"));
Assert.assertTrue(msg.contains("preemptedResources=<memory:1234\\, vCores:56>"));
Assert.assertTrue(msg.contains("applicationType=MAPREDUCE"));
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics in project hadoop by apache.
the class TestRMWebAppFairScheduler method mockRMContext.
private static RMContext mockRMContext(List<RMAppState> states) {
final ConcurrentMap<ApplicationId, RMApp> applicationsMaps = Maps.newConcurrentMap();
int i = 0;
for (RMAppState state : states) {
MockRMApp app = new MockRMApp(i, i, state) {
@Override
public RMAppMetrics getRMAppMetrics() {
return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0, 0, 0);
}
@Override
public YarnApplicationState createApplicationState() {
return YarnApplicationState.ACCEPTED;
}
};
RMAppAttempt attempt = mock(RMAppAttempt.class);
app.setCurrentAppAttempt(attempt);
applicationsMaps.put(app.getApplicationId(), app);
i++;
}
RMContextImpl rmContext = new RMContextImpl(null, null, null, null, null, null, null, null, null, null) {
@Override
public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
return applicationsMaps;
}
@Override
public ResourceScheduler getScheduler() {
return mock(AbstractYarnScheduler.class);
}
};
return rmContext;
}
Aggregations