use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
the class MockRM method sendNodeEvent.
public void sendNodeEvent(MockNM nm, RMNodeEventType event) throws Exception {
RMNodeImpl node = (RMNodeImpl) getRMContext().getRMNodes().get(nm.getNodeId());
node.handle(new RMNodeEvent(nm.getNodeId(), event));
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
the class MockRM method sendNodeStarted.
public void sendNodeStarted(MockNM nm) throws Exception {
RMNodeImpl node = (RMNodeImpl) getRMContext().getRMNodes().get(nm.getNodeId());
node.handle(new RMNodeStartedEvent(nm.getNodeId(), null, null));
drainEventsImplicitly();
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerComplete.
@Test(timeout = 600000)
public void testContainerPromoteAfterContainerComplete() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
nm1.registerNode();
nm2.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
// All nodes 1 to 2 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
// Start Container in NM
allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
Thread.sleep(200);
// Verify that container is actually running wrt the RM..
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
// Container Completed in the NM
allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0)), true);
Thread.sleep(200);
// Verify that container has been removed..
rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
Assert.assertNull(rmContainer);
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
// Send Promotion req... this should result in update error
// Since the container doesn't exist anymore..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(1, allocateResponse.getCompletedContainersStatuses().size());
Assert.assertEquals(container.getId(), allocateResponse.getCompletedContainersStatuses().get(0).getContainerId());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INVALID_CONTAINER_ID", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl in project hadoop by apache.
the class TestRMAppLogAggregationStatus method testLogAggregationStatus.
@Test
public void testLogAggregationStatus() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
RMApp rmApp = createRMApp(conf);
this.rmContext.getRMApps().put(appId, rmApp);
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
// This application will be running on two nodes
NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
Resource capability = Resource.newInstance(4096, 4);
RMNodeImpl node1 = new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
RMNodeImpl node2 = new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
// The initial log aggregation status for these two nodes
// should be NOT_STARTED
Map<NodeId, LogAggregationReport> logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
Assert.assertEquals(LogAggregationStatus.NOT_START, report.getValue().getLogAggregationStatus());
}
List<LogAggregationReport> node1ReportForApp = new ArrayList<LogAggregationReport>();
String messageForNode1_1 = "node1 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report1 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_1);
node1ReportForApp.add(report1);
NodeStatus nodeStatus1 = NodeStatus.newInstance(node1.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp));
List<LogAggregationReport> node2ReportForApp = new ArrayList<LogAggregationReport>();
String messageForNode2_1 = "node2 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode2_1);
node2ReportForApp.add(report2);
NodeStatus nodeStatus2 = NodeStatus.newInstance(node2.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp));
// node1 and node2 has updated its log aggregation status
// verify that the log aggregation status for node1, node2
// has been changed
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1, report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
} else {
// should not contain log aggregation report for other nodes
Assert.fail("should not contain log aggregation report for other nodes");
}
}
// node1 updates its log aggregation status again
List<LogAggregationReport> node1ReportForApp2 = new ArrayList<LogAggregationReport>();
String messageForNode1_2 = "node1 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report1_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_2);
node1ReportForApp2.add(report1_2);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp2));
// verify that the log aggregation status for node1
// has been changed
// verify that the log aggregation status for node2
// does not change
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1 + "\n" + messageForNode1_2, report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
} else {
// should not contain log aggregation report for other nodes
Assert.fail("should not contain log aggregation report for other nodes");
}
}
// kill the application
rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
// wait for 1500 ms
Thread.sleep(1500);
// the log aggregation status for both nodes should be changed
// to TIME_OUT
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
}
// Finally, node1 finished its log aggregation and sent out its final
// log aggregation status. The log aggregation status for node1 should
// be changed from TIME_OUT to SUCCEEDED
List<LogAggregationReport> node1ReportForApp3 = new ArrayList<LogAggregationReport>();
LogAggregationReport report1_3;
for (int i = 0; i < 10; i++) {
report1_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, "test_message_" + i);
node1ReportForApp3.add(report1_3);
}
node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.SUCCEEDED, ""));
// For every logAggregationReport cached in memory, we can only save at most
// 10 diagnostic messages/failure messages
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp3));
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.SUCCEEDED, report.getValue().getLogAggregationStatus());
StringBuilder builder = new StringBuilder();
for (int i = 0; i < 9; i++) {
builder.append("test_message_" + i);
builder.append("\n");
}
builder.append("test_message_" + 9);
Assert.assertEquals(builder.toString(), report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
} else {
// should not contain log aggregation report for other nodes
Assert.fail("should not contain log aggregation report for other nodes");
}
}
// update log aggregationStatus for node2 as FAILED,
// so the log aggregation status for the App will become FAILED,
// and we only keep the log aggregation reports whose status is FAILED,
// so the log aggregation report for node1 will be removed.
List<LogAggregationReport> node2ReportForApp2 = new ArrayList<LogAggregationReport>();
LogAggregationReport report2_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING_WITH_FAILURE, "Fail_Message");
LogAggregationReport report2_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.FAILED, "");
node2ReportForApp2.add(report2_2);
node2ReportForApp2.add(report2_3);
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp2));
Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport());
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertTrue(logAggregationStatus.size() == 1);
Assert.assertTrue(logAggregationStatus.containsKey(node2.getNodeID()));
Assert.assertTrue(!logAggregationStatus.containsKey(node1.getNodeID()));
Assert.assertEquals("Fail_Message", ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(nodeId2));
}
Aggregations