use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in project hadoop by apache.
the class FairScheduler method assignToQueue.
/**
* Helper method that attempts to assign the app to a queue. The method is
* responsible to call the appropriate event-handler if the app is rejected.
*/
@VisibleForTesting
FSLeafQueue assignToQueue(RMApp rmApp, String queueName, String user) {
FSLeafQueue queue = null;
String appRejectMsg = null;
try {
QueuePlacementPolicy placementPolicy = allocConf.getPlacementPolicy();
queueName = placementPolicy.assignAppToQueue(queueName, user);
if (queueName == null) {
appRejectMsg = "Application rejected by queue placement policy";
} else {
queue = queueMgr.getLeafQueue(queueName, true);
if (queue == null) {
appRejectMsg = queueName + " is not a leaf queue";
}
}
} catch (IllegalStateException se) {
appRejectMsg = "Unable to match app " + rmApp.getApplicationId() + " to a queue placement policy, and no valid terminal queue " + " placement rule is configured. Please contact an administrator " + " to confirm that the fair scheduler configuration contains a " + " valid terminal queue placement rule.";
} catch (InvalidQueueNameException qne) {
appRejectMsg = qne.getMessage();
} catch (IOException ioe) {
// IOException should only happen for a user without groups
appRejectMsg = "Error assigning app to a queue: " + ioe.getMessage();
}
if (appRejectMsg != null && rmApp != null) {
LOG.error(appRejectMsg);
rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(rmApp.getApplicationId(), RMAppEventType.APP_REJECTED, appRejectMsg));
return null;
}
if (rmApp != null) {
rmApp.setQueue(queue.getName());
} else {
LOG.error("Couldn't find RM app to set queue name on");
}
return queue;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in project hadoop by apache.
the class FairScheduler method addApplication.
/**
* Add a new application to the scheduler, with a given id, queue name, and
* user. This will accept a new app even if the user or queue is above
* configured limits, but the app will not be marked as runnable.
*/
protected void addApplication(ApplicationId applicationId, String queueName, String user, boolean isAppRecovering) {
if (queueName == null || queueName.isEmpty()) {
String message = "Reject application " + applicationId + " submitted by user " + user + " with an empty queue name.";
LOG.info(message);
rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
return;
}
if (queueName.startsWith(".") || queueName.endsWith(".")) {
String message = "Reject application " + applicationId + " submitted by user " + user + " with an illegal queue name " + queueName + ". " + "The queue name cannot start/end with period.";
LOG.info(message);
rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, message));
return;
}
try {
writeLock.lock();
RMApp rmApp = rmContext.getRMApps().get(applicationId);
FSLeafQueue queue = assignToQueue(rmApp, queueName, user);
if (queue == null) {
return;
}
// Enforce ACLs
UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user);
if (!queue.hasAccess(QueueACL.SUBMIT_APPLICATIONS, userUgi) && !queue.hasAccess(QueueACL.ADMINISTER_QUEUE, userUgi)) {
String msg = "User " + userUgi.getUserName() + " cannot submit applications to queue " + queue.getName() + "(requested queuename is " + queueName + ")";
LOG.info(msg);
rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, msg));
return;
}
SchedulerApplication<FSAppAttempt> application = new SchedulerApplication<FSAppAttempt>(queue, user);
applications.put(applicationId, application);
queue.getMetrics().submitApp(user);
LOG.info("Accepted application " + applicationId + " from user: " + user + ", in queue: " + queue.getName() + ", currently num of applications: " + applications.size());
if (isAppRecovering) {
if (LOG.isDebugEnabled()) {
LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED");
}
} else {
rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
}
} finally {
writeLock.unlock();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in project hadoop by apache.
the class TestRMAppLogAggregationStatus method testLogAggregationStatus.
@Test
public void testLogAggregationStatus() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
conf.setLong(YarnConfiguration.LOG_AGGREGATION_STATUS_TIME_OUT_MS, 1500);
RMApp rmApp = createRMApp(conf);
this.rmContext.getRMApps().put(appId, rmApp);
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.START));
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_NEW_SAVED));
rmApp.handle(new RMAppEvent(this.appId, RMAppEventType.APP_ACCEPTED));
// This application will be running on two nodes
NodeId nodeId1 = NodeId.newInstance("localhost", 1234);
Resource capability = Resource.newInstance(4096, 4);
RMNodeImpl node1 = new RMNodeImpl(nodeId1, rmContext, null, 0, 0, null, capability, null);
node1.handle(new RMNodeStartedEvent(nodeId1, null, null));
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId1));
NodeId nodeId2 = NodeId.newInstance("localhost", 2345);
RMNodeImpl node2 = new RMNodeImpl(nodeId2, rmContext, null, 0, 0, null, capability, null);
node2.handle(new RMNodeStartedEvent(node2.getNodeID(), null, null));
rmApp.handle(new RMAppRunningOnNodeEvent(this.appId, nodeId2));
// The initial log aggregation status for these two nodes
// should be NOT_STARTED
Map<NodeId, LogAggregationReport> logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
Assert.assertEquals(LogAggregationStatus.NOT_START, report.getValue().getLogAggregationStatus());
}
List<LogAggregationReport> node1ReportForApp = new ArrayList<LogAggregationReport>();
String messageForNode1_1 = "node1 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report1 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_1);
node1ReportForApp.add(report1);
NodeStatus nodeStatus1 = NodeStatus.newInstance(node1.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp));
List<LogAggregationReport> node2ReportForApp = new ArrayList<LogAggregationReport>();
String messageForNode2_1 = "node2 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode2_1);
node2ReportForApp.add(report2);
NodeStatus nodeStatus2 = NodeStatus.newInstance(node2.getNodeID(), 0, new ArrayList<ContainerStatus>(), null, NodeHealthStatus.newInstance(true, null, 0), null, null, null);
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp));
// node1 and node2 has updated its log aggregation status
// verify that the log aggregation status for node1, node2
// has been changed
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1, report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
} else {
// should not contain log aggregation report for other nodes
Assert.fail("should not contain log aggregation report for other nodes");
}
}
// node1 updates its log aggregation status again
List<LogAggregationReport> node1ReportForApp2 = new ArrayList<LogAggregationReport>();
String messageForNode1_2 = "node1 logAggregation status updated at " + System.currentTimeMillis();
LogAggregationReport report1_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, messageForNode1_2);
node1ReportForApp2.add(report1_2);
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp2));
// verify that the log aggregation status for node1
// has been changed
// verify that the log aggregation status for node2
// does not change
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode1_1 + "\n" + messageForNode1_2, report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.RUNNING, report.getValue().getLogAggregationStatus());
Assert.assertEquals(messageForNode2_1, report.getValue().getDiagnosticMessage());
} else {
// should not contain log aggregation report for other nodes
Assert.fail("should not contain log aggregation report for other nodes");
}
}
// kill the application
rmApp.handle(new RMAppEvent(appId, RMAppEventType.KILL));
rmApp.handle(new RMAppEvent(appId, RMAppEventType.ATTEMPT_KILLED));
rmApp.handle(new RMAppEvent(appId, RMAppEventType.APP_UPDATE_SAVED));
Assert.assertEquals(RMAppState.KILLED, rmApp.getState());
// wait for 1500 ms
Thread.sleep(1500);
// the log aggregation status for both nodes should be changed
// to TIME_OUT
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
}
// Finally, node1 finished its log aggregation and sent out its final
// log aggregation status. The log aggregation status for node1 should
// be changed from TIME_OUT to SUCCEEDED
List<LogAggregationReport> node1ReportForApp3 = new ArrayList<LogAggregationReport>();
LogAggregationReport report1_3;
for (int i = 0; i < 10; i++) {
report1_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING, "test_message_" + i);
node1ReportForApp3.add(report1_3);
}
node1ReportForApp3.add(LogAggregationReport.newInstance(appId, LogAggregationStatus.SUCCEEDED, ""));
// For every logAggregationReport cached in memory, we can only save at most
// 10 diagnostic messages/failure messages
node1.handle(new RMNodeStatusEvent(node1.getNodeID(), nodeStatus1, null, node1ReportForApp3));
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertEquals(2, logAggregationStatus.size());
Assert.assertTrue(logAggregationStatus.containsKey(nodeId1));
Assert.assertTrue(logAggregationStatus.containsKey(nodeId2));
for (Entry<NodeId, LogAggregationReport> report : logAggregationStatus.entrySet()) {
if (report.getKey().equals(node1.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.SUCCEEDED, report.getValue().getLogAggregationStatus());
StringBuilder builder = new StringBuilder();
for (int i = 0; i < 9; i++) {
builder.append("test_message_" + i);
builder.append("\n");
}
builder.append("test_message_" + 9);
Assert.assertEquals(builder.toString(), report.getValue().getDiagnosticMessage());
} else if (report.getKey().equals(node2.getNodeID())) {
Assert.assertEquals(LogAggregationStatus.TIME_OUT, report.getValue().getLogAggregationStatus());
} else {
// should not contain log aggregation report for other nodes
Assert.fail("should not contain log aggregation report for other nodes");
}
}
// update log aggregationStatus for node2 as FAILED,
// so the log aggregation status for the App will become FAILED,
// and we only keep the log aggregation reports whose status is FAILED,
// so the log aggregation report for node1 will be removed.
List<LogAggregationReport> node2ReportForApp2 = new ArrayList<LogAggregationReport>();
LogAggregationReport report2_2 = LogAggregationReport.newInstance(appId, LogAggregationStatus.RUNNING_WITH_FAILURE, "Fail_Message");
LogAggregationReport report2_3 = LogAggregationReport.newInstance(appId, LogAggregationStatus.FAILED, "");
node2ReportForApp2.add(report2_2);
node2ReportForApp2.add(report2_3);
node2.handle(new RMNodeStatusEvent(node2.getNodeID(), nodeStatus2, null, node2ReportForApp2));
Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport());
logAggregationStatus = rmApp.getLogAggregationReportsForApp();
Assert.assertTrue(logAggregationStatus.size() == 1);
Assert.assertTrue(logAggregationStatus.containsKey(node2.getNodeID()));
Assert.assertTrue(!logAggregationStatus.containsKey(node1.getNodeID()));
Assert.assertEquals("Fail_Message", ((RMAppImpl) rmApp).getLogAggregationFailureMessagesForNM(nodeId2));
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in project hadoop by apache.
the class TestDelegationTokenRenewer method testAppRejectionWithCancelledDelegationToken.
@Test(timeout = 60000)
public void testAppRejectionWithCancelledDelegationToken() throws Exception {
MyFS dfs = (MyFS) FileSystem.get(conf);
LOG.info("dfs=" + (Object) dfs.hashCode() + ";conf=" + conf.hashCode());
MyToken token = dfs.getDelegationToken("user1");
token.cancelToken();
Credentials ts = new Credentials();
ts.addToken(token.getKind(), token);
// register the tokens for renewal
ApplicationId appId = BuilderUtils.newApplicationId(0, 0);
delegationTokenRenewer.addApplicationAsync(appId, ts, true, "user", new Configuration());
int waitCnt = 20;
while (waitCnt-- > 0) {
if (!eventQueue.isEmpty()) {
Event evt = eventQueue.take();
if (evt.getType() == RMAppEventType.APP_REJECTED) {
Assert.assertTrue(((RMAppEvent) evt).getApplicationId().equals(appId));
return;
}
} else {
Thread.sleep(500);
}
}
fail("App submission with a cancelled token should have failed");
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent in project hadoop by apache.
the class TestDelegationTokenRenewer method testDTKeepAlive1.
/**
* Basic idea of the test:
* 0. Setup token KEEP_ALIVE
* 1. create tokens.
* 2. register them for renewal - to be cancelled on app complete
* 3. Complete app.
* 4. Verify token is alive within the KEEP_ALIVE time
* 5. Verify token has been cancelled after the KEEP_ALIVE_TIME
* @throws IOException
* @throws URISyntaxException
*/
@Test(timeout = 60000)
public void testDTKeepAlive1() throws Exception {
Configuration lconf = new Configuration(conf);
lconf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
//Keep tokens alive for 6 seconds.
lconf.setLong(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS, 6000l);
//Try removing tokens every second.
lconf.setLong(YarnConfiguration.RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS, 1000l);
DelegationTokenRenewer localDtr = createNewDelegationTokenRenewer(lconf, counter);
RMContext mockContext = mock(RMContext.class);
when(mockContext.getSystemCredentialsForApps()).thenReturn(new ConcurrentHashMap<ApplicationId, ByteBuffer>());
ClientRMService mockClientRMService = mock(ClientRMService.class);
when(mockContext.getClientRMService()).thenReturn(mockClientRMService);
when(mockContext.getDelegationTokenRenewer()).thenReturn(localDtr);
when(mockContext.getDispatcher()).thenReturn(dispatcher);
InetSocketAddress sockAddr = InetSocketAddress.createUnresolved("localhost", 1234);
when(mockClientRMService.getBindAddress()).thenReturn(sockAddr);
localDtr.setRMContext(mockContext);
localDtr.init(lconf);
localDtr.start();
MyFS dfs = (MyFS) FileSystem.get(lconf);
LOG.info("dfs=" + (Object) dfs.hashCode() + ";conf=" + lconf.hashCode());
Credentials ts = new Credentials();
// get the delegation tokens
MyToken token1 = dfs.getDelegationToken("user1");
String nn1 = DelegationTokenRenewer.SCHEME + "://host1:0";
ts.addToken(new Text(nn1), token1);
// register the tokens for renewal
ApplicationId applicationId_0 = BuilderUtils.newApplicationId(0, 0);
localDtr.addApplicationAsync(applicationId_0, ts, true, "user", new Configuration());
waitForEventsToGetProcessed(localDtr);
if (!eventQueue.isEmpty()) {
Event evt = eventQueue.take();
if (evt instanceof RMAppEvent) {
Assert.assertEquals(((RMAppEvent) evt).getType(), RMAppEventType.START);
} else {
fail("RMAppEvent.START was expected!!");
}
}
localDtr.applicationFinished(applicationId_0);
waitForEventsToGetProcessed(localDtr);
//Token should still be around. Renewal should not fail.
token1.renew(lconf);
//Allow the keepalive time to run out
Thread.sleep(10000l);
//The token should have been cancelled at this point. Renewal will fail.
try {
token1.renew(lconf);
fail("Renewal of cancelled token should have failed");
} catch (InvalidToken ite) {
}
}
Aggregations