use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler in project hadoop by apache.
the class RMWebServices method getApps.
@GET
@Path("/apps")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public AppsInfo getApps(@Context HttpServletRequest hsr, @QueryParam("state") String stateQuery, @QueryParam("states") Set<String> statesQuery, @QueryParam("finalStatus") String finalStatusQuery, @QueryParam("user") String userQuery, @QueryParam("queue") String queueQuery, @QueryParam("limit") String count, @QueryParam("startedTimeBegin") String startedBegin, @QueryParam("startedTimeEnd") String startedEnd, @QueryParam("finishedTimeBegin") String finishBegin, @QueryParam("finishedTimeEnd") String finishEnd, @QueryParam("applicationTypes") Set<String> applicationTypes, @QueryParam("applicationTags") Set<String> applicationTags) {
boolean checkCount = false;
boolean checkStart = false;
boolean checkEnd = false;
boolean checkAppTypes = false;
boolean checkAppStates = false;
boolean checkAppTags = false;
long countNum = 0;
// set values suitable in case both of begin/end not specified
long sBegin = 0;
long sEnd = Long.MAX_VALUE;
long fBegin = 0;
long fEnd = Long.MAX_VALUE;
init();
if (count != null && !count.isEmpty()) {
checkCount = true;
countNum = Long.parseLong(count);
if (countNum <= 0) {
throw new BadRequestException("limit value must be greater then 0");
}
}
if (startedBegin != null && !startedBegin.isEmpty()) {
checkStart = true;
sBegin = Long.parseLong(startedBegin);
if (sBegin < 0) {
throw new BadRequestException("startedTimeBegin must be greater than 0");
}
}
if (startedEnd != null && !startedEnd.isEmpty()) {
checkStart = true;
sEnd = Long.parseLong(startedEnd);
if (sEnd < 0) {
throw new BadRequestException("startedTimeEnd must be greater than 0");
}
}
if (sBegin > sEnd) {
throw new BadRequestException("startedTimeEnd must be greater than startTimeBegin");
}
if (finishBegin != null && !finishBegin.isEmpty()) {
checkEnd = true;
fBegin = Long.parseLong(finishBegin);
if (fBegin < 0) {
throw new BadRequestException("finishTimeBegin must be greater than 0");
}
}
if (finishEnd != null && !finishEnd.isEmpty()) {
checkEnd = true;
fEnd = Long.parseLong(finishEnd);
if (fEnd < 0) {
throw new BadRequestException("finishTimeEnd must be greater than 0");
}
}
if (fBegin > fEnd) {
throw new BadRequestException("finishTimeEnd must be greater than finishTimeBegin");
}
Set<String> appTypes = parseQueries(applicationTypes, false);
if (!appTypes.isEmpty()) {
checkAppTypes = true;
}
Set<String> appTags = parseQueries(applicationTags, false);
if (!appTags.isEmpty()) {
checkAppTags = true;
}
// stateQuery is deprecated.
if (stateQuery != null && !stateQuery.isEmpty()) {
statesQuery.add(stateQuery);
}
Set<String> appStates = parseQueries(statesQuery, true);
if (!appStates.isEmpty()) {
checkAppStates = true;
}
GetApplicationsRequest request = GetApplicationsRequest.newInstance();
if (checkStart) {
request.setStartRange(sBegin, sEnd);
}
if (checkEnd) {
request.setFinishRange(fBegin, fEnd);
}
if (checkCount) {
request.setLimit(countNum);
}
if (checkAppTypes) {
request.setApplicationTypes(appTypes);
}
if (checkAppTags) {
request.setApplicationTags(appTags);
}
if (checkAppStates) {
request.setApplicationStates(appStates);
}
if (queueQuery != null && !queueQuery.isEmpty()) {
ResourceScheduler rs = rm.getResourceScheduler();
if (rs instanceof CapacityScheduler) {
CapacityScheduler cs = (CapacityScheduler) rs;
// validate queue exists
try {
cs.getQueueInfo(queueQuery, false, false);
} catch (IOException e) {
throw new BadRequestException(e.getMessage());
}
}
Set<String> queues = new HashSet<String>(1);
queues.add(queueQuery);
request.setQueues(queues);
}
if (userQuery != null && !userQuery.isEmpty()) {
Set<String> users = new HashSet<String>(1);
users.add(userQuery);
request.setUsers(users);
}
List<ApplicationReport> appReports = null;
try {
appReports = rm.getClientRMService().getApplications(request, false).getApplicationList();
} catch (YarnException e) {
LOG.error("Unable to retrieve apps from ClientRMService", e);
throw new YarnRuntimeException("Unable to retrieve apps from ClientRMService", e);
}
final ConcurrentMap<ApplicationId, RMApp> apps = rm.getRMContext().getRMApps();
AppsInfo allApps = new AppsInfo();
for (ApplicationReport report : appReports) {
RMApp rmapp = apps.get(report.getApplicationId());
if (rmapp == null) {
continue;
}
if (finalStatusQuery != null && !finalStatusQuery.isEmpty()) {
FinalApplicationStatus.valueOf(finalStatusQuery);
if (!rmapp.getFinalApplicationStatus().toString().equalsIgnoreCase(finalStatusQuery)) {
continue;
}
}
AppInfo app = new AppInfo(rm, rmapp, hasAccess(rmapp, hsr), WebAppUtils.getHttpSchemePrefix(conf));
allApps.add(app);
}
return allApps;
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler in project hadoop by apache.
the class RMWebServices method dumpSchedulerLogs.
@POST
@Path("/scheduler/logs")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public String dumpSchedulerLogs(@FormParam("time") String time, @Context HttpServletRequest hsr) throws IOException {
init();
UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true);
ApplicationACLsManager aclsManager = rm.getApplicationACLsManager();
if (aclsManager.areACLsEnabled()) {
if (callerUGI == null || !aclsManager.isAdmin(callerUGI)) {
String msg = "Only admins can carry out this operation.";
throw new ForbiddenException(msg);
}
}
ResourceScheduler rs = rm.getResourceScheduler();
int period = Integer.parseInt(time);
if (period <= 0) {
throw new BadRequestException("Period must be greater than 0");
}
final String logHierarchy = "org.apache.hadoop.yarn.server.resourcemanager.scheduler";
String logfile = "yarn-scheduler-debug.log";
if (rs instanceof CapacityScheduler) {
logfile = "yarn-capacity-scheduler-debug.log";
} else if (rs instanceof FairScheduler) {
logfile = "yarn-fair-scheduler-debug.log";
}
AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logfile);
// time period is sent to us in seconds
dumper.dumpLogs("DEBUG", period * 1000);
return "Capacity scheduler logs are being created.";
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler in project hadoop by apache.
the class TestClientRMService method mockRMContext.
private void mockRMContext(YarnScheduler yarnScheduler, RMContext rmContext) throws IOException {
Dispatcher dispatcher = mock(Dispatcher.class);
when(rmContext.getDispatcher()).thenReturn(dispatcher);
@SuppressWarnings("unchecked") EventHandler<Event> eventHandler = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(eventHandler);
QueueInfo queInfo = recordFactory.newRecordInstance(QueueInfo.class);
queInfo.setQueueName("testqueue");
when(yarnScheduler.getQueueInfo(eq("testqueue"), anyBoolean(), anyBoolean())).thenReturn(queInfo);
when(yarnScheduler.getQueueInfo(eq("nonexistentqueue"), anyBoolean(), anyBoolean())).thenThrow(new IOException("queue does not exist"));
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
ConcurrentHashMap<ApplicationId, RMApp> apps = getRMApps(rmContext, yarnScheduler);
when(rmContext.getRMApps()).thenReturn(apps);
when(yarnScheduler.getAppsInQueue(eq("testqueue"))).thenReturn(getSchedulerApps(apps));
ResourceScheduler rs = mock(ResourceScheduler.class);
when(rmContext.getScheduler()).thenReturn(rs);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler in project hadoop by apache.
the class TestClientRMService method getRMApp.
private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler, ApplicationId applicationId3, YarnConfiguration config, String queueName, final long memorySeconds, final long vcoreSeconds, String appNodeLabelExpression, String amNodeLabelExpression) {
ApplicationSubmissionContext asContext = mock(ApplicationSubmissionContext.class);
when(asContext.getMaxAppAttempts()).thenReturn(1);
when(asContext.getNodeLabelExpression()).thenReturn(appNodeLabelExpression);
when(asContext.getPriority()).thenReturn(Priority.newInstance(0));
RMAppImpl app = spy(new RMAppImpl(applicationId3, rmContext, config, null, null, queueName, asContext, yarnScheduler, null, System.currentTimeMillis(), "YARN", null, BuilderUtils.newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, Resource.newInstance(1024, 1), 1)) {
@Override
public ApplicationReport createAndGetApplicationReport(String clientUserName, boolean allowAccess) {
ApplicationReport report = super.createAndGetApplicationReport(clientUserName, allowAccess);
ApplicationResourceUsageReport usageReport = report.getApplicationResourceUsageReport();
usageReport.setMemorySeconds(memorySeconds);
usageReport.setVcoreSeconds(vcoreSeconds);
report.setApplicationResourceUsageReport(usageReport);
return report;
}
});
app.getAMResourceRequest().setNodeLabelExpression(amNodeLabelExpression);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(123456, 1), 1);
RMAppAttemptImpl rmAppAttemptImpl = spy(new RMAppAttemptImpl(attemptId, rmContext, yarnScheduler, null, asContext, config, null, app));
Container container = Container.newInstance(ContainerId.newContainerId(attemptId, 1), null, "", null, null, null);
RMContainerImpl containerimpl = spy(new RMContainerImpl(container, SchedulerRequestKey.extractFrom(container), attemptId, null, "", rmContext));
Map<ApplicationAttemptId, RMAppAttempt> attempts = new HashMap<ApplicationAttemptId, RMAppAttempt>();
attempts.put(attemptId, rmAppAttemptImpl);
when(app.getCurrentAppAttempt()).thenReturn(rmAppAttemptImpl);
when(app.getAppAttempts()).thenReturn(attempts);
when(app.getApplicationPriority()).thenReturn(Priority.newInstance(0));
when(rmAppAttemptImpl.getMasterContainer()).thenReturn(container);
ResourceScheduler rs = mock(ResourceScheduler.class);
when(rmContext.getScheduler()).thenReturn(rs);
when(rmContext.getScheduler().getRMContainer(any(ContainerId.class))).thenReturn(containerimpl);
SchedulerAppReport sAppReport = mock(SchedulerAppReport.class);
when(rmContext.getScheduler().getSchedulerAppInfo(any(ApplicationAttemptId.class))).thenReturn(sAppReport);
List<RMContainer> rmContainers = new ArrayList<RMContainer>();
rmContainers.add(containerimpl);
when(rmContext.getScheduler().getSchedulerAppInfo(attemptId).getLiveContainers()).thenReturn(rmContainers);
ContainerStatus cs = mock(ContainerStatus.class);
when(containerimpl.completed()).thenReturn(false);
when(containerimpl.getDiagnosticsInfo()).thenReturn("N/A");
when(containerimpl.getContainerExitStatus()).thenReturn(0);
when(containerimpl.getContainerState()).thenReturn(ContainerState.COMPLETE);
return app;
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Node on same host should not result in allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send Promotion req again with incorrect version...
// this should also result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure after correct node heartbeats, we should get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify Metrics After OPP allocation :
// Everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
Aggregations