Search in sources :

Example 1 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class RMAppManager method createAndPopulateNewRMApp.

private RMAppImpl createAndPopulateNewRMApp(ApplicationSubmissionContext submissionContext, long submitTime, String user, boolean isRecovery, long startTime) throws YarnException {
    if (!isRecovery) {
        // Do queue mapping
        if (rmContext.getQueuePlacementManager() != null) {
            // We only do queue mapping when it's a new application
            rmContext.getQueuePlacementManager().placeApplication(submissionContext, user);
        }
        // fail the submission if configured application timeout value is invalid
        RMServerUtils.validateApplicationTimeouts(submissionContext.getApplicationTimeouts());
    }
    ApplicationId applicationId = submissionContext.getApplicationId();
    ResourceRequest amReq = null;
    try {
        amReq = validateAndCreateResourceRequest(submissionContext, isRecovery);
    } catch (InvalidLabelResourceRequestException e) {
        // after recovery and user can see what's going on and react accordingly.
        if (isRecovery && !YarnConfiguration.areNodeLabelsEnabled(this.conf)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("AMResourceRequest is not created for " + applicationId + ". NodeLabel is not enabled in cluster, but AM resource " + "request contains a label expression.");
            }
        } else {
            throw e;
        }
    }
    // Verify and get the update application priority and set back to
    // submissionContext
    UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user);
    Priority appPriority = scheduler.checkAndGetApplicationPriority(submissionContext.getPriority(), userUgi, submissionContext.getQueue(), applicationId);
    submissionContext.setPriority(appPriority);
    // For now, exclude FS for the acl check.
    if (!isRecovery && YarnConfiguration.isAclEnabled(conf) && scheduler instanceof CapacityScheduler) {
        String queueName = submissionContext.getQueue();
        String appName = submissionContext.getApplicationName();
        CSQueue csqueue = ((CapacityScheduler) scheduler).getQueue(queueName);
        if (null != csqueue && !authorizer.checkPermission(new AccessRequest(csqueue.getPrivilegedEntity(), userUgi, SchedulerUtils.toAccessType(QueueACL.SUBMIT_APPLICATIONS), applicationId.toString(), appName, Server.getRemoteAddress(), null)) && !authorizer.checkPermission(new AccessRequest(csqueue.getPrivilegedEntity(), userUgi, SchedulerUtils.toAccessType(QueueACL.ADMINISTER_QUEUE), applicationId.toString(), appName, Server.getRemoteAddress(), null))) {
            throw RPCUtil.getRemoteException(new AccessControlException("User " + user + " does not have permission to submit " + applicationId + " to queue " + submissionContext.getQueue()));
        }
    }
    // Create RMApp
    RMAppImpl application = new RMAppImpl(applicationId, rmContext, this.conf, submissionContext.getApplicationName(), user, submissionContext.getQueue(), submissionContext, this.scheduler, this.masterService, submitTime, submissionContext.getApplicationType(), submissionContext.getApplicationTags(), amReq, startTime);
    // influence each other
    if (rmContext.getRMApps().putIfAbsent(applicationId, application) != null) {
        String message = "Application with id " + applicationId + " is already present! Cannot add a duplicate!";
        LOG.warn(message);
        throw new YarnException(message);
    }
    if (YarnConfiguration.timelineServiceV2Enabled(conf)) {
        // Start timeline collector for the submitted app
        application.startTimelineCollector();
    }
    // Inform the ACLs Manager
    this.applicationACLsManager.addApplication(applicationId, submissionContext.getAMContainerSpec().getApplicationACLs());
    String appViewACLs = submissionContext.getAMContainerSpec().getApplicationACLs().get(ApplicationAccessType.VIEW_APP);
    rmContext.getSystemMetricsPublisher().appACLsUpdated(application, appViewACLs, System.currentTimeMillis());
    return application;
}
Also used : RMAppImpl(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl) AccessRequest(org.apache.hadoop.yarn.security.AccessRequest) Priority(org.apache.hadoop.yarn.api.records.Priority) AccessControlException(org.apache.hadoop.security.AccessControlException) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) InvalidLabelResourceRequestException(org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) CSQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 2 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class RMWebServices method getSchedulerInfo.

@GET
@Path("/scheduler")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public SchedulerTypeInfo getSchedulerInfo() {
    init();
    ResourceScheduler rs = rm.getResourceScheduler();
    SchedulerInfo sinfo;
    if (rs instanceof CapacityScheduler) {
        CapacityScheduler cs = (CapacityScheduler) rs;
        CSQueue root = cs.getRootQueue();
        sinfo = new CapacitySchedulerInfo(root, cs);
    } else if (rs instanceof FairScheduler) {
        FairScheduler fs = (FairScheduler) rs;
        sinfo = new FairSchedulerInfo(fs);
    } else if (rs instanceof FifoScheduler) {
        sinfo = new FifoSchedulerInfo(this.rm);
    } else {
        throw new NotFoundException("Unknown scheduler configured");
    }
    return new SchedulerTypeInfo(sinfo);
}
Also used : FairSchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo) CapacitySchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo) FifoSchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FifoSchedulerInfo) SchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo) SchedulerTypeInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerTypeInfo) FairScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler) CapacitySchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo) FairSchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FairSchedulerInfo) NotFoundException(org.apache.hadoop.yarn.webapp.NotFoundException) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) FifoScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler) FifoSchedulerInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.FifoSchedulerInfo) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) CSQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 3 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class RMWebServices method getApps.

@GET
@Path("/apps")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public AppsInfo getApps(@Context HttpServletRequest hsr, @QueryParam("state") String stateQuery, @QueryParam("states") Set<String> statesQuery, @QueryParam("finalStatus") String finalStatusQuery, @QueryParam("user") String userQuery, @QueryParam("queue") String queueQuery, @QueryParam("limit") String count, @QueryParam("startedTimeBegin") String startedBegin, @QueryParam("startedTimeEnd") String startedEnd, @QueryParam("finishedTimeBegin") String finishBegin, @QueryParam("finishedTimeEnd") String finishEnd, @QueryParam("applicationTypes") Set<String> applicationTypes, @QueryParam("applicationTags") Set<String> applicationTags) {
    boolean checkCount = false;
    boolean checkStart = false;
    boolean checkEnd = false;
    boolean checkAppTypes = false;
    boolean checkAppStates = false;
    boolean checkAppTags = false;
    long countNum = 0;
    // set values suitable in case both of begin/end not specified
    long sBegin = 0;
    long sEnd = Long.MAX_VALUE;
    long fBegin = 0;
    long fEnd = Long.MAX_VALUE;
    init();
    if (count != null && !count.isEmpty()) {
        checkCount = true;
        countNum = Long.parseLong(count);
        if (countNum <= 0) {
            throw new BadRequestException("limit value must be greater then 0");
        }
    }
    if (startedBegin != null && !startedBegin.isEmpty()) {
        checkStart = true;
        sBegin = Long.parseLong(startedBegin);
        if (sBegin < 0) {
            throw new BadRequestException("startedTimeBegin must be greater than 0");
        }
    }
    if (startedEnd != null && !startedEnd.isEmpty()) {
        checkStart = true;
        sEnd = Long.parseLong(startedEnd);
        if (sEnd < 0) {
            throw new BadRequestException("startedTimeEnd must be greater than 0");
        }
    }
    if (sBegin > sEnd) {
        throw new BadRequestException("startedTimeEnd must be greater than startTimeBegin");
    }
    if (finishBegin != null && !finishBegin.isEmpty()) {
        checkEnd = true;
        fBegin = Long.parseLong(finishBegin);
        if (fBegin < 0) {
            throw new BadRequestException("finishTimeBegin must be greater than 0");
        }
    }
    if (finishEnd != null && !finishEnd.isEmpty()) {
        checkEnd = true;
        fEnd = Long.parseLong(finishEnd);
        if (fEnd < 0) {
            throw new BadRequestException("finishTimeEnd must be greater than 0");
        }
    }
    if (fBegin > fEnd) {
        throw new BadRequestException("finishTimeEnd must be greater than finishTimeBegin");
    }
    Set<String> appTypes = parseQueries(applicationTypes, false);
    if (!appTypes.isEmpty()) {
        checkAppTypes = true;
    }
    Set<String> appTags = parseQueries(applicationTags, false);
    if (!appTags.isEmpty()) {
        checkAppTags = true;
    }
    // stateQuery is deprecated.
    if (stateQuery != null && !stateQuery.isEmpty()) {
        statesQuery.add(stateQuery);
    }
    Set<String> appStates = parseQueries(statesQuery, true);
    if (!appStates.isEmpty()) {
        checkAppStates = true;
    }
    GetApplicationsRequest request = GetApplicationsRequest.newInstance();
    if (checkStart) {
        request.setStartRange(sBegin, sEnd);
    }
    if (checkEnd) {
        request.setFinishRange(fBegin, fEnd);
    }
    if (checkCount) {
        request.setLimit(countNum);
    }
    if (checkAppTypes) {
        request.setApplicationTypes(appTypes);
    }
    if (checkAppTags) {
        request.setApplicationTags(appTags);
    }
    if (checkAppStates) {
        request.setApplicationStates(appStates);
    }
    if (queueQuery != null && !queueQuery.isEmpty()) {
        ResourceScheduler rs = rm.getResourceScheduler();
        if (rs instanceof CapacityScheduler) {
            CapacityScheduler cs = (CapacityScheduler) rs;
            // validate queue exists
            try {
                cs.getQueueInfo(queueQuery, false, false);
            } catch (IOException e) {
                throw new BadRequestException(e.getMessage());
            }
        }
        Set<String> queues = new HashSet<String>(1);
        queues.add(queueQuery);
        request.setQueues(queues);
    }
    if (userQuery != null && !userQuery.isEmpty()) {
        Set<String> users = new HashSet<String>(1);
        users.add(userQuery);
        request.setUsers(users);
    }
    List<ApplicationReport> appReports = null;
    try {
        appReports = rm.getClientRMService().getApplications(request, false).getApplicationList();
    } catch (YarnException e) {
        LOG.error("Unable to retrieve apps from ClientRMService", e);
        throw new YarnRuntimeException("Unable to retrieve apps from ClientRMService", e);
    }
    final ConcurrentMap<ApplicationId, RMApp> apps = rm.getRMContext().getRMApps();
    AppsInfo allApps = new AppsInfo();
    for (ApplicationReport report : appReports) {
        RMApp rmapp = apps.get(report.getApplicationId());
        if (rmapp == null) {
            continue;
        }
        if (finalStatusQuery != null && !finalStatusQuery.isEmpty()) {
            FinalApplicationStatus.valueOf(finalStatusQuery);
            if (!rmapp.getFinalApplicationStatus().toString().equalsIgnoreCase(finalStatusQuery)) {
                continue;
            }
        }
        AppInfo app = new AppInfo(rm, rmapp, hasAccess(rmapp, hsr), WebAppUtils.getHttpSchemePrefix(conf));
        allApps.add(app);
    }
    return allApps;
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) IOException(java.io.IOException) GetApplicationsRequest(org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) AppInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) BadRequestException(org.apache.hadoop.yarn.webapp.BadRequestException) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) AppsInfo(org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) HashSet(java.util.HashSet) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 4 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class RMWebServices method dumpSchedulerLogs.

@POST
@Path("/scheduler/logs")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public String dumpSchedulerLogs(@FormParam("time") String time, @Context HttpServletRequest hsr) throws IOException {
    init();
    UserGroupInformation callerUGI = getCallerUserGroupInformation(hsr, true);
    ApplicationACLsManager aclsManager = rm.getApplicationACLsManager();
    if (aclsManager.areACLsEnabled()) {
        if (callerUGI == null || !aclsManager.isAdmin(callerUGI)) {
            String msg = "Only admins can carry out this operation.";
            throw new ForbiddenException(msg);
        }
    }
    ResourceScheduler rs = rm.getResourceScheduler();
    int period = Integer.parseInt(time);
    if (period <= 0) {
        throw new BadRequestException("Period must be greater than 0");
    }
    final String logHierarchy = "org.apache.hadoop.yarn.server.resourcemanager.scheduler";
    String logfile = "yarn-scheduler-debug.log";
    if (rs instanceof CapacityScheduler) {
        logfile = "yarn-capacity-scheduler-debug.log";
    } else if (rs instanceof FairScheduler) {
        logfile = "yarn-fair-scheduler-debug.log";
    }
    AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logfile);
    // time period is sent to us in seconds
    dumper.dumpLogs("DEBUG", period * 1000);
    return "Capacity scheduler logs are being created.";
}
Also used : ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) ForbiddenException(org.apache.hadoop.yarn.webapp.ForbiddenException) FairScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler) AdHocLogDumper(org.apache.hadoop.yarn.util.AdHocLogDumper) BadRequestException(org.apache.hadoop.yarn.webapp.BadRequestException) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces)

Example 5 with CapacityScheduler

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler in project hadoop by apache.

the class TestNodeBlacklistingOnAMFailures method testNodeBlacklistingOnAMFailure.

@Test(timeout = 100000)
public void testNodeBlacklistingOnAMFailure() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, true);
    DrainDispatcher dispatcher = new DrainDispatcher();
    MockRM rm = startRM(conf, dispatcher);
    CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
    // Register 5 nodes, so that we can blacklist atleast one if AM container
    // is failed. As per calculation it will be like, 5nodes * 0.2 (default)=1.
    // First register 2 nodes, and after AM lauched register 3 more nodes.
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService());
    nm1.registerNode();
    MockNM nm2 = new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService());
    nm2.registerNode();
    RMApp app = rm.submitApp(200);
    MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm1);
    ContainerId amContainerId = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
    RMContainer rmContainer = scheduler.getRMContainer(amContainerId);
    NodeId nodeWhereAMRan = rmContainer.getAllocatedNode();
    MockNM currentNode, otherNode;
    if (nodeWhereAMRan.equals(nm1.getNodeId())) {
        currentNode = nm1;
        otherNode = nm2;
    } else {
        currentNode = nm2;
        otherNode = nm1;
    }
    // register 3 nodes now
    MockNM nm3 = new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService());
    nm3.registerNode();
    MockNM nm4 = new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService());
    nm4.registerNode();
    MockNM nm5 = new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService());
    nm5.registerNode();
    // Set the exist status to INVALID so that we can verify that the system
    // automatically blacklisting the node
    makeAMContainerExit(rm, amContainerId, currentNode, ContainerExitStatus.INVALID);
    // restart the am
    RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm);
    System.out.println("New AppAttempt launched " + attempt.getAppAttemptId());
    // Try the current node a few times
    for (int i = 0; i <= 2; i++) {
        currentNode.nodeHeartbeat(true);
        dispatcher.await();
        Assert.assertEquals("AppAttemptState should still be SCHEDULED if currentNode is " + "blacklisted correctly", RMAppAttemptState.SCHEDULED, attempt.getAppAttemptState());
    }
    // Now try the other node
    otherNode.nodeHeartbeat(true);
    dispatcher.await();
    // Now the AM container should be allocated
    MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000);
    MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId());
    rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED);
    amContainerId = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
    rmContainer = scheduler.getRMContainer(amContainerId);
    nodeWhereAMRan = rmContainer.getAllocatedNode();
    // The other node should now receive the assignment
    Assert.assertEquals("After blacklisting, AM should have run on the other node", otherNode.getNodeId(), nodeWhereAMRan);
    am2.registerAppAttempt();
    rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
    List<Container> allocatedContainers = TestAMRestart.allocateContainers(currentNode, am2, 1);
    Assert.assertEquals("Even though AM is blacklisted from the node, application can " + "still allocate non-AM containers there", currentNode.getNodeId(), allocatedContainers.get(0).getNodeId());
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Aggregations

CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)39 Test (org.junit.Test)21 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)15 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)12 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)12 IOException (java.io.IOException)9 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)8 ResourceScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler)8 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)7 ClientResponse (com.sun.jersey.api.client.ClientResponse)6 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)6 FairScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler)6 AccessControlException (org.apache.hadoop.security.AccessControlException)5 NodeId (org.apache.hadoop.yarn.api.records.NodeId)5 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)5 ArrayList (java.util.ArrayList)4 Container (org.apache.hadoop.yarn.api.records.Container)4 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)4 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)4 JSONObject (org.codehaus.jettison.json.JSONObject)4