Search in sources:

Example 6 with ApplicationMasterNotRegisteredException

use of org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException in project hadoop by apache.

the class TestApplicationMasterLauncher method testallocateBeforeAMRegistration.

/**
 * Verifies that AM calls made in the wrong lifecycle phase are rejected:
 * <ul>
 *   <li>{@code allocate} before the AM has registered must fail with
 *       {@link ApplicationMasterNotRegisteredException};</li>
 *   <li>registering a second time must fail with an "already registered"
 *       message;</li>
 *   <li>{@code allocate} after the attempt has finished must fail with
 *       {@link ApplicationAttemptNotFoundException}.</li>
 * </ul>
 *
 * @throws Exception if the mock RM/NM/AM helpers fail unexpectedly
 */
@Test(timeout = 100000)
public void testallocateBeforeAMRegistration() throws Exception {
    Logger rootLogger = LogManager.getRootLogger();
    // Remember the current level so the DEBUG setting does not leak into
    // whatever runs after this test in the same JVM.
    Level previousLevel = rootLogger.getLevel();
    rootLogger.setLevel(Level.DEBUG);
    MockRM rm = new MockRM();
    try {
        rm.start();
        MockNM nm1 = rm.registerNode("h1:1234", 5000);
        RMApp app = rm.submitApp(2000);
        // kick the scheduling
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt = app.getCurrentAppAttempt();
        MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
        // request for containers
        int request = 2;
        try {
            am.allocate("h1", 1000, request, new ArrayList<ContainerId>());
            Assert.fail();
        } catch (ApplicationMasterNotRegisteredException expected) {
            // expected: the AM has been launched but has not registered yet
        }
        // kick the scheduler
        nm1.nodeHeartbeat(true);
        try {
            am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
            Assert.fail();
        } catch (ApplicationMasterNotRegisteredException expected) {
            // expected: still not registered, an empty request is rejected too
        }
        am.registerAppAttempt();
        try {
            am.registerAppAttempt(false);
            Assert.fail();
        } catch (Exception e) {
            Assert.assertEquals("Application Master is already registered : " + attempt.getAppAttemptId().getApplicationId(), e.getMessage());
        }
        // Simulate an AM that was disconnected and app attempt was removed
        // (responseMap does not contain attemptid)
        am.unregisterAppAttempt();
        nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE);
        rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
        try {
            am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
            Assert.fail();
        } catch (ApplicationAttemptNotFoundException expected) {
            // expected: the finished attempt is no longer known to the RM
        }
    } finally {
        // Always shut the mock RM down, even when an assertion above fails,
        // so its services do not outlive the test.
        rm.stop();
        rootLogger.setLevel(previousLevel);
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ArrayList(java.util.ArrayList) Logger(org.apache.log4j.Logger) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) SerializedException(org.apache.hadoop.yarn.api.records.SerializedException) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) NMNotYetReadyException(org.apache.hadoop.yarn.exceptions.NMNotYetReadyException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) Test(org.junit.Test)

Example 7 with ApplicationMasterNotRegisteredException

use of org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException in project hadoop by apache.

the class LocalContainerAllocator method heartbeat.

/**
 * Sends one allocate call (with empty ask and release lists) to the scheduler
 * and applies the response to local state.
 *
 * <p>On success the retry window start is reset, {@code lastResponseID} is
 * updated, a new AMRM token (if the response carries one) is installed, and
 * the job priority is set from the response.
 *
 * @throws YarnRuntimeException if the RM does not recognize this attempt, or
 *         if the RM could not be contacted for at least {@code retryInterval}
 *         milliseconds; in the latter case the triggering exception is the cause
 * @throws Exception any other allocate failure inside the retry window,
 *         rethrown unchanged so the caller can try again
 */
@SuppressWarnings("unchecked")
@Override
protected synchronized void heartbeat() throws Exception {
    AllocateRequest allocateRequest = AllocateRequest.newInstance(this.lastResponseID, super.getApplicationProgress(), new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>(), null);
    AllocateResponse allocateResponse = null;
    try {
        allocateResponse = scheduler.allocate(allocateRequest);
        // Reset retry count if no exception occurred.
        retrystartTime = System.currentTimeMillis();
    } catch (ApplicationAttemptNotFoundException e) {
        LOG.info("Event from RM: shutting down Application Master");
        // This can happen if the RM has been restarted. If it is in that state,
        // this application must clean itself up.
        eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
        throw new YarnRuntimeException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationID(), e);
    } catch (ApplicationMasterNotRegisteredException e) {
        LOG.info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
        // Start the response sequence over and register again; allocateResponse
        // stays null, so the response handling below is skipped for this round.
        this.lastResponseID = 0;
        register();
    } catch (Exception e) {
        // re-trying until the retryInterval has expired.
        if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
            String message = "Could not contact RM after " + retryInterval + " milliseconds.";
            LOG.error(message);
            eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.INTERNAL_ERROR));
            // Keep the triggering exception as the cause so the reason the RM
            // was unreachable is not lost from the stack trace.
            throw new YarnRuntimeException(message, e);
        }
        // continue to attempt to contact the RM.
        throw e;
    }
    if (allocateResponse != null) {
        this.lastResponseID = allocateResponse.getResponseId();
        Token token = allocateResponse.getAMRMToken();
        if (token != null) {
            updateAMRMToken(token);
        }
        // NOTE(review): getApplicationPriority() is dereferenced without a null
        // check - confirm the RM always populates it in the response.
        Priority priorityFromResponse = Priority.newInstance(allocateResponse.getApplicationPriority().getPriority());
        // Update the job priority to Job directly.
        getJob().setJobPriority(priorityFromResponse);
    }
}
Also used : Priority(org.apache.hadoop.yarn.api.records.Priority) AllocateRequest(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) Token(org.apache.hadoop.yarn.api.records.Token) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest)

Example 8 with ApplicationMasterNotRegisteredException

use of org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException in project hadoop by apache.

the class ApplicationMasterService method allocate.

/**
 * Handles an allocate (heartbeat) call from an ApplicationMaster.
 *
 * <p>The caller's attempt id is taken from the authorized AMRM token. The
 * liveliness monitor is pinged, then the per-attempt response lock is looked
 * up and held while the request is processed. A request whose responseId is
 * exactly one behind the last response is treated as a duplicate heartbeat and
 * answered with the cached last response. Otherwise a new response is built,
 * given {@code lastResponse.getResponseId() + 1} as its id, and cached in the
 * lock object.
 *
 * @param request the AM's allocate request
 * @return the new response, or the cached last response for a duplicate
 *         heartbeat
 * @throws ApplicationAttemptNotFoundException if no response lock exists for
 *         the attempt
 * @throws ApplicationMasterNotRegisteredException if the attempt is known but
 *         the AM has not registered
 * @throws InvalidApplicationMasterRequestException if the request's responseId
 *         is more than one behind the last response
 * @throws YarnException declared by the method signature
 * @throws IOException declared by the method signature
 */
@Override
public AllocateResponse allocate(AllocateRequest request) throws YarnException, IOException {
    AMRMTokenIdentifier amrmTokenIdentifier = YarnServerSecurityUtils.authorizeRequest();
    ApplicationAttemptId appAttemptId = amrmTokenIdentifier.getApplicationAttemptId();
    this.amLivelinessMonitor.receivedPing(appAttemptId);
    /* check if its in cache */
    AllocateResponseLock lock = responseMap.get(appAttemptId);
    if (lock == null) {
        String message = "Application attempt " + appAttemptId + " doesn't exist in ApplicationMasterService cache.";
        LOG.error(message);
        throw new ApplicationAttemptNotFoundException(message);
    }
    synchronized (lock) {
        AllocateResponse lastResponse = lock.getAllocateResponse();
        if (!hasApplicationMasterRegistered(appAttemptId)) {
            String message = "AM is not registered for known application attempt: " + appAttemptId + " or RM had restarted after AM registered. " + " AM should re-register.";
            throw new ApplicationMasterNotRegisteredException(message);
        }
        if ((request.getResponseId() + 1) == lastResponse.getResponseId()) {
            /* old heartbeat */
            return lastResponse;
        } else if (request.getResponseId() + 1 < lastResponse.getResponseId()) {
            // Each new response is given lastResponse + 1 as its id (see the end of
            // this method), so the request that follows lastResponse is expected to
            // carry lastResponse's own id. Report that id and the one received.
            String message = "Invalid responseId in AllocateRequest from application attempt: " + appAttemptId + ", expect responseId to be " + lastResponse.getResponseId() + ", but get " + request.getResponseId();
            throw new InvalidApplicationMasterRequestException(message);
        }
        AllocateResponse response = recordFactory.newRecordInstance(AllocateResponse.class);
        allocateInternal(amrmTokenIdentifier.getApplicationAttemptId(), request, response);
        // update AMRMToken if the token is rolled-up
        MasterKeyData nextMasterKey = this.rmContext.getAMRMTokenSecretManager().getNextMasterKeyData();
        if (nextMasterKey != null && nextMasterKey.getMasterKey().getKeyId() != amrmTokenIdentifier.getKeyId()) {
            RMApp app = this.rmContext.getRMApps().get(appAttemptId.getApplicationId());
            RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId);
            RMAppAttemptImpl appAttemptImpl = (RMAppAttemptImpl) appAttempt;
            Token<AMRMTokenIdentifier> amrmToken = appAttempt.getAMRMToken();
            // Only mint a new token when the attempt's stored token was not
            // already created with the next master key.
            if (nextMasterKey.getMasterKey().getKeyId() != appAttemptImpl.getAMRMTokenKeyId()) {
                LOG.info("The AMRMToken has been rolled-over. Send new AMRMToken back" + " to application: " + appAttemptId.getApplicationId());
                amrmToken = rmContext.getAMRMTokenSecretManager().createAndGetAMRMToken(appAttemptId);
                appAttemptImpl.setAMRMToken(amrmToken);
            }
            response.setAMRMToken(org.apache.hadoop.yarn.api.records.Token.newInstance(amrmToken.getIdentifier(), amrmToken.getKind().toString(), amrmToken.getPassword(), amrmToken.getService().toString()));
        }
        /*
         * As we are updating the response inside the lock object so we don't
         * need to worry about unregister call occurring in between (which
         * removes the lock object).
         */
        response.setResponseId(lastResponse.getResponseId() + 1);
        lock.setAllocateResponse(response);
        return response;
    }
}
Also used : InvalidApplicationMasterRequestException(org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) RMAppAttemptImpl(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl) MasterKeyData(org.apache.hadoop.yarn.server.security.MasterKeyData)

Example 9 with ApplicationMasterNotRegisteredException

use of org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException in project hadoop by apache.

the class ApplicationMasterService method finishApplicationMaster.

/**
 * Handles an ApplicationMaster's unregister (finish) request.
 *
 * <p>The caller's attempt id is taken from the authorized request. If the
 * application's final state has already been stored, the call is acknowledged
 * immediately. Otherwise the per-attempt response lock is held while the AM's
 * registration is checked and an unregistration event is dispatched.
 *
 * @param request carries the tracking URL, final application status and
 *        diagnostics that are forwarded in the unregistration event
 * @return {@code true} when the final state is already stored; otherwise the
 *         application's unmanaged-AM flag
 * @throws ApplicationMasterNotRegisteredException if the AM tries to
 *         unregister before registering
 * @throws YarnException declared by the method signature
 * @throws IOException declared by the method signature
 */
@Override
public FinishApplicationMasterResponse finishApplicationMaster(FinishApplicationMasterRequest request) throws YarnException, IOException {
    final ApplicationAttemptId applicationAttemptId = YarnServerSecurityUtils.authorizeRequest().getApplicationAttemptId();
    final ApplicationId appId = applicationAttemptId.getApplicationId();
    final RMApp rmApp = rmContext.getRMApps().get(appId);

    // Remove collector address when app get finished.
    if (YarnConfiguration.timelineServiceV2Enabled(getConfig())) {
        rmApp.removeCollectorAddr();
    }

    // Check the stored final state before consulting the response cache, so an
    // unregister that arrives around an RM work-preserving restart is simply
    // acknowledged.
    if (rmApp.isAppFinalStateStored()) {
        LOG.info(rmApp.getApplicationId() + " unregistered successfully. ");
        return FinishApplicationMasterResponse.newInstance(true);
    }

    final AllocateResponseLock responseLock = responseMap.get(applicationAttemptId);
    if (responseLock == null) {
        throwApplicationDoesNotExistInCacheException(applicationAttemptId);
    }

    // Allow only one thread in AM to do finishApp at a time.
    synchronized (responseLock) {
        if (hasApplicationMasterRegistered(applicationAttemptId)) {
            this.amLivelinessMonitor.receivedPing(applicationAttemptId);
            rmContext.getDispatcher().getEventHandler().handle(
                new RMAppAttemptUnregistrationEvent(
                    applicationAttemptId,
                    request.getTrackingUrl(),
                    request.getFinalApplicationStatus(),
                    request.getDiagnostics()));
            // For UnmanagedAMs, return true so they don't retry
            boolean unmanagedAm = rmApp.getApplicationSubmissionContext().getUnmanagedAM();
            return FinishApplicationMasterResponse.newInstance(unmanagedAm);
        }

        // The AM never registered: log, audit and reject the request.
        String errorMsg = "Application Master is trying to unregister before registering for: " + appId;
        LOG.error(errorMsg);
        RMAuditLogger.logFailure(
            this.rmContext.getRMApps().get(appId).getUser(),
            AuditConstants.UNREGISTER_AM,
            "",
            "ApplicationMasterService",
            errorMsg,
            appId,
            applicationAttemptId);
        throw new ApplicationMasterNotRegisteredException(errorMsg);
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMAppAttemptUnregistrationEvent(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent)

Aggregations

ApplicationMasterNotRegisteredException (org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException): 9; AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse): 5; ApplicationAttemptNotFoundException (org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException): 4; RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 4; IOException (java.io.IOException): 3; FinishApplicationMasterRequest (org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest): 3; ArrayList (java.util.ArrayList): 2; JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent): 2; AllocateRequest (org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest): 2; FinishApplicationMasterResponse (org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse): 2; ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 2; Container (org.apache.hadoop.yarn.api.records.Container): 2; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 2; ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 2; ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest): 2; YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException): 2; RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt): 2; Test (org.junit.Test): 2; VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1; SimpleEntry (java.util.AbstractMap.SimpleEntry): 1