Search in sources:

Example 1 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class JobHistoryEventHandler method processEventForTimelineServer.

/**
 * Translates a job history event into a timeline entity carrying a single
 * timeline event, and publishes that entity through {@code timelineClient}.
 *
 * <p>Job-level events produce an entity whose id is {@code jobId} and whose
 * type is {@code MAPREDUCE_JOB_ENTITY_TYPE}. Task and task-attempt events
 * produce an entity whose id is the task id, whose type is
 * {@code MAPREDUCE_TASK_ENTITY_TYPE}, and which is related to the job entity.
 * Event types not handled by the switch are skipped: no entity is published
 * for them, because it would carry no id, no type and no event.
 *
 * <p>Publishing failures are logged and never propagated to the caller.
 *
 * @param event the history event to convert
 * @param jobId id of the job the event belongs to
 * @param timestamp timestamp recorded on the timeline event
 */
private void processEventForTimelineServer(HistoryEvent event, JobId jobId, long timestamp) {
    TimelineEvent tEvent = new TimelineEvent();
    tEvent.setEventType(StringUtils.toUpperCase(event.getEventType().name()));
    tEvent.setTimestamp(timestamp);
    TimelineEntity tEntity = new TimelineEntity();
    switch(event.getEventType()) {
        case JOB_SUBMITTED:
            JobSubmittedEvent jse = (JobSubmittedEvent) event;
            tEvent.addEventInfo("SUBMIT_TIME", jse.getSubmitTime());
            // The queue name is published under both QUEUE_NAME and JOB_QUEUE_NAME.
            tEvent.addEventInfo("QUEUE_NAME", jse.getJobQueueName());
            tEvent.addEventInfo("JOB_NAME", jse.getJobName());
            tEvent.addEventInfo("USER_NAME", jse.getUserName());
            tEvent.addEventInfo("JOB_CONF_PATH", jse.getJobConfPath());
            tEvent.addEventInfo("ACLS", jse.getJobAcls());
            tEvent.addEventInfo("JOB_QUEUE_NAME", jse.getJobQueueName());
            tEvent.addEventInfo("WORKFLOW_ID", jse.getWorkflowId());
            tEvent.addEventInfo("WORKFLOW_NAME", jse.getWorkflowName());
            // NOTE(review): key looks like a typo for WORKFLOW_NODE_NAME; left
            // unchanged because readers of the published data may rely on it.
            tEvent.addEventInfo("WORKFLOW_NAME_NAME", jse.getWorkflowNodeName());
            tEvent.addEventInfo("WORKFLOW_ADJACENCIES", jse.getWorkflowAdjacencies());
            tEvent.addEventInfo("WORKFLOW_TAGS", jse.getWorkflowTags());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_STATUS_CHANGED:
            JobStatusChangedEvent jsce = (JobStatusChangedEvent) event;
            tEvent.addEventInfo("STATUS", jsce.getStatus());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_INFO_CHANGED:
            JobInfoChangeEvent jice = (JobInfoChangeEvent) event;
            tEvent.addEventInfo("SUBMIT_TIME", jice.getSubmitTime());
            tEvent.addEventInfo("LAUNCH_TIME", jice.getLaunchTime());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_INITED:
            JobInitedEvent jie = (JobInitedEvent) event;
            tEvent.addEventInfo("START_TIME", jie.getLaunchTime());
            tEvent.addEventInfo("STATUS", jie.getStatus());
            tEvent.addEventInfo("TOTAL_MAPS", jie.getTotalMaps());
            tEvent.addEventInfo("TOTAL_REDUCES", jie.getTotalReduces());
            tEvent.addEventInfo("UBERIZED", jie.getUberized());
            // Only this event sets the start time on the entity itself.
            tEntity.setStartTime(jie.getLaunchTime());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_PRIORITY_CHANGED:
            JobPriorityChangeEvent jpce = (JobPriorityChangeEvent) event;
            tEvent.addEventInfo("PRIORITY", jpce.getPriority().toString());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_QUEUE_CHANGED:
            JobQueueChangeEvent jqe = (JobQueueChangeEvent) event;
            // NOTE(review): plural key differs from QUEUE_NAME used on submission;
            // left unchanged for compatibility with existing readers.
            tEvent.addEventInfo("QUEUE_NAMES", jqe.getJobQueueName());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_FAILED:
        case JOB_KILLED:
        case JOB_ERROR:
            JobUnsuccessfulCompletionEvent juce = (JobUnsuccessfulCompletionEvent) event;
            tEvent.addEventInfo("FINISH_TIME", juce.getFinishTime());
            // Finished map/reduce counts are published under both NUM_* and FINISHED_*.
            tEvent.addEventInfo("NUM_MAPS", juce.getFinishedMaps());
            tEvent.addEventInfo("NUM_REDUCES", juce.getFinishedReduces());
            tEvent.addEventInfo("JOB_STATUS", juce.getStatus());
            tEvent.addEventInfo("DIAGNOSTICS", juce.getDiagnostics());
            tEvent.addEventInfo("FINISHED_MAPS", juce.getFinishedMaps());
            tEvent.addEventInfo("FINISHED_REDUCES", juce.getFinishedReduces());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case JOB_FINISHED:
            JobFinishedEvent jfe = (JobFinishedEvent) event;
            tEvent.addEventInfo("FINISH_TIME", jfe.getFinishTime());
            tEvent.addEventInfo("NUM_MAPS", jfe.getFinishedMaps());
            tEvent.addEventInfo("NUM_REDUCES", jfe.getFinishedReduces());
            tEvent.addEventInfo("FAILED_MAPS", jfe.getFailedMaps());
            tEvent.addEventInfo("FAILED_REDUCES", jfe.getFailedReduces());
            tEvent.addEventInfo("FINISHED_MAPS", jfe.getFinishedMaps());
            tEvent.addEventInfo("FINISHED_REDUCES", jfe.getFinishedReduces());
            tEvent.addEventInfo("MAP_COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(jfe.getMapCounters()));
            tEvent.addEventInfo("REDUCE_COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(jfe.getReduceCounters()));
            tEvent.addEventInfo("TOTAL_COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(jfe.getTotalCounters()));
            tEvent.addEventInfo("JOB_STATUS", JobState.SUCCEEDED.toString());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        case TASK_STARTED:
            TaskStartedEvent tse = (TaskStartedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", tse.getTaskType().toString());
            tEvent.addEventInfo("START_TIME", tse.getStartTime());
            tEvent.addEventInfo("SPLIT_LOCATIONS", tse.getSplitLocations());
            attachEventToTaskEntity(tEntity, tEvent, tse.getTaskId().toString(), jobId);
            break;
        case TASK_FAILED:
            TaskFailedEvent tfe = (TaskFailedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", tfe.getTaskType().toString());
            tEvent.addEventInfo("STATUS", TaskStatus.State.FAILED.toString());
            tEvent.addEventInfo("FINISH_TIME", tfe.getFinishTime());
            tEvent.addEventInfo("ERROR", tfe.getError());
            tEvent.addEventInfo("FAILED_ATTEMPT_ID", tfe.getFailedAttemptID() == null ? "" : tfe.getFailedAttemptID().toString());
            tEvent.addEventInfo("COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(tfe.getCounters()));
            attachEventToTaskEntity(tEntity, tEvent, tfe.getTaskId().toString(), jobId);
            break;
        case TASK_UPDATED:
            TaskUpdatedEvent tue = (TaskUpdatedEvent) event;
            tEvent.addEventInfo("FINISH_TIME", tue.getFinishTime());
            attachEventToTaskEntity(tEntity, tEvent, tue.getTaskId().toString(), jobId);
            break;
        case TASK_FINISHED:
            TaskFinishedEvent tfe2 = (TaskFinishedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", tfe2.getTaskType().toString());
            tEvent.addEventInfo("COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(tfe2.getCounters()));
            tEvent.addEventInfo("FINISH_TIME", tfe2.getFinishTime());
            tEvent.addEventInfo("STATUS", TaskStatus.State.SUCCEEDED.toString());
            tEvent.addEventInfo("SUCCESSFUL_TASK_ATTEMPT_ID", tfe2.getSuccessfulTaskAttemptId() == null ? "" : tfe2.getSuccessfulTaskAttemptId().toString());
            attachEventToTaskEntity(tEntity, tEvent, tfe2.getTaskId().toString(), jobId);
            break;
        case MAP_ATTEMPT_STARTED:
        case CLEANUP_ATTEMPT_STARTED:
        case REDUCE_ATTEMPT_STARTED:
        case SETUP_ATTEMPT_STARTED:
            TaskAttemptStartedEvent tase = (TaskAttemptStartedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", tase.getTaskType().toString());
            tEvent.addEventInfo("TASK_ATTEMPT_ID", tase.getTaskAttemptId().toString());
            tEvent.addEventInfo("START_TIME", tase.getStartTime());
            tEvent.addEventInfo("HTTP_PORT", tase.getHttpPort());
            tEvent.addEventInfo("TRACKER_NAME", tase.getTrackerName());
            tEvent.addEventInfo("SHUFFLE_PORT", tase.getShufflePort());
            tEvent.addEventInfo("CONTAINER_ID", tase.getContainerId() == null ? "" : tase.getContainerId().toString());
            attachEventToTaskEntity(tEntity, tEvent, tase.getTaskId().toString(), jobId);
            break;
        case MAP_ATTEMPT_FAILED:
        case CLEANUP_ATTEMPT_FAILED:
        case REDUCE_ATTEMPT_FAILED:
        case SETUP_ATTEMPT_FAILED:
        case MAP_ATTEMPT_KILLED:
        case CLEANUP_ATTEMPT_KILLED:
        case REDUCE_ATTEMPT_KILLED:
        case SETUP_ATTEMPT_KILLED:
            TaskAttemptUnsuccessfulCompletionEvent tauce = (TaskAttemptUnsuccessfulCompletionEvent) event;
            tEvent.addEventInfo("TASK_TYPE", tauce.getTaskType().toString());
            tEvent.addEventInfo("TASK_ATTEMPT_ID", tauce.getTaskAttemptId() == null ? "" : tauce.getTaskAttemptId().toString());
            tEvent.addEventInfo("FINISH_TIME", tauce.getFinishTime());
            tEvent.addEventInfo("ERROR", tauce.getError());
            tEvent.addEventInfo("STATUS", tauce.getTaskStatus());
            tEvent.addEventInfo("HOSTNAME", tauce.getHostname());
            tEvent.addEventInfo("PORT", tauce.getPort());
            tEvent.addEventInfo("RACK_NAME", tauce.getRackName());
            // NOTE(review): the shuffle, sort and map finish times below all
            // reuse the overall finish time; confirm this is intended.
            tEvent.addEventInfo("SHUFFLE_FINISH_TIME", tauce.getFinishTime());
            tEvent.addEventInfo("SORT_FINISH_TIME", tauce.getFinishTime());
            tEvent.addEventInfo("MAP_FINISH_TIME", tauce.getFinishTime());
            tEvent.addEventInfo("COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(tauce.getCounters()));
            attachEventToTaskEntity(tEntity, tEvent, tauce.getTaskId().toString(), jobId);
            break;
        case MAP_ATTEMPT_FINISHED:
            MapAttemptFinishedEvent mafe = (MapAttemptFinishedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", mafe.getTaskType().toString());
            tEvent.addEventInfo("FINISH_TIME", mafe.getFinishTime());
            tEvent.addEventInfo("STATUS", mafe.getTaskStatus());
            tEvent.addEventInfo("STATE", mafe.getState());
            tEvent.addEventInfo("MAP_FINISH_TIME", mafe.getMapFinishTime());
            tEvent.addEventInfo("COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(mafe.getCounters()));
            tEvent.addEventInfo("HOSTNAME", mafe.getHostname());
            tEvent.addEventInfo("PORT", mafe.getPort());
            tEvent.addEventInfo("RACK_NAME", mafe.getRackName());
            tEvent.addEventInfo("ATTEMPT_ID", mafe.getAttemptId() == null ? "" : mafe.getAttemptId().toString());
            attachEventToTaskEntity(tEntity, tEvent, mafe.getTaskId().toString(), jobId);
            break;
        case REDUCE_ATTEMPT_FINISHED:
            ReduceAttemptFinishedEvent rafe = (ReduceAttemptFinishedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", rafe.getTaskType().toString());
            tEvent.addEventInfo("ATTEMPT_ID", rafe.getAttemptId() == null ? "" : rafe.getAttemptId().toString());
            tEvent.addEventInfo("FINISH_TIME", rafe.getFinishTime());
            tEvent.addEventInfo("STATUS", rafe.getTaskStatus());
            tEvent.addEventInfo("STATE", rafe.getState());
            tEvent.addEventInfo("SHUFFLE_FINISH_TIME", rafe.getShuffleFinishTime());
            tEvent.addEventInfo("SORT_FINISH_TIME", rafe.getSortFinishTime());
            tEvent.addEventInfo("COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(rafe.getCounters()));
            tEvent.addEventInfo("HOSTNAME", rafe.getHostname());
            tEvent.addEventInfo("PORT", rafe.getPort());
            tEvent.addEventInfo("RACK_NAME", rafe.getRackName());
            attachEventToTaskEntity(tEntity, tEvent, rafe.getTaskId().toString(), jobId);
            break;
        case SETUP_ATTEMPT_FINISHED:
        case CLEANUP_ATTEMPT_FINISHED:
            TaskAttemptFinishedEvent tafe = (TaskAttemptFinishedEvent) event;
            tEvent.addEventInfo("TASK_TYPE", tafe.getTaskType().toString());
            tEvent.addEventInfo("ATTEMPT_ID", tafe.getAttemptId() == null ? "" : tafe.getAttemptId().toString());
            tEvent.addEventInfo("FINISH_TIME", tafe.getFinishTime());
            tEvent.addEventInfo("STATUS", tafe.getTaskStatus());
            tEvent.addEventInfo("STATE", tafe.getState());
            tEvent.addEventInfo("COUNTERS_GROUPS", JobHistoryEventUtils.countersToJSON(tafe.getCounters()));
            tEvent.addEventInfo("HOSTNAME", tafe.getHostname());
            attachEventToTaskEntity(tEntity, tEvent, tafe.getTaskId().toString(), jobId);
            break;
        case AM_STARTED:
            AMStartedEvent ase = (AMStartedEvent) event;
            tEvent.addEventInfo("APPLICATION_ATTEMPT_ID", ase.getAppAttemptId() == null ? "" : ase.getAppAttemptId().toString());
            tEvent.addEventInfo("CONTAINER_ID", ase.getContainerId() == null ? "" : ase.getContainerId().toString());
            tEvent.addEventInfo("NODE_MANAGER_HOST", ase.getNodeManagerHost());
            tEvent.addEventInfo("NODE_MANAGER_PORT", ase.getNodeManagerPort());
            tEvent.addEventInfo("NODE_MANAGER_HTTP_PORT", ase.getNodeManagerHttpPort());
            tEvent.addEventInfo("START_TIME", ase.getStartTime());
            tEvent.addEventInfo("SUBMIT_TIME", ase.getSubmitTime());
            attachEventToJobEntity(tEntity, tEvent, jobId);
            break;
        default:
            // Unhandled event type: the entity has no id, type or event at this
            // point, so there is nothing meaningful to publish.
            return;
    }
    try {
        TimelinePutResponse response = timelineClient.putEntities(tEntity);
        List<TimelinePutResponse.TimelinePutError> errors = response.getErrors();
        if (errors.isEmpty()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Timeline entities are successfully put in event " + event.getEventType());
            }
        } else {
            for (TimelinePutResponse.TimelinePutError error : errors) {
                LOG.error("Error when publishing entity [" + error.getEntityType() + "," + error.getEntityId() + "], server side error code: " + error.getErrorCode());
            }
        }
    } catch (YarnException | IOException | ClientHandlerException ex) {
        // Publishing is best-effort: log the failure and carry on.
        LOG.error("Error putting entity " + tEntity.getEntityId() + " to TimelineServer", ex);
    }
}

/** Adds {@code tEvent} to {@code tEntity} and identifies the entity as the job {@code jobId}. */
private void attachEventToJobEntity(TimelineEntity tEntity, TimelineEvent tEvent, JobId jobId) {
    tEntity.addEvent(tEvent);
    tEntity.setEntityId(jobId.toString());
    tEntity.setEntityType(MAPREDUCE_JOB_ENTITY_TYPE);
}

/** Adds {@code tEvent} to {@code tEntity}, identifies the entity as the task {@code taskId}, and relates it to the job {@code jobId}. */
private void attachEventToTaskEntity(TimelineEntity tEntity, TimelineEvent tEvent, String taskId, JobId jobId) {
    tEntity.addEvent(tEvent);
    tEntity.setEntityId(taskId);
    tEntity.setEntityType(MAPREDUCE_TASK_ENTITY_TYPE);
    tEntity.addRelatedEntity(MAPREDUCE_JOB_ENTITY_TYPE, jobId.toString());
}
Also used : TimelineEvent(org.apache.hadoop.yarn.api.records.timeline.TimelineEvent) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) ClientHandlerException(com.sun.jersey.api.client.ClientHandlerException) TimelinePutResponse(org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse) IOException(java.io.IOException) TimelineEntity(org.apache.hadoop.yarn.api.records.timeline.TimelineEntity)

Example 2 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestClientServiceDelegate method testRMDownRestoreForJobStatusBeforeGetAMReport.

/**
 * Verifies that fetching the job status survives two consecutive failures of
 * the resource manager delegate: the application report lookup throws twice
 * and succeeds on the third call, and a non-null status must still be returned.
 */
@Test
public void testRMDownRestoreForJobStatusBeforeGetAMReport() throws IOException {
    Configuration clientConf = new YarnConfiguration();
    clientConf.setInt(MRJobConfig.MR_CLIENT_MAX_RETRIES, 3);
    clientConf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    clientConf.setBoolean(MRJobConfig.JOB_AM_ACCESS_DISABLED, !isAMReachableFromClient);

    // History server stub that answers every job report request.
    MRClientProtocol historyProxy = mock(MRClientProtocol.class);
    when(historyProxy.getJobReport(any(GetJobReportRequest.class)))
        .thenReturn(getJobReportResponse());

    ResourceMgrDelegate resourceMgr = mock(ResourceMgrDelegate.class);
    try {
        // Two failures followed by a finished application report.
        when(resourceMgr.getApplicationReport(jobId.getAppId()))
            .thenThrow(new java.lang.reflect.UndeclaredThrowableException(new IOException("Connection refuced1")))
            .thenThrow(new java.lang.reflect.UndeclaredThrowableException(new IOException("Connection refuced2")))
            .thenReturn(getFinishedApplicationReport());

        ClientServiceDelegate delegate =
            new ClientServiceDelegate(clientConf, resourceMgr, oldJobId, historyProxy);
        JobStatus status = delegate.getJobStatus(oldJobId);

        // Exactly three lookups: the two failed ones plus the successful one.
        verify(resourceMgr, times(3)).getApplicationReport(any(ApplicationId.class));
        Assert.assertNotNull(status);
    } catch (YarnException yarnFailure) {
        throw new IOException(yarnFailure);
    }
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) GetJobReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) MRClientProtocol(org.apache.hadoop.mapreduce.v2.api.MRClientProtocol) JobStatus(org.apache.hadoop.mapreduce.JobStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 3 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestClientServiceDelegate method testReconnectOnAMRestart.

/**
 * Verifies that the client delegate reconnects to a new application master
 * after the first one goes down, and that it then keeps reusing the second
 * proxy instead of instantiating another one.
 */
@Test
public void testReconnectOnAMRestart() throws IOException {
    // Nothing to check when the AM is unreachable: instantiateAMProxy is never called.
    if (!isAMReachableFromClient) {
        return;
    }

    MRClientProtocol historyProxy = mock(MRClientProtocol.class);

    // The RM hands out, in order: the AM1 address, two reports without an
    // address (AM2 is still restarting), and finally the AM2 address.
    ResourceMgrDelegate resourceMgr = mock(ResourceMgrDelegate.class);
    try {
        when(resourceMgr.getApplicationReport(jobId.getAppId()))
            .thenReturn(getRunningApplicationReport("am1", 78))
            .thenReturn(getRunningApplicationReport(null, 0))
            .thenReturn(getRunningApplicationReport(null, 0))
            .thenReturn(getRunningApplicationReport("am2", 90));
    } catch (YarnException yarnFailure) {
        throw new IOException(yarnFailure);
    }

    // First-generation AM: answers once with "jobName-firstGen", then fails
    // to simulate the AM shutting down.
    GetJobReportResponse firstGenResponse = mock(GetJobReportResponse.class);
    when(firstGenResponse.getJobReport())
        .thenReturn(MRBuilderUtils.newJobReport(jobId, "jobName-firstGen", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null, false, ""));
    MRClientProtocol firstGenAMProxy = mock(MRClientProtocol.class);
    when(firstGenAMProxy.getJobReport(any(GetJobReportRequest.class)))
        .thenReturn(firstGenResponse)
        .thenThrow(new RuntimeException("AM is down!"));

    // Second-generation AM: always answers with "jobName-secondGen".
    GetJobReportResponse secondGenResponse = mock(GetJobReportResponse.class);
    when(secondGenResponse.getJobReport())
        .thenReturn(MRBuilderUtils.newJobReport(jobId, "jobName-secondGen", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null, false, ""));
    MRClientProtocol secondGenAMProxy = mock(MRClientProtocol.class);
    when(secondGenAMProxy.getJobReport(any(GetJobReportRequest.class)))
        .thenReturn(secondGenResponse);

    // The spy hands out the AM1 proxy first and the AM2 proxy second. Any
    // later request must reuse the AM2 proxy without a new instantiation.
    ClientServiceDelegate delegate = spy(getClientServiceDelegate(historyProxy, resourceMgr));
    doReturn(firstGenAMProxy)
        .doReturn(secondGenAMProxy)
        .when(delegate)
        .instantiateAMProxy(any(InetSocketAddress.class));

    JobStatus status = delegate.getJobStatus(oldJobId);
    Assert.assertNotNull(status);
    Assert.assertEquals("jobName-firstGen", status.getJobName());

    status = delegate.getJobStatus(oldJobId);
    Assert.assertNotNull(status);
    Assert.assertEquals("jobName-secondGen", status.getJobName());

    status = delegate.getJobStatus(oldJobId);
    Assert.assertNotNull(status);
    Assert.assertEquals("jobName-secondGen", status.getJobName());

    verify(delegate, times(2)).instantiateAMProxy(any(InetSocketAddress.class));
}
Also used : JobStatus(org.apache.hadoop.mapreduce.JobStatus) InetSocketAddress(java.net.InetSocketAddress) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) GetJobReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest) MRClientProtocol(org.apache.hadoop.mapreduce.v2.api.MRClientProtocol) GetJobReportResponse(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse) Test(org.junit.Test)

Example 4 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class BaseAMRMProxyE2ETest method createAMRMProtocol.

/**
 * Builds an {@link ApplicationMasterProtocol} proxy that runs as a proxy user
 * named after the application's current attempt and carrying an AMRM token
 * obtained from the AMRMProxy service of the cluster's first node manager.
 *
 * @param rmClient client used to look up the application report
 * @param appId application whose current attempt the proxy user represents
 * @param cluster mini cluster whose node manager at index 0 supplies the AMRMProxy service
 * @param yarnConf configuration passed to the RM proxy factory
 * @return the protocol proxy, created inside the proxy user's {@code doAs}
 */
protected ApplicationMasterProtocol createAMRMProtocol(YarnClient rmClient, ApplicationId appId, MiniYARNCluster cluster, final Configuration yarnConf) throws IOException, InterruptedException, YarnException {
    ApplicationReport appReport = rmClient.getApplicationReport(appId);
    UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(
        appReport.getCurrentApplicationAttemptId().toString(),
        UserGroupInformation.getCurrentUser());

    // Ask the AMRMProxy's secret manager for a token for the current attempt.
    ContainerManagerImpl nmContainerManager =
        (ContainerManagerImpl) cluster.getNodeManager(0).getNMContext().getContainerManager();
    AMRMProxyTokenSecretManager secretManager =
        nmContainerManager.getAMRMProxyService().getSecretManager();
    org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> amrmToken =
        secretManager.createAndGetAMRMToken(appReport.getCurrentApplicationAttemptId());

    // Point the token at the AMRMProxy's bind address before handing it to the user.
    SecurityUtil.setTokenService(amrmToken, nmContainerManager.getAMRMProxyService().getBindAddress());
    proxyUser.addToken(amrmToken);

    PrivilegedExceptionAction<ApplicationMasterProtocol> createProxy =
        new PrivilegedExceptionAction<ApplicationMasterProtocol>() {

            @Override
            public ApplicationMasterProtocol run() throws Exception {
                return ClientRMProxy.createRMProxy(yarnConf, ApplicationMasterProtocol.class);
            }
        };
    return proxyUser.doAs(createProxy);
}
Also used : AMRMProxyTokenSecretManager(org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyTokenSecretManager) ApplicationMasterProtocol(org.apache.hadoop.yarn.api.ApplicationMasterProtocol) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) ContainerManagerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)

Example 5 with YarnException

use of org.apache.hadoop.yarn.exceptions.YarnException in project hadoop by apache.

the class TestAMRMClient method allocateAndStartContainers.

/**
 * Requests {@code num} containers through {@code amClient}, starts each one
 * with a {@code sleep 100} command through {@code nmClient}, and waits until
 * every container reports the RUNNING state.
 *
 * <p>The wait polls the container status with no upper bound, so this method
 * does not return until each container is RUNNING.
 *
 * @param amClient client used to request and receive container allocations
 * @param nmClient client used to start the containers and query their status
 * @param num number of containers to allocate and start
 * @return the allocated containers, all observed in the RUNNING state
 * @throws YarnException if allocation fails
 * @throws IOException if allocation or token serialization fails
 * @throws AssertionError if starting a container or querying its status
 *     throws a {@link YarnException}; the original exception is kept as the cause
 */
private List<Container> allocateAndStartContainers(final AMRMClient<ContainerRequest> amClient, final NMClient nmClient, int num) throws YarnException, IOException {
    // set up allocation requests
    for (int i = 0; i < num; ++i) {
        amClient.addContainerRequest(new ContainerRequest(capability, nodes, racks, priority));
    }
    // send allocation requests
    amClient.allocate(0.1f);
    // let NM heartbeat to RM and trigger allocations
    triggerSchedulingWithNMHeartBeat();
    // get allocations
    AllocateResponse allocResponse = amClient.allocate(0.1f);
    List<Container> containers = allocResponse.getAllocatedContainers();
    Assert.assertEquals(num, containers.size());
    // build container launch context
    Credentials ts = new Credentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    ts.writeTokenStorageToStream(dob);
    ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    // start a process long enough for increase/decrease action to take effect
    ContainerLaunchContext clc = BuilderUtils.newContainerLaunchContext(Collections.<String, LocalResource>emptyMap(), new HashMap<String, String>(), Arrays.asList("sleep", "100"), new HashMap<String, ByteBuffer>(), securityTokens, new HashMap<ApplicationAccessType, String>());
    // start the containers and make sure they are in RUNNING state
    try {
        // The size was asserted equal to num above, so this visits exactly num containers.
        for (Container container : containers) {
            nmClient.startContainer(container, clc);
            // poll the container status until it is RUNNING
            while (true) {
                ContainerStatus status = nmClient.getContainerStatus(container.getId(), container.getNodeId());
                if (status.getState() == ContainerState.RUNNING) {
                    break;
                }
                sleep(10);
            }
        }
    } catch (YarnException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new AssertionError("Exception is not expected: " + e, e);
    }
    // let NM's heartbeat to RM to confirm container launch
    triggerSchedulingWithNMHeartBeat();
    return containers;
}
Also used : ByteBuffer(java.nio.ByteBuffer) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ContainerRequest(org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest) Credentials(org.apache.hadoop.security.Credentials)

Aggregations

YarnException (org.apache.hadoop.yarn.exceptions.YarnException)283 IOException (java.io.IOException)145 Test (org.junit.Test)107 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)61 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)44 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)31 Configuration (org.apache.hadoop.conf.Configuration)26 ArrayList (java.util.ArrayList)25 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)25 ApplicationNotFoundException (org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException)25 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)24 AccessControlException (org.apache.hadoop.security.AccessControlException)22 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)21 UndeclaredThrowableException (java.lang.reflect.UndeclaredThrowableException)16 Path (org.apache.hadoop.fs.Path)16 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)15 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)15 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)14 HashMap (java.util.HashMap)13 ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext)13