Example 6 with AMInfo

use of org.apache.hadoop.mapreduce.v2.api.records.AMInfo in project hadoop by apache.

the class MRAppMaster method readJustAMInfos.

private List<AMInfo> readJustAMInfos() {
    List<AMInfo> amInfos = new ArrayList<AMInfo>();
    FSDataInputStream inputStream = null;
    try {
        inputStream = getPreviousJobHistoryStream(getConfig(), appAttemptID);
        EventReader jobHistoryEventReader = new EventReader(inputStream);
        // All AMInfos are contiguous. Track when the first AMStartedEvent
        // appears.
        boolean amStartedEventsBegan = false;
        HistoryEvent event;
        while ((event = jobHistoryEventReader.getNextEvent()) != null) {
            if (event.getEventType() == EventType.AM_STARTED) {
                if (!amStartedEventsBegan) {
                    // First AMStartedEvent.
                    amStartedEventsBegan = true;
                }
                AMStartedEvent amStartedEvent = (AMStartedEvent) event;
                amInfos.add(MRBuilderUtils.newAMInfo(
                    amStartedEvent.getAppAttemptId(),
                    amStartedEvent.getStartTime(),
                    amStartedEvent.getContainerId(),
                    StringInterner.weakIntern(amStartedEvent.getNodeManagerHost()),
                    amStartedEvent.getNodeManagerPort(),
                    amStartedEvent.getNodeManagerHttpPort()));
            } else if (amStartedEventsBegan) {
                // No need to continue reading all the other events.
                break;
            }
        }
    } catch (IOException e) {
        LOG.warn("Could not parse the old history file. " + "Will not have old AMinfos ", e);
    } finally {
        if (inputStream != null) {
            IOUtils.closeQuietly(inputStream);
        }
    }
    return amInfos;
}
Also used : AMInfo(org.apache.hadoop.mapreduce.v2.api.records.AMInfo) AMStartedEvent(org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent) EventReader(org.apache.hadoop.mapreduce.jobhistory.EventReader) ArrayList(java.util.ArrayList) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) IOException(java.io.IOException) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) HistoryEvent(org.apache.hadoop.mapreduce.jobhistory.HistoryEvent)
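
For reference, MRBuilderUtils.newAMInfo packs the recovered fields into an AMInfo record in the argument order shown above. A minimal standalone sketch of building one (the IDs, host, and ports below are hypothetical placeholders; ApplicationId, ApplicationAttemptId, and ContainerId come from org.apache.hadoop.yarn.api.records):

// Hypothetical placeholder values; only the argument order is taken from
// the readJustAMInfos code above.
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId containerId = ContainerId.newContainerId(appAttemptId, 1);
AMInfo amInfo = MRBuilderUtils.newAMInfo(
    appAttemptId,               // appAttemptId
    System.currentTimeMillis(), // startTime
    containerId,                // containerId
    "nm.example.com",           // nodeManagerHost
    45454,                      // nodeManagerPort
    8042);                      // nodeManagerHttpPort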

Example 7 with AMInfo

use of org.apache.hadoop.mapreduce.v2.api.records.AMInfo in project hadoop by apache.

the class MRAppMaster method serviceStart.

@SuppressWarnings("unchecked")
@Override
protected void serviceStart() throws Exception {
    amInfos = new LinkedList<AMInfo>();
    completedTasksFromPreviousRun = new HashMap<TaskId, TaskInfo>();
    processRecovery();
    // Create an AMInfo for the current AM generation.
    AMInfo amInfo = MRBuilderUtils.newAMInfo(appAttemptID, startTime, containerID, nmHost, nmPort, nmHttpPort);
    // /////////////////// Create the job itself.
    job = createJob(getConfig(), forcedState, shutDownMessage);
    // Send out an MR AM inited event for all previous AMs.
    for (AMInfo info : amInfos) {
        dispatcher.getEventHandler().handle(new JobHistoryEvent(job.getID(),
            new AMStartedEvent(info.getAppAttemptId(), info.getStartTime(),
                info.getContainerId(), info.getNodeManagerHost(),
                info.getNodeManagerPort(), info.getNodeManagerHttpPort(),
                appSubmitTime)));
    }
    // Send out an MR AM inited event for this AM.
    dispatcher.getEventHandler().handle(new JobHistoryEvent(job.getID(),
        new AMStartedEvent(amInfo.getAppAttemptId(), amInfo.getStartTime(),
            amInfo.getContainerId(), amInfo.getNodeManagerHost(),
            amInfo.getNodeManagerPort(), amInfo.getNodeManagerHttpPort(),
            this.forcedState == null ? null : this.forcedState.toString(),
            appSubmitTime)));
    amInfos.add(amInfo);
    // metrics system init is really init & start.
    // It's more test friendly to put it here.
    DefaultMetricsSystem.initialize("MRAppMaster");
    boolean initFailed = false;
    if (!errorHappenedShutDown) {
        // create a job event for job initialization
        JobEvent initJobEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT);
        // Send init to the job (this does NOT trigger job execution)
        // This is a synchronous call, not an event through dispatcher. We want
        // job-init to be done completely here.
        jobEventDispatcher.handle(initJobEvent);
        // If job is still not initialized, an error happened during
        // initialization. Must complete starting all of the services so failure
        // events can be processed.
        initFailed = (((JobImpl) job).getInternalState() != JobStateInternal.INITED);
        if (job.isUber()) {
            speculatorEventDispatcher.disableSpeculation();
            LOG.info("MRAppMaster uberizing job " + job.getID() + " in local container (\"uber-AM\") on node " + nmHost + ":" + nmPort + ".");
        } else {
            // send init to speculator only for non-uber jobs. 
            // This won't yet start as dispatcher isn't started yet.
            dispatcher.getEventHandler().handle(new SpeculatorEvent(job.getID(), clock.getTime()));
            LOG.info("MRAppMaster launching normal, non-uberized, multi-container " + "job " + job.getID() + ".");
        }
        // Start ClientService here, since it's not initialized if
        // errorHappenedShutDown is true
        clientService.start();
    }
    //start all the components
    super.serviceStart();
    // finally set the job classloader
    MRApps.setClassLoader(jobClassLoader, getConfig());
    // initialize counter limits from the configuration
    Limits.init(getConfig());
    if (initFailed) {
        JobEvent initFailedEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT_FAILED);
        jobEventDispatcher.handle(initFailedEvent);
    } else {
        // All components have started, start the job.
        startJobs();
    }
}
Also used : AMInfo(org.apache.hadoop.mapreduce.v2.api.records.AMInfo) TaskInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo) AMStartedEvent(org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) SpeculatorEvent(org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent)
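
Note that two AMStartedEvent constructor arities are used above: previous AMs are replayed without a forced-state argument, while the current AM's event carries the forced-state string (null when the AM was not forced). Side by side, as a sketch with the argument names abbreviated:

// Previous AMs (replayed from history): no forced-state argument.
new AMStartedEvent(attemptId, startTime, containerId, host, port, httpPort,
    appSubmitTime);
// Current AM: same fields plus the forced-state string, or null.
new AMStartedEvent(attemptId, startTime, containerId, host, port, httpPort,
    forcedState == null ? null : forcedState.toString(), appSubmitTime);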

Example 8 with AMInfo

use of org.apache.hadoop.mapreduce.v2.api.records.AMInfo in project hadoop by apache.

the class TestRecovery method testCrashed.

/**
   * AM with 2 maps and 1 reduce. For the 1st map, one attempt fails, one
   * attempt completely disappears because of a failed launch, one attempt gets
   * killed, and one attempt succeeds. The AM crashes after the first task
   * finishes, then recovers completely and succeeds in the second generation.
   * 
   * @throws Exception
   */
@Test
public void testCrashed() throws Exception {
    int runCount = 0;
    long am1StartTimeEst = System.currentTimeMillis();
    MRApp app = new MRAppWithHistory(2, 1, false, this.getClass().getName(), true, ++runCount);
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean("mapred.reducer.new-api", true);
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    conf.set(FileOutputFormat.OUTDIR, outputDir.toString());
    Job job = app.submit(conf);
    app.waitForState(job, JobState.RUNNING);
    long jobStartTime = job.getReport().getStartTime();
    //all maps should be running
    Assert.assertEquals("No of tasks not correct", 3, job.getTasks().size());
    Iterator<Task> it = job.getTasks().values().iterator();
    Task mapTask1 = it.next();
    Task mapTask2 = it.next();
    Task reduceTask = it.next();
    // all maps must be running
    app.waitForState(mapTask1, TaskState.RUNNING);
    app.waitForState(mapTask2, TaskState.RUNNING);
    TaskAttempt task1Attempt1 = mapTask1.getAttempts().values().iterator().next();
    TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next();
    //before sending the TA_DONE event, make sure the attempt has reached
    //the RUNNING state
    app.waitForState(task1Attempt1, TaskAttemptState.RUNNING);
    app.waitForState(task2Attempt, TaskAttemptState.RUNNING);
    app.waitForState(reduceTask, TaskState.RUNNING);
    /////////// Play some games with the TaskAttempts of the first task //////
    //send the fail signal to the 1st map task attempt
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt1.getID(), TaskAttemptEventType.TA_FAILMSG));
    app.waitForState(task1Attempt1, TaskAttemptState.FAILED);
    int timeOut = 0;
    while (mapTask1.getAttempts().size() != 2 && timeOut++ < 10) {
        Thread.sleep(2000);
        LOG.info("Waiting for next attempt to start");
    }
    Assert.assertEquals(2, mapTask1.getAttempts().size());
    Iterator<TaskAttempt> itr = mapTask1.getAttempts().values().iterator();
    itr.next();
    TaskAttempt task1Attempt2 = itr.next();
    // wait for the second task attempt to be assigned.
    waitForContainerAssignment(task1Attempt2);
    // This attempt will automatically fail because of the way ContainerLauncher
    // is setup
    // This attempt 'disappears' from JobHistory and so causes MAPREDUCE-3846
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt2.getID(), TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED));
    app.waitForState(task1Attempt2, TaskAttemptState.FAILED);
    timeOut = 0;
    while (mapTask1.getAttempts().size() != 3 && timeOut++ < 10) {
        Thread.sleep(2000);
        LOG.info("Waiting for next attempt to start");
    }
    Assert.assertEquals(3, mapTask1.getAttempts().size());
    itr = mapTask1.getAttempts().values().iterator();
    itr.next();
    itr.next();
    TaskAttempt task1Attempt3 = itr.next();
    app.waitForState(task1Attempt3, TaskAttemptState.RUNNING);
    //send the kill signal to the 1st map 3rd attempt
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt3.getID(), TaskAttemptEventType.TA_KILL));
    app.waitForState(task1Attempt3, TaskAttemptState.KILLED);
    timeOut = 0;
    while (mapTask1.getAttempts().size() != 4 && timeOut++ < 10) {
        Thread.sleep(2000);
        LOG.info("Waiting for next attempt to start");
    }
    Assert.assertEquals(4, mapTask1.getAttempts().size());
    itr = mapTask1.getAttempts().values().iterator();
    itr.next();
    itr.next();
    itr.next();
    TaskAttempt task1Attempt4 = itr.next();
    app.waitForState(task1Attempt4, TaskAttemptState.RUNNING);
    //send the done signal to the 1st map 4th attempt
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(task1Attempt4.getID(), TaskAttemptEventType.TA_DONE));
    /////////// End of games with the TaskAttempts of the first task //////
    //wait for first map task to complete
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    long task1StartTime = mapTask1.getReport().getStartTime();
    long task1FinishTime = mapTask1.getReport().getFinishTime();
    //stop the app
    app.stop();
    //rerun: the 1st map will be recovered from the previous run
    long am2StartTimeEst = System.currentTimeMillis();
    app = new MRAppWithHistory(2, 1, false, this.getClass().getName(), false, ++runCount);
    conf = new Configuration();
    conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);
    conf.setBoolean("mapred.mapper.new-api", true);
    conf.setBoolean("mapred.reducer.new-api", true);
    conf.set(FileOutputFormat.OUTDIR, outputDir.toString());
    conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
    job = app.submit(conf);
    app.waitForState(job, JobState.RUNNING);
    //all maps should be running
    Assert.assertEquals("No of tasks not correct", 3, job.getTasks().size());
    it = job.getTasks().values().iterator();
    mapTask1 = it.next();
    mapTask2 = it.next();
    reduceTask = it.next();
    // first map will be recovered, no need to send done
    app.waitForState(mapTask1, TaskState.SUCCEEDED);
    app.waitForState(mapTask2, TaskState.RUNNING);
    task2Attempt = mapTask2.getAttempts().values().iterator().next();
    //before sending the TA_DONE event, make sure the attempt has reached
    //the RUNNING state
    app.waitForState(task2Attempt, TaskAttemptState.RUNNING);
    //send the done signal to the 2nd map task
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(mapTask2.getAttempts().values().iterator().next().getID(), TaskAttemptEventType.TA_DONE));
    //wait to get it completed
    app.waitForState(mapTask2, TaskState.SUCCEEDED);
    //wait for reduce to be running before sending done
    app.waitForState(reduceTask, TaskState.RUNNING);
    //send the done signal to the reduce
    app.getContext().getEventHandler().handle(new TaskAttemptEvent(reduceTask.getAttempts().values().iterator().next().getID(), TaskAttemptEventType.TA_DONE));
    app.waitForState(job, JobState.SUCCEEDED);
    app.verifyCompleted();
    Assert.assertEquals("Job Start time not correct", jobStartTime, job.getReport().getStartTime());
    Assert.assertEquals("Task Start time not correct", task1StartTime, mapTask1.getReport().getStartTime());
    Assert.assertEquals("Task Finish time not correct", task1FinishTime, mapTask1.getReport().getFinishTime());
    Assert.assertEquals(2, job.getAMInfos().size());
    int attemptNum = 1;
    // Verify AMInfo
    for (AMInfo amInfo : job.getAMInfos()) {
        Assert.assertEquals(attemptNum++, amInfo.getAppAttemptId().getAttemptId());
        Assert.assertEquals(amInfo.getAppAttemptId(), amInfo.getContainerId().getApplicationAttemptId());
        Assert.assertEquals(MRApp.NM_HOST, amInfo.getNodeManagerHost());
        Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
        Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
    }
    long am1StartTimeReal = job.getAMInfos().get(0).getStartTime();
    long am2StartTimeReal = job.getAMInfos().get(1).getStartTime();
    Assert.assertTrue(am1StartTimeReal >= am1StartTimeEst && am1StartTimeReal <= am2StartTimeEst);
    Assert.assertTrue(am2StartTimeReal >= am2StartTimeEst && am2StartTimeReal <= System.currentTimeMillis());
// TODO Add verification of additional data from jobHistory - whatever was
// available in the failed attempt should be available here
}
Also used : AMInfo(org.apache.hadoop.mapreduce.v2.api.records.AMInfo) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) Test(org.junit.Test)
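
The same ten-iteration, two-second polling loop appears three times in this test. A hedged refactoring sketch (the helper name waitForAttemptCount is hypothetical, not part of TestRecovery):

// Hypothetical helper factoring out the repeated polling loop above.
private static void waitForAttemptCount(Task task, int expected)
        throws InterruptedException {
    int timeOut = 0;
    while (task.getAttempts().size() != expected && timeOut++ < 10) {
        Thread.sleep(2000);
        LOG.info("Waiting for next attempt to start");
    }
    Assert.assertEquals(expected, task.getAttempts().size());
}

Each wait-and-assert pair above would then collapse to a single call such as waitForAttemptCount(mapTask1, 2).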

Example 9 with AMInfo

use of org.apache.hadoop.mapreduce.v2.api.records.AMInfo in project hadoop by apache.

the class TestMRClientService method verifyJobReport.

private void verifyJobReport(JobReport jr) {
    Assert.assertNotNull("JobReport is null", jr);
    List<AMInfo> amInfos = jr.getAMInfos();
    Assert.assertEquals(1, amInfos.size());
    Assert.assertEquals(JobState.RUNNING, jr.getJobState());
    AMInfo amInfo = amInfos.get(0);
    Assert.assertEquals(MRApp.NM_HOST, amInfo.getNodeManagerHost());
    Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
    Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
    Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId());
    Assert.assertEquals(1, amInfo.getContainerId().getApplicationAttemptId().getAttemptId());
    Assert.assertTrue(amInfo.getStartTime() > 0);
    Assert.assertEquals(false, jr.isUber());
}
Also used : AMInfo(org.apache.hadoop.mapreduce.v2.api.records.AMInfo)
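
For context, the JobReport verified here is typically obtained through an MRClientProtocol proxy. A sketch under that assumption (proxy and job are assumed to be in scope; the record factory comes from org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider):

// Sketch: fetch a JobReport over MRClientProtocol and verify it.
// Assumes 'proxy' (MRClientProtocol) and 'job' are set up by the test.
GetJobReportRequest request = RecordFactoryProvider.getRecordFactory(null)
    .newRecordInstance(GetJobReportRequest.class);
request.setJobId(job.getID());
JobReport report = proxy.getJobReport(request).getJobReport();
verifyJobReport(report);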

Example 10 with AMInfo

use of org.apache.hadoop.mapreduce.v2.api.records.AMInfo in project hadoop by apache.

the class JobReportPBImpl method addAMInfosToProto.

private synchronized void addAMInfosToProto() {
    maybeInitBuilder();
    builder.clearAmInfos();
    if (this.amInfos == null)
        return;
    for (AMInfo amInfo : this.amInfos) {
        builder.addAmInfos(convertToProtoFormat(amInfo));
    }
}
Also used : AMInfo(org.apache.hadoop.mapreduce.v2.api.records.AMInfo)
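
PBImpl record classes usually pair this write-through with a lazy read in the opposite direction. A sketch of that complementary pattern (the initAmInfos and convertFromProtoFormat names here are hypothetical, following the common PBImpl layout with viaProto/proto/builder fields):

// Hypothetical sketch of the usual lazy-init counterpart in a PBImpl class.
private synchronized void initAmInfos() {
    if (this.amInfos != null) {
        return; // already materialized from the proto
    }
    JobReportProtoOrBuilder p = viaProto ? proto : builder;
    this.amInfos = new ArrayList<AMInfo>();
    for (AMInfoProto amInfoProto : p.getAmInfosList()) {
        this.amInfos.add(convertFromProtoFormat(amInfoProto));
    }
}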

Aggregations

AMInfo (org.apache.hadoop.mapreduce.v2.api.records.AMInfo): 21
Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 9
Configuration (org.apache.hadoop.conf.Configuration): 5
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 5
IOException (java.io.IOException): 4
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 4
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 4
HashMap (java.util.HashMap): 3
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 3
JobHistoryParser (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser): 3
TaskInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo): 3
JobReport (org.apache.hadoop.mapreduce.v2.api.records.JobReport): 3
Date (java.util.Date): 2
LinkedList (java.util.LinkedList): 2
GET (javax.ws.rs.GET): 2
Path (javax.ws.rs.Path): 2
Produces (javax.ws.rs.Produces): 2
FileContext (org.apache.hadoop.fs.FileContext): 2
Path (org.apache.hadoop.fs.Path): 2
AMStartedEvent (org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent): 2