Example 1 with JobId

Use of org.apache.hadoop.mapreduce.v2.api.records.JobId in project hadoop by apache.

From class TestMRJobs, method testThreadDumpOnTaskTimeout.

@Test(timeout = 120000)
public void testThreadDumpOnTaskTimeout() throws IOException, InterruptedException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    final SleepJob sleepJob = new SleepJob();
    final JobConf sleepConf = new JobConf(mrCluster.getConfig());
    sleepConf.setLong(MRJobConfig.TASK_TIMEOUT, 3 * 1000L);
    sleepConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
    sleepJob.setConf(sleepConf);
    if (this instanceof TestUberAM) {
        sleepConf.setInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS, 30 * 1000);
    }
    // map sleeps for 10 minutes, far past the 3-second task timeout, so the AM kills the task and takes a thread dump
    final Job job = sleepJob.createJob(1, 0, 10 * 60 * 1000L, 1, 0L, 0);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.waitForCompletion(true);
    final JobId jobId = TypeConverter.toYarn(job.getJobID());
    final ApplicationId appID = jobId.getAppId();
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    // Job finished; verify the logs
    final String appIdStr = appID.toString();
    final String appIdSuffix = appIdStr.substring("application_".length());
    final String containerGlob = "container_" + appIdSuffix + "_*_*";
    final String syslogGlob = appIdStr + Path.SEPARATOR + containerGlob + Path.SEPARATOR + TaskLog.LogName.SYSLOG;
    int numAppMasters = 0;
    int numMapTasks = 0;
    for (int i = 0; i < NUM_NODE_MGRS; i++) {
        final Configuration nmConf = mrCluster.getNodeManager(i).getConfig();
        for (String logDir : nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) {
            final Path absSyslogGlob = new Path(logDir + Path.SEPARATOR + syslogGlob);
            LOG.info("Checking for glob: " + absSyslogGlob);
            for (FileStatus syslog : localFs.globStatus(absSyslogGlob)) {
                boolean foundAppMaster = false;
                boolean foundThreadDump = false;
                // Determine the container type
                final BufferedReader syslogReader = new BufferedReader(new InputStreamReader(localFs.open(syslog.getPath())));
                try {
                    for (String line; (line = syslogReader.readLine()) != null; ) {
                        if (line.contains(MRAppMaster.class.getName())) {
                            foundAppMaster = true;
                            break;
                        }
                    }
                } finally {
                    syslogReader.close();
                }
                // Check for thread dump in stdout
                final Path stdoutPath = new Path(syslog.getPath().getParent(), TaskLog.LogName.STDOUT.toString());
                final BufferedReader stdoutReader = new BufferedReader(new InputStreamReader(localFs.open(stdoutPath)));
                try {
                    for (String line; (line = stdoutReader.readLine()) != null; ) {
                        if (line.contains("Full thread dump")) {
                            foundThreadDump = true;
                            break;
                        }
                    }
                } finally {
                    stdoutReader.close();
                }
                if (foundAppMaster) {
                    numAppMasters++;
                    if (this instanceof TestUberAM) {
                        Assert.assertTrue("No thread dump", foundThreadDump);
                    } else {
                        Assert.assertFalse("Unexpected thread dump", foundThreadDump);
                    }
                } else {
                    numMapTasks++;
                    Assert.assertTrue("No thread dump", foundThreadDump);
                }
            }
        }
    }
    // Make sure we checked a non-empty set of logs
    Assert.assertEquals("No AppMaster log found!", 1, numAppMasters);
    if (sleepConf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false)) {
        Assert.assertEquals("MapTask log with uber found!", 0, numMapTasks);
    } else {
        Assert.assertEquals("No MapTask log found!", 1, numMapTasks);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MRAppMaster (org.apache.hadoop.mapreduce.v2.app.MRAppMaster), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), SleepJob (org.apache.hadoop.mapreduce.SleepJob), RunningJob (org.apache.hadoop.mapred.RunningJob), Job (org.apache.hadoop.mapreduce.Job), RandomTextWriterJob (org.apache.hadoop.RandomTextWriterJob), ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId), File (java.io.File), JobConf (org.apache.hadoop.mapred.JobConf), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), Test (org.junit.Test)
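
The pivot of this test is the ID conversion: TypeConverter.toYarn bridges the client-side org.apache.hadoop.mapreduce.JobID to the v2 JobId record, and getAppId() yields the YARN ApplicationId used to look the job up in the ResourceManager. A minimal sketch of that round trip (the JobIdBridge helper name is illustrative):

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.yarn.api.records.ApplicationId;

final class JobIdBridge {
    // Convert a submitted job's client-side ID to the v2 JobId record,
    // then extract the ApplicationId backing it.
    static ApplicationId appIdOf(Job job) {
        JobId jobId = TypeConverter.toYarn(job.getJobID());
        return jobId.getAppId();
    }
}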

Example 2 with JobId

Use of org.apache.hadoop.mapreduce.v2.api.records.JobId in project hadoop by apache.

From class TestMRJobsWithHistoryService, method testJobHistoryData.

@Test(timeout = 90000)
public void testJobHistoryData() throws IOException, InterruptedException, AvroRemoteException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    SleepJob sleepJob = new SleepJob();
    sleepJob.setConf(mrCluster.getConfig());
    // Job with 3 maps and 2 reduces
    Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.waitForCompletion(true);
    Counters counterMR = job.getCounters();
    JobId jobId = TypeConverter.toYarn(job.getJobID());
    ApplicationId appID = jobId.getAppId();
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState());
    Counters counterHS = job.getCounters();
    // TODO: the assert below worked; need to check whether we should
    // compare each field or convert to a V2 counter and compare.
    LOG.info("CounterHS " + counterHS);
    LOG.info("CounterMR " + counterMR);
    Assert.assertEquals(counterHS, counterMR);
    HSClientProtocol historyClient = instantiateHistoryProxy();
    GetJobReportRequest gjReq = Records.newRecord(GetJobReportRequest.class);
    gjReq.setJobId(jobId);
    JobReport jobReport = historyClient.getJobReport(gjReq).getJobReport();
    verifyJobReport(jobReport, jobId);
}
Also used: HSClientProtocol (org.apache.hadoop.mapreduce.v2.api.HSClientProtocol), SleepJob (org.apache.hadoop.mapreduce.SleepJob), Counters (org.apache.hadoop.mapreduce.Counters), Job (org.apache.hadoop.mapreduce.Job), ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId), File (java.io.File), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), GetJobReportRequest (org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest), JobReport (org.apache.hadoop.mapreduce.v2.api.records.JobReport), Test (org.junit.Test)
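
The history-server query at the end of this test is a reusable three-step pattern: build the request record, set the JobId, and unwrap the response. A hedged sketch, assuming an already-instantiated HSClientProtocol proxy like the one instantiateHistoryProxy() returns above (the HistoryLookup helper name is illustrative):

import java.io.IOException;

import org.apache.hadoop.mapreduce.v2.api.HSClientProtocol;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
import org.apache.hadoop.yarn.util.Records;

final class HistoryLookup {
    // Fetch the JobReport for a given JobId from the history server.
    static JobReport fetchReport(HSClientProtocol history, JobId jobId)
            throws IOException {
        GetJobReportRequest request = Records.newRecord(GetJobReportRequest.class);
        request.setJobId(jobId);
        return history.getJobReport(request).getJobReport();
    }
}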

Example 3 with JobId

Use of org.apache.hadoop.mapreduce.v2.api.records.JobId in project hadoop by apache.

From class CachedHistoryStorage, method getAllPartialJobs.

@Override
public Map<JobId, Job> getAllPartialJobs() {
    LOG.debug("Called getAllPartialJobs()");
    SortedMap<JobId, Job> result = new TreeMap<JobId, Job>();
    try {
        for (HistoryFileInfo mi : hsManager.getAllFileInfo()) {
            if (mi != null) {
                JobId id = mi.getJobId();
                result.put(id, new PartialJob(mi.getJobIndexInfo(), id));
            }
        }
    } catch (IOException e) {
        LOG.warn("Error trying to scan for all FileInfos", e);
        throw new YarnRuntimeException(e);
    }
    return result;
}
Also used: YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException), HistoryFileInfo (org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo), IOException (java.io.IOException), TreeMap (java.util.TreeMap), Job (org.apache.hadoop.mapreduce.v2.app.job.Job), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)
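
Because the method builds a TreeMap keyed by JobId, callers receive the partial jobs in ascending JobId order for free. A small consumer sketch (the PartialJobLister class is illustrative, assuming an initialized CachedHistoryStorage):

import java.util.Map;

import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.hs.CachedHistoryStorage;

final class PartialJobLister {
    // Print each cached job in ascending JobId order; the TreeMap built by
    // getAllPartialJobs() guarantees the iteration order.
    static void list(CachedHistoryStorage storage) {
        for (Map.Entry<JobId, Job> entry : storage.getAllPartialJobs().entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue().getName());
        }
    }
}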

Example 4 with JobId

Use of org.apache.hadoop.mapreduce.v2.api.records.JobId in project hadoop by apache.

From class TestClientServiceDelegate, method getJobReportResponse.

private GetJobReportResponse getJobReportResponse() {
    GetJobReportResponse jobReportResponse = Records.newRecord(GetJobReportResponse.class);
    JobReport jobReport = Records.newRecord(JobReport.class);
    jobReport.setJobId(jobId);
    jobReport.setJobState(JobState.SUCCEEDED);
    jobReportResponse.setJobReport(jobReport);
    return jobReportResponse;
}
Also used: GetJobReportResponse (org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse), JobReport (org.apache.hadoop.mapreduce.v2.api.records.JobReport)
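
Records.newRecord is the standard factory for these protobuf-backed record interfaces, which expose no public constructors. For JobId itself, MRBuilderUtils wraps the same plumbing in a convenience builder; a minimal sketch with illustrative timestamp and sequence values:

import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.api.records.ApplicationId;

final class JobIdFactory {
    // Build a JobId from scratch: an ApplicationId (cluster timestamp plus
    // application number) and the job's sequence number within that app.
    static JobId sampleJobId() {
        ApplicationId appId = ApplicationId.newInstance(1234L, 1);  // illustrative values
        return MRBuilderUtils.newJobId(appId, 1);
    }
}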

Example 5 with JobId

Use of org.apache.hadoop.mapreduce.v2.api.records.JobId in project hadoop by apache.

From class TestClientServiceDelegate, method testReconnectOnAMRestart.

@Test
public void testReconnectOnAMRestart() throws IOException {
    // not applicable when the AM is unreachable from the client,
    // as instantiateAMProxy is not called at all
    if (!isAMReachableFromClient) {
        return;
    }
    MRClientProtocol historyServerProxy = mock(MRClientProtocol.class);
    // RM returns AM1 url, null, null and AM2 url on invocations.
    // Nulls simulate the time when AM2 is in the process of restarting.
    ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class);
    try {
        when(rmDelegate.getApplicationReport(jobId.getAppId()))
            .thenReturn(getRunningApplicationReport("am1", 78))
            .thenReturn(getRunningApplicationReport(null, 0))
            .thenReturn(getRunningApplicationReport(null, 0))
            .thenReturn(getRunningApplicationReport("am2", 90));
    } catch (YarnException e) {
        throw new IOException(e);
    }
    GetJobReportResponse jobReportResponse1 = mock(GetJobReportResponse.class);
    when(jobReportResponse1.getJobReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "jobName-firstGen", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null, false, ""));
    // First AM returns a report with jobName firstGen and simulates AM shutdown
    // on second invocation.
    MRClientProtocol firstGenAMProxy = mock(MRClientProtocol.class);
    when(firstGenAMProxy.getJobReport(any(GetJobReportRequest.class))).thenReturn(jobReportResponse1).thenThrow(new RuntimeException("AM is down!"));
    GetJobReportResponse jobReportResponse2 = mock(GetJobReportResponse.class);
    when(jobReportResponse2.getJobReport()).thenReturn(MRBuilderUtils.newJobReport(jobId, "jobName-secondGen", "user", JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null, false, ""));
    // Second AM generation returns a report with jobName secondGen
    MRClientProtocol secondGenAMProxy = mock(MRClientProtocol.class);
    when(secondGenAMProxy.getJobReport(any(GetJobReportRequest.class))).thenReturn(jobReportResponse2);
    ClientServiceDelegate clientServiceDelegate = spy(getClientServiceDelegate(historyServerProxy, rmDelegate));
    // First time, connection should be to AM1, then to AM2. Further requests
    // should use the same proxy to AM2 and so instantiateProxy shouldn't be
    // called.
    doReturn(firstGenAMProxy).doReturn(secondGenAMProxy).when(clientServiceDelegate).instantiateAMProxy(any(InetSocketAddress.class));
    JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
    Assert.assertNotNull(jobStatus);
    Assert.assertEquals("jobName-firstGen", jobStatus.getJobName());
    jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
    Assert.assertNotNull(jobStatus);
    Assert.assertEquals("jobName-secondGen", jobStatus.getJobName());
    jobStatus = clientServiceDelegate.getJobStatus(oldJobId);
    Assert.assertNotNull(jobStatus);
    Assert.assertEquals("jobName-secondGen", jobStatus.getJobName());
    verify(clientServiceDelegate, times(2)).instantiateAMProxy(any(InetSocketAddress.class));
}
Also used: JobStatus (org.apache.hadoop.mapreduce.JobStatus), InetSocketAddress (java.net.InetSocketAddress), IOException (java.io.IOException), YarnException (org.apache.hadoop.yarn.exceptions.YarnException), GetJobReportRequest (org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest), MRClientProtocol (org.apache.hadoop.mapreduce.v2.api.MRClientProtocol), GetJobReportResponse (org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse), Test (org.junit.Test)
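
The whole failover simulation above rests on two Mockito idioms: consecutive stubbing (each extra thenReturn/doReturn supplies the value for the next call, and the last stub repeats thereafter) and doReturn(..).when(spy) for stubbing a spy without invoking the real method. A self-contained illustration with a hypothetical Greeter class:

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.spy;

final class ConsecutiveStubbingDemo {
    static class Greeter {                          // hypothetical class for illustration
        String greet() { return "real"; }
    }

    public static void main(String[] args) {
        Greeter greeter = spy(new Greeter());
        // Stub two consecutive return values without invoking the real method.
        doReturn("first").doReturn("second").when(greeter).greet();
        System.out.println(greeter.greet());        // first
        System.out.println(greeter.greet());        // second
        System.out.println(greeter.greet());        // second (last stub repeats)
    }
}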

Aggregations

JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 276 usages
Test (org.junit.Test): 238 usages
Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 212 usages
ClientResponse (com.sun.jersey.api.client.ClientResponse): 103 usages
WebResource (com.sun.jersey.api.client.WebResource): 103 usages
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 88 usages
JSONObject (org.codehaus.jettison.json.JSONObject): 81 usages
Configuration (org.apache.hadoop.conf.Configuration): 77 usages
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 61 usages
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 55 usages
Path (org.apache.hadoop.fs.Path): 52 usages
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 51 usages
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 47 usages
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 41 usages
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 36 usages
AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext): 35 usages
IOException (java.io.IOException): 32 usages
JobConf (org.apache.hadoop.mapred.JobConf): 28 usages
HistoryFileInfo (org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo): 25 usages
JobID (org.apache.hadoop.mapreduce.JobID): 23 usages