Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: the class TestCombineFileInputFormat, method testRecordReaderInit.
@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
  // Test that we properly initialize the child recordreader when
  // CombineFileInputFormat and CombineFileRecordReader are used.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf1 = new Configuration();
  conf1.set(DUMMY_KEY, "STATE1");
  TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);
  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();
  Path[] files = { new Path("file1") };
  long[] lengths = { 1 };
  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context1);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
  // Verify that the initial configuration is the one being used.
  // Right after construction the dummy key should have value "STATE1".
  assertEquals("Invalid initial dummy key value", "STATE1",
      rr.getCurrentKey().toString());
  // Switch the active context for the RecordReader...
  Configuration conf2 = new Configuration();
  conf2.set(DUMMY_KEY, "STATE2");
  TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
  rr.initialize(split, context2);
  // And verify that the new context is updated into the child record reader.
  assertEquals("Invalid secondary dummy key value", "STATE2",
      rr.getCurrentKey().toString());
}
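ChildRRInputFormat and DummyRecordReader are helper inner classes of the test and are not shown above. A minimal sketch of what such a child reader could look like, assuming only that it must expose the DUMMY_KEY value from whichever TaskAttemptContext is active; the names, the assumed key constant, and the bodies below are illustrative, not the Hadoop source:

// Illustrative sketch only: a child RecordReader that surfaces the configured
// DUMMY_KEY value as its current key, so that the wrapping
// CombineFileRecordReader reflects the active TaskAttemptContext.
public static class DummyRecordReader extends RecordReader<Text, Text> {
  // Assumed constant; the real test defines its own DUMMY_KEY field.
  private static final String DUMMY_KEY = "dummy.rr.key";

  private Text key;

  // CombineFileRecordReader instantiates child readers via reflection through
  // a constructor with exactly this (CombineFileSplit, TaskAttemptContext,
  // Integer) shape, which is why "STATE1" is visible right after construction.
  public DummyRecordReader(CombineFileSplit split, TaskAttemptContext context,
      Integer index) {
    this.key = new Text(context.getConfiguration().get(DUMMY_KEY));
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) {
    // Re-read the configuration so a newly supplied context (context2 in the
    // test above) is reflected in the key.
    this.key = new Text(context.getConfiguration().get(DUMMY_KEY));
  }

  @Override
  public boolean nextKeyValue() {
    return false;
  }

  @Override
  public Text getCurrentKey() {
    return key;
  }

  @Override
  public Text getCurrentValue() {
    return null;
  }

  @Override
  public float getProgress() {
    return 0.0f;
  }

  @Override
  public void close() {
  }
}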
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: the class TestMRJobs, method testFailingMapper.
@Test(timeout = 60000)
public void testFailingMapper() throws IOException, InterruptedException,
    ClassNotFoundException {
  LOG.info("\n\n\nStarting testFailingMapper().");
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
        + " not found. Not running test.");
    return;
  }
  Job job = runFailingMapperJob();
  TaskID taskID = new TaskID(job.getJobID(), TaskType.MAP, 0);
  TaskAttemptID aId = new TaskAttemptID(taskID, 0);
  System.out.println("Diagnostics for " + aId + " :");
  for (String diag : job.getTaskDiagnostics(aId)) {
    System.out.println(diag);
  }
  aId = new TaskAttemptID(taskID, 1);
  System.out.println("Diagnostics for " + aId + " :");
  for (String diag : job.getTaskDiagnostics(aId)) {
    System.out.println(diag);
  }
  TaskCompletionEvent[] events = job.getTaskCompletionEvents(0, 2);
  Assert.assertEquals(TaskCompletionEvent.Status.FAILED, events[0].getStatus());
  Assert.assertEquals(TaskCompletionEvent.Status.TIPFAILED, events[1].getStatus());
  Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
  verifyFailingMapperCounters(job);
  // TODO later: add explicit "isUber()" checks of some sort
}
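The diagnostics above are keyed by attempt IDs composed from the job ID. As a standalone illustration of how these IDs compose and round-trip through their string form (the "jt"/0 job identifier below is arbitrary; the test uses job.getJobID() instead):

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdDemo {
  public static void main(String[] args) {
    // Arbitrary job identifier ("jt", sequence 0), first map task, first attempt.
    JobID jobId = new JobID("jt", 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);

    // Prints something like attempt_jt_0000_m_000000_0
    System.out.println(attemptId);

    // The string form can be parsed back into an equal TaskAttemptID.
    TaskAttemptID parsed = TaskAttemptID.forName(attemptId.toString());
    System.out.println(parsed.equals(attemptId));
  }
}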
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: the class TestMRJobsWithProfiler, method testProfilerInternal.
private void testProfilerInternal(boolean useDefault) throws Exception {
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
        + " not found. Not running test.");
    return;
  }
  final SleepJob sleepJob = new SleepJob();
  final JobConf sleepConf = new JobConf(mrCluster.getConfig());
  sleepConf.setProfileEnabled(true);
  sleepConf.setProfileTaskRange(true, String.valueOf(PROFILED_TASK_ID));
  sleepConf.setProfileTaskRange(false, String.valueOf(PROFILED_TASK_ID));
  if (!useDefault) {
    // use hprof for map to profile.out
    sleepConf.set(MRJobConfig.TASK_MAP_PROFILE_PARAMS,
        "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,"
            + "file=%s");
    // use Xprof for reduce to stdout
    sleepConf.set(MRJobConfig.TASK_REDUCE_PROFILE_PARAMS, "-Xprof");
  }
  sleepJob.setConf(sleepConf);
  // 2-map-2-reduce SleepJob
  final Job job = sleepJob.createJob(2, 2, 500, 1, 500, 1);
  job.setJarByClass(SleepJob.class);
  // The AppMaster jar itself.
  job.addFileToClassPath(APP_JAR);
  job.waitForCompletion(true);
  final JobId jobId = TypeConverter.toYarn(job.getJobID());
  final ApplicationId appID = jobId.getAppId();
  int pollElapsed = 0;
  while (true) {
    Thread.sleep(1000);
    pollElapsed += 1000;
    if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager()
        .getRMContext().getRMApps().get(appID).getState())) {
      break;
    }
    if (pollElapsed >= 60000) {
      LOG.warn("application did not reach terminal state within 60 seconds");
      break;
    }
  }
  Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager()
      .getRMContext().getRMApps().get(appID).getState());
  // Job finished, verify logs
  final Configuration nmConf = mrCluster.getNodeManager(0).getConfig();
  final String appIdStr = appID.toString();
  final String appIdSuffix =
      appIdStr.substring("application_".length(), appIdStr.length());
  final String containerGlob = "container_" + appIdSuffix + "_*_*";
  final Map<TaskAttemptID, Path> taLogDirs = new HashMap<TaskAttemptID, Path>();
  final Pattern taskPattern = Pattern.compile(
      ".*Task:(attempt_" + appIdSuffix + "_[rm]_" + "[0-9]+_[0-9]+).*");
  for (String logDir : nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) {
    for (FileStatus fileStatus : localFs.globStatus(new Path(
        logDir + Path.SEPARATOR + appIdStr + Path.SEPARATOR + containerGlob
            + Path.SEPARATOR + TaskLog.LogName.SYSLOG))) {
      final BufferedReader br = new BufferedReader(
          new InputStreamReader(localFs.open(fileStatus.getPath())));
      String line;
      while ((line = br.readLine()) != null) {
        final Matcher m = taskPattern.matcher(line);
        if (m.matches()) {
          // found Task done message
          taLogDirs.put(TaskAttemptID.forName(m.group(1)),
              fileStatus.getPath().getParent());
          break;
        }
      }
      br.close();
    }
  }
  // all 4 attempts found
  Assert.assertEquals(4, taLogDirs.size());
  for (Map.Entry<TaskAttemptID, Path> dirEntry : taLogDirs.entrySet()) {
    final TaskAttemptID tid = dirEntry.getKey();
    final Path profilePath = new Path(dirEntry.getValue(),
        TaskLog.LogName.PROFILE.toString());
    final Path stdoutPath = new Path(dirEntry.getValue(),
        TaskLog.LogName.STDOUT.toString());
    if (useDefault || tid.getTaskType() == TaskType.MAP) {
      if (tid.getTaskID().getId() == PROFILED_TASK_ID) {
        // verify profile.out
        final BufferedReader br = new BufferedReader(
            new InputStreamReader(localFs.open(profilePath)));
        final String line = br.readLine();
        Assert.assertTrue("No hprof content found!",
            line != null && line.startsWith("JAVA PROFILE"));
        br.close();
        Assert.assertEquals(0L, localFs.getFileStatus(stdoutPath).getLen());
      } else {
        Assert.assertFalse("hprof file should not exist",
            localFs.exists(profilePath));
      }
    } else {
      Assert.assertFalse("hprof file should not exist",
          localFs.exists(profilePath));
      if (tid.getTaskID().getId() == PROFILED_TASK_ID) {
        // reducer is profiled with Xprof
        final BufferedReader br = new BufferedReader(
            new InputStreamReader(localFs.open(stdoutPath)));
        boolean flatProfFound = false;
        String line;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("Flat profile")) {
            flatProfFound = true;
            break;
          }
        }
        br.close();
        Assert.assertTrue("Xprof flat profile not found!", flatProfFound);
      } else {
        Assert.assertEquals(0L, localFs.getFileStatus(stdoutPath).getLen());
      }
    }
  }
}
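The regex-plus-forName step above turns syslog lines back into TaskAttemptID objects, and it is the parsed fields that drive the map/reduce branching and the PROFILED_TASK_ID comparison. A small self-contained sketch of that parsing (the attempt string below is made up to match the same shape, not taken from a real run):

import org.apache.hadoop.mapreduce.TaskAttemptID;

public class ParseAttemptIdDemo {
  public static void main(String[] args) {
    // Hypothetical attempt string of the same attempt_<suffix>_<m|r>_<task>_<attempt> shape.
    TaskAttemptID tid =
        TaskAttemptID.forName("attempt_1234567890123_0001_r_000000_0");

    System.out.println(tid.getTaskType());        // REDUCE
    System.out.println(tid.getTaskID().getId());  // 0, the task number compared to PROFILED_TASK_ID
    System.out.println(tid.getJobID());           // job_1234567890123_0001
  }
}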
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: the class TestGridMixClasses, method testLoadJobLoadRecordReader.
/*
 * Test LoadRecordReader, which reads data from a set of files.
 */
@Test(timeout = 3000)
public void testLoadJobLoadRecordReader() throws Exception {
  LoadJob.LoadRecordReader test = new LoadJob.LoadRecordReader();
  Configuration conf = new Configuration();
  FileSystem fs1 = mock(FileSystem.class);
  when(fs1.open((Path) anyObject()))
      .thenReturn(new FakeFSDataInputStream(new FakeInputStream()));
  Path p1 = mock(Path.class);
  when(p1.getFileSystem((JobConf) anyObject())).thenReturn(fs1);
  FileSystem fs2 = mock(FileSystem.class);
  when(fs2.open((Path) anyObject()))
      .thenReturn(new FakeFSDataInputStream(new FakeInputStream()));
  Path p2 = mock(Path.class);
  when(p2.getFileSystem((JobConf) anyObject())).thenReturn(fs2);
  Path[] paths = { p1, p2 };
  long[] start = { 0, 0 };
  long[] lengths = { 1000, 1000 };
  String[] locations = { "temp1", "temp2" };
  CombineFileSplit cfsplit = new CombineFileSplit(paths, start, lengths, locations);
  double[] reduceBytes = { 100, 100 };
  double[] reduceRecords = { 2, 2 };
  long[] reduceOutputBytes = { 500, 500 };
  long[] reduceOutputRecords = { 2, 2 };
  ResourceUsageMetrics metrics = new ResourceUsageMetrics();
  ResourceUsageMetrics[] rMetrics = { new ResourceUsageMetrics(),
      new ResourceUsageMetrics() };
  LoadSplit input = new LoadSplit(cfsplit, 2, 3, 1500L, 2L, 3000L, 2L,
      reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords,
      metrics, rMetrics);
  TaskAttemptID taskId = new TaskAttemptID();
  TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, taskId);
  test.initialize(input, ctx);
  GridmixRecord gr = test.getCurrentValue();
  int counter = 0;
  while (test.nextKeyValue()) {
    gr = test.getCurrentValue();
    if (counter == 0) {
      // read first file
      assertEquals(0.5, test.getProgress(), 0.001);
    } else if (counter == 1) {
      // read second file
      assertEquals(1.0, test.getProgress(), 0.001);
    }
    assertEquals(1000, gr.getSize());
    counter++;
  }
  assertEquals(1000, gr.getSize());
  // Two files have been read
  assertEquals(2, counter);
  test.close();
}
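This example and the first one share the same harness pattern: a TaskAttemptContextImpl built from a plain Configuration and a synthetic TaskAttemptID is enough to drive a RecordReader outside of a running job. A self-contained sketch of that pattern, using Hadoop's stock LineRecordReader on a throwaway local file (the file name and contents below are arbitrary, not from the test):

import java.io.File;
import java.io.PrintWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderHarnessDemo {
  public static void main(String[] args) throws Exception {
    // Write a small local file to read back.
    File input = File.createTempFile("rr-demo", ".txt");
    try (PrintWriter pw = new PrintWriter(input)) {
      pw.println("hello");
      pw.println("world");
    }

    // A synthetic attempt id is enough to build the context outside a real job.
    Configuration conf = new Configuration();
    TaskAttemptContextImpl ctx =
        new TaskAttemptContextImpl(conf, new TaskAttemptID());

    FileSplit split = new FileSplit(new Path(input.toURI()), 0, input.length(), null);
    try (LineRecordReader reader = new LineRecordReader()) {
      reader.initialize(split, ctx);
      while (reader.nextKeyValue()) {
        LongWritable offset = reader.getCurrentKey();
        Text line = reader.getCurrentValue();
        System.out.println(offset + "\t" + line);
      }
    }
  }
}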
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache: the class TestGridMixClasses, method testSleepMapper.
/*
* test SleepMapper
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test(timeout = 30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext =
      new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context =
      new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);
  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));
  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
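The scaffolding above (a MapContextImpl wrapped by WrappedMapper around a synthetic TaskAttemptID) is the general recipe for unit-testing a new-API Mapper. A hedged sketch of a reusable helper along those lines, using Mockito mocks where the test above supplies its own fakes; the class and method names here are assumptions, not Hadoop code:

import static org.mockito.Mockito.mock;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class MapperTestContexts {

  // Hypothetical helper (not part of Hadoop): builds a Mapper.Context around a
  // synthetic TaskAttemptID so a mapper's map()/cleanup() can be called directly.
  @SuppressWarnings({ "unchecked", "rawtypes" })
  public static <KI, VI, KO, VO> Mapper<KI, VI, KO, VO>.Context newTestContext(
      Configuration conf, InputSplit split) {
    MapContext<KI, VI, KO, VO> mapContext =
        new MapContextImpl<KI, VI, KO, VO>(conf, new TaskAttemptID(),
            mock(RecordReader.class), mock(RecordWriter.class),
            mock(OutputCommitter.class),
            new TaskAttemptContextImpl.DummyReporter(), split);
    return new WrappedMapper<KI, VI, KO, VO>().getMapContext(mapContext);
  }
}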