Search in sources :

Example 16 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestLineRecordReader method readRecords.

// Use the LineRecordReader to read records from the file
public ArrayList<String> readRecords(URL testFileUrl, int splitSize) throws IOException {
    // Set up context
    File testFile = new File(testFileUrl.getFile());
    long testFileSize = testFile.length();
    Path testFilePath = new Path(testFile.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 1);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // Gather the records returned by the record reader
    ArrayList<String> records = new ArrayList<String>();
    long offset = 0;
    while (offset < testFileSize) {
        FileSplit split = new FileSplit(testFilePath, offset, splitSize, null);
        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            records.add(reader.getCurrentValue().toString());
        }
        offset += splitSize;
    }
    return records;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) File(java.io.File)
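
For context, here is a minimal, self-contained sketch of the same LineRecordReader pattern against a single split over a local temp file. It is not part of the Hadoop test sources and assumes hadoop-common and hadoop-mapreduce-client-core are on the classpath.

import java.io.File;
import java.io.FileWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class LineRecordReaderSketch {
    public static void main(String[] args) throws Exception {
        // Write a tiny three-line input file.
        File input = File.createTempFile("lines", ".txt");
        try (FileWriter w = new FileWriter(input)) {
            w.write("one\ntwo\nthree\n");
        }
        Configuration conf = new Configuration();
        // A default TaskAttemptID is sufficient outside a real job.
        TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        // One split covering the whole file.
        FileSplit split = new FileSplit(new Path(input.getAbsolutePath()), 0, input.length(), null);
        LineRecordReader reader = new LineRecordReader();
        reader.initialize(split, ctx);
        while (reader.nextKeyValue()) {
            // Key is the byte offset of the line, value is the line text.
            System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
        }
        reader.close();
    }
}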

Example 17 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestWrappedRRClassloader method testClassLoader.

/**
   * Tests the class loader set by 
   * {@link Configuration#setClassLoader(ClassLoader)}
   * is inherited by any {@link WrappedRecordReader}s created by
   * {@link CompositeRecordReader}
   */
@Test
public void testClassLoader() throws Exception {
    Configuration conf = new Configuration();
    Fake_ClassLoader classLoader = new Fake_ClassLoader();
    conf.setClassLoader(classLoader);
    assertTrue(conf.getClassLoader() instanceof Fake_ClassLoader);
    FileSystem fs = FileSystem.get(conf);
    Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
    Path base = new Path(testdir, "/empty");
    Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
    conf.set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));
    CompositeInputFormat<NullWritable> inputFormat = new CompositeInputFormat<NullWritable>();
    // create dummy TaskAttemptID
    TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
    inputFormat.createRecordReader(inputFormat.getSplits(Job.getInstance(conf)).get(0), new TaskAttemptContextImpl(conf, tid));
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileSystem(org.apache.hadoop.fs.FileSystem) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)
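
As a side note, the dummy TaskAttemptID built above uses the five-argument constructor (jtIdentifier, jobId, taskType, taskId, attemptId). A short sketch, separate from the test, showing how those pieces map onto the printed form:

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdSketch {
    public static void main(String[] args) {
        TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
        System.out.println(tid);                // attempt_jt_0001_m_000000_0
        System.out.println(tid.getJobID());     // job_jt_0001
        System.out.println(tid.getTaskID());    // task_jt_0001_m_000000
        System.out.println(tid.getTaskType());  // MAP
        System.out.println(tid.getId());        // 0
    }
}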

Example 18 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestUberAM method testFailingMapper.

@Override
@Test
public void testFailingMapper() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting uberized testFailingMapper().");
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    Job job = runFailingMapperJob();
    // should be able to get diags for single task attempt...
    TaskID taskID = new TaskID(job.getJobID(), TaskType.MAP, 0);
    TaskAttemptID aId = new TaskAttemptID(taskID, 0);
    System.out.println("Diagnostics for " + aId + " :");
    for (String diag : job.getTaskDiagnostics(aId)) {
        System.out.println(diag);
    }
    // ...but not for second (shouldn't exist:  uber-AM overrode max attempts)
    boolean secondTaskAttemptExists = true;
    try {
        aId = new TaskAttemptID(taskID, 1);
        System.out.println("Diagnostics for " + aId + " :");
        for (String diag : job.getTaskDiagnostics(aId)) {
            System.out.println(diag);
        }
    } catch (Exception e) {
        secondTaskAttemptExists = false;
    }
    Assert.assertFalse(secondTaskAttemptExists);
    TaskCompletionEvent[] events = job.getTaskCompletionEvents(0, 2);
    Assert.assertEquals(1, events.length);
    // TIPFAILED if it comes from the AM, FAILED if it comes from the JHS
    TaskCompletionEvent.Status status = events[0].getStatus();
    Assert.assertTrue(status == TaskCompletionEvent.Status.FAILED || status == TaskCompletionEvent.Status.TIPFAILED);
    Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
//Disabling till UberAM honors MRJobConfig.MAP_MAX_ATTEMPTS
//verifyFailingMapperCounters(job);
// TODO later:  add explicit "isUber()" checks of some sort
}
Also used : TaskID(org.apache.hadoop.mapreduce.TaskID) TaskCompletionEvent(org.apache.hadoop.mapreduce.TaskCompletionEvent) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Job(org.apache.hadoop.mapreduce.Job) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)
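
The try/catch probing above can be read as a small reusable predicate. A hedged sketch (the AttemptProbe class is hypothetical, not from the Hadoop sources): fetching diagnostics succeeds only for attempts that actually ran.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptID;

final class AttemptProbe {
    /** Returns true if diagnostics could be fetched for the given attempt. */
    static boolean attemptExists(Job job, TaskAttemptID aId) {
        try {
            for (String diag : job.getTaskDiagnostics(aId)) {
                System.out.println(aId + ": " + diag);
            }
            return true;
        } catch (Exception e) {
            // getTaskDiagnostics throws for attempts that never existed,
            // e.g. a second attempt suppressed by the uber-AM.
            return false;
        }
    }
}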

Example 19 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestMergeManager method verifyReservedMapOutputType.

private void verifyReservedMapOutputType(MergeManagerImpl<Text, Text> mgr, long size, String expectedShuffleMode) throws IOException {
    // forName() parses the canonical attempt string back into a TaskAttemptID.
    final TaskAttemptID mapId = TaskAttemptID.forName("attempt_0_1_m_1_1");
    // Reserve `size` bytes for this attempt's map output; the returned
    // MapOutput's description reports whether it was placed in memory or on disk.
    final MapOutput<Text, Text> mapOutput = mgr.reserve(mapId, size, 1);
    assertEquals("Shuffled bytes: " + size, expectedShuffleMode, mapOutput.getDescription());
    mgr.unreserve(size);
}
Also used : TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Text(org.apache.hadoop.io.Text)
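
The "attempt_0_1_m_1_1" literal above is the canonical string form attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>. A tiny sketch, separate from the test, of the forName() round trip:

import org.apache.hadoop.mapreduce.TaskAttemptID;

public class ForNameSketch {
    public static void main(String[] args) {
        TaskAttemptID mapId = TaskAttemptID.forName("attempt_0_1_m_1_1");
        System.out.println(mapId.getTaskID()); // task_0_0001_m_000001
        System.out.println(mapId.getId());     // 1 (the attempt number)
        // toString() regenerates a string forName() accepts again.
        System.out.println(TaskAttemptID.forName(mapId.toString()).equals(mapId)); // true
    }
}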

Example 20 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestMerger method testInMemoryAndOnDiskMerger.

@Test
public void testInMemoryAndOnDiskMerger() throws Throwable {
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    // write map outputs
    Map<String, String> map1 = new TreeMap<String, String>();
    map1.put("apple", "disgusting");
    map1.put("carrot", "delicious");
    Map<String, String> map2 = new TreeMap<String, String>();
    map2.put("banana", "pretty good");
    byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
    byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
    System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
    System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);
    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.add(mapOutput1);
    mapOutputs1.add(mapOutput2);
    inMemoryMerger.merge(mapOutputs1);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
    // write map outputs
    Map<String, String> map3 = new TreeMap<String, String>();
    map3.put("apple", "awesome");
    map3.put("carrot", "amazing");
    Map<String, String> map4 = new TreeMap<String, String>();
    map4.put("banana", "bla");
    byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
    byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
    System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
    System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);
    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.add(mapOutput3);
    mapOutputs2.add(mapOutput4);
    inMemoryMerger2.merge(mapOutputs2);
    Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
    List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
    Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
    List<String> keys = new ArrayList<String>();
    List<String> values = new ArrayList<String>();
    while (iterator.hasNext()) {
        CompressAwarePath next = iterator.next();
        readOnDiskMapOutput(conf, fs, next, keys, values);
        paths.add(next);
    }
    Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"));
    Assert.assertEquals(values, Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"));
    mergeManager.close();
    mergeManager = new MergeManagerImpl<Text, Text>(reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
    onDiskMerger.merge(paths);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    keys = new ArrayList<String>();
    values = new ArrayList<String>();
    readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
    Assert.assertEquals(keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"));
    Assert.assertEquals(values, Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"));
    mergeManager.close();
    Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
    Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
    Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
Also used : MROutputFiles(org.apache.hadoop.mapred.MROutputFiles) Progress(org.apache.hadoop.util.Progress) TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) TreeMap(java.util.TreeMap) CompressAwarePath(org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)
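
The test builds its IDs top-down: JobID -> TaskID -> TaskAttemptID, each level adding one index. A minimal sketch of that hierarchy on its own:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class IdHierarchySketch {
    public static void main(String[] args) {
        JobID jobId = new JobID("a", 0);                        // job_a_0000
        TaskID mapTask = new TaskID(jobId, TaskType.MAP, 1);    // task_a_0000_m_000001
        TaskAttemptID attempt = new TaskAttemptID(mapTask, 0);  // attempt_a_0000_m_000001_0
        System.out.println(jobId + " -> " + mapTask + " -> " + attempt);
    }
}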

Aggregations

TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 78
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 35
Test (org.junit.Test): 34
Configuration (org.apache.hadoop.conf.Configuration): 28
Path (org.apache.hadoop.fs.Path): 25
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 22
IOException (java.io.IOException): 19
JobID (org.apache.hadoop.mapreduce.JobID): 16
TaskID (org.apache.hadoop.mapreduce.TaskID): 15
File (java.io.File): 14
Job (org.apache.hadoop.mapreduce.Job): 14
ArrayList (java.util.ArrayList): 13
JobContext (org.apache.hadoop.mapreduce.JobContext): 12
LongWritable (org.apache.hadoop.io.LongWritable): 11
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 8
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 8
HashMap (java.util.HashMap): 7