Examples with TaskAttemptContext - org.apache.hadoop.mapreduce.TaskAttemptContext

Example 81 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestFileOutputCommitter method testCommitterRetryInternal.

// retry committer for 2 times.
private void testCommitterRetryInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    // only attempt for 1 time.
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, 1);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    try {
        committer.commitJob(jContext);
        Assert.fail("Commit successful: wrong behavior for the first time " + "commit.");
    } catch (IOException e) {
        // commit again.
        try {
            committer.commitJob(jContext);
            // version 1 shouldn't reach to here.
            if (version == 1) {
                Assert.fail("Commit successful after retry: wrong behavior for " + "version 1.");
            }
        } catch (FileNotFoundException ex) {
            if (version == 2) {
                Assert.fail("Commit failed after retry: wrong behavior for" + " version 2.");
            }
            assertTrue(ex.getMessage().contains(committer.getJobAttemptPath(jContext).toString() + " does not exist"));
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) FileNotFoundException(java.io.FileNotFoundException) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Example 82 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestFileOutputCommitter method testCommitterInternal.

private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Example 83 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestFileOutputCommitter method testInvalidVersionNumber.

public void testInvalidVersionNumber() throws IOException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    try {
        new FileOutputCommitter(outDir, tContext);
        fail("should've thrown an exception!");
    } catch (IOException e) {
    //test passed
    }
}

Also used : Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job)

Example 84 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestFileOutputCommitter method testConcurrentCommitTaskWithSubDir.

private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
    final Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    final Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
    FileSystem.closeAll();
    final JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    final FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);
    amCommitter.setupJob(jContext);
    final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
    taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
    taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
    final TextOutputFormat[] tof = new TextOutputFormat[2];
    for (int i = 0; i < tof.length; i++) {
        tof[i] = new TextOutputFormat() {

            @Override
            public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
                final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context);
                return new Path(new Path(foc.getWorkPath(), SUB_DIR), getUniqueFile(context, getOutputName(context), extension));
            }
        };
    }
    final ExecutorService executor = HadoopExecutors.newFixedThreadPool(2);
    try {
        for (int i = 0; i < taCtx.length; i++) {
            final int taskIdx = i;
            executor.submit(new Callable<Void>() {

                @Override
                public Void call() throws IOException, InterruptedException {
                    final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
                    outputCommitter.setupTask(taCtx[taskIdx]);
                    final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
                    writeOutput(rw, taCtx[taskIdx]);
                    outputCommitter.commitTask(taCtx[taskIdx]);
                    return null;
                }
            });
        }
    } finally {
        executor.shutdown();
        while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
            LOG.info("Awaiting thread termination!");
        }
    }
    amCommitter.commitJob(jContext);
    final RawLocalFileSystem lfs = new RawLocalFileSystem();
    lfs.setConf(conf);
    assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
    // validate output
    validateContent(OUT_SUB_DIR);
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) RawLocalFileSystem(org.apache.hadoop.fs.RawLocalFileSystem) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) ExecutorService(java.util.concurrent.ExecutorService) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Example 85 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache.

the class TestFileOutputCommitter method testFailAbortInternal.

private void testFailAbortInternal(int version) throws IOException, InterruptedException {
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
    conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    FileOutputFormat.setOutputPath(job, outDir);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
    RecordWriter<?, ?> theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    Throwable th = null;
    try {
        committer.abortTask(tContext);
    } catch (IOException ie) {
        th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    Path jtd = committer.getJobAttemptPath(jContext);
    File jobTmpDir = new File(jtd.toUri().getPath());
    Path ttd = committer.getTaskAttemptPath(tContext);
    File taskTmpDir = new File(ttd.toUri().getPath());
    File expectedFile = new File(taskTmpDir, partFile);
    assertTrue(expectedFile + " does not exists", expectedFile.exists());
    th = null;
    try {
        committer.abortJob(jContext, JobStatus.State.FAILED);
    } catch (IOException ie) {
        th = ie;
    }
    assertNotNull(th);
    assertTrue(th instanceof IOException);
    assertTrue(th.getMessage().contains("fake delete failed"));
    assertTrue("job temp dir does not exists", jobTmpDir.exists());
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) MapFile(org.apache.hadoop.io.MapFile) File(java.io.File)

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)110 Configuration (org.apache.hadoop.conf.Configuration)58 Job (org.apache.hadoop.mapreduce.Job)44 Path (org.apache.hadoop.fs.Path)39 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)38 InputSplit (org.apache.hadoop.mapreduce.InputSplit)36 Test (org.junit.Test)35 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)33 JobContext (org.apache.hadoop.mapreduce.JobContext)28 IOException (java.io.IOException)27 File (java.io.File)22 LongWritable (org.apache.hadoop.io.LongWritable)22 JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl)21 RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)19 MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl)17 FileSystem (org.apache.hadoop.fs.FileSystem)16 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)12 ArrayList (java.util.ArrayList)11 BytesWritable (org.apache.hadoop.io.BytesWritable)10 MapFile (org.apache.hadoop.io.MapFile)10