
Example 31 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From the class TestCombineFileRecordReader, method testProgressIsReportedIfInputASeriesOfEmptyFiles.

@SuppressWarnings("unchecked")
@Test
public void testProgressIsReportedIfInputASeriesOfEmptyFiles() throws IOException, InterruptedException {
    JobConf conf = new JobConf();
    Path[] paths = new Path[3];
    File[] files = new File[3];
    long[] fileLength = new long[3];
    try {
        // Create three empty files; the split is given synthetic lengths 0, 1 and 2.
        for (int i = 0; i < 3; i++) {
            File dir = new File(outDir.toString());
            dir.mkdir();
            files[i] = new File(dir, "testfile" + i);
            FileWriter fileWriter = new FileWriter(files[i]);
            fileWriter.flush();
            fileWriter.close();
            fileLength[i] = i;
            paths[i] = new Path(outDir + "/testfile" + i);
        }
        CombineFileSplit combineFileSplit = new CombineFileSplit(paths, fileLength);
        TaskAttemptID taskAttemptID = Mockito.mock(TaskAttemptID.class);
        TaskReporter reporter = Mockito.mock(TaskReporter.class);
        TaskAttemptContextImpl taskAttemptContext = new TaskAttemptContextImpl(conf, taskAttemptID, reporter);
        CombineFileRecordReader cfrr = new CombineFileRecordReader(combineFileSplit, taskAttemptContext, TextRecordReaderWrapper.class);
        cfrr.initialize(combineFileSplit, taskAttemptContext);
        verify(reporter).progress();
        Assert.assertFalse(cfrr.nextKeyValue());
        verify(reporter, times(3)).progress();
    } finally {
        FileUtil.fullyDelete(new File(outDir.toString()));
    }
}
Also used: Path (org.apache.hadoop.fs.Path), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), FileWriter (java.io.FileWriter), TaskReporter (org.apache.hadoop.mapred.Task.TaskReporter), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), JobConf (org.apache.hadoop.mapred.JobConf), File (java.io.File), Test (org.junit.Test)
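
Example 31 works because the three-argument constructor accepts any org.apache.hadoop.mapreduce.StatusReporter, and TaskReporter implements it. As a minimal sketch, assuming a no-op reporter is acceptable, the same context can be built without Mockito; NoOpReporterExample and NoOpReporter below are hypothetical names, not Hadoop classes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class NoOpReporterExample {

    // Hypothetical helper: a StatusReporter that discards all updates.
    // Returning null is acceptable only because this sketch never reads counters.
    static class NoOpReporter extends StatusReporter {
        @Override public Counter getCounter(Enum<?> name) { return null; }
        @Override public Counter getCounter(String group, String name) { return null; }
        @Override public void progress() { /* no-op */ }
        @Override public float getProgress() { return 0f; }
        @Override public void setStatus(String status) { /* no-op */ }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // forName parses the canonical string form of a task attempt id.
        TaskAttemptID taskId =
            TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");
        TaskAttemptContextImpl context =
            new TaskAttemptContextImpl(conf, taskId, new NoOpReporter());
        System.out.println(context.getTaskAttemptID());
    }
}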

Example 32 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From the class TestFileOutputCommitter, method testCommitterRetryInternal.

// Exercise the committer retry path: commitJob is attempted twice.
private void testCommitterRetryInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    // Allow only a single commit attempt.
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, 1);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    try {
        committer.commitJob(jContext);
        Assert.fail("Commit successful: wrong behavior for the first time " + "commit.");
    } catch (IOException e) {
        // commit again.
        try {
            committer.commitJob(jContext);
            // version 1 should not reach this point.
            if (version == 1) {
                Assert.fail("Commit successful after retry: wrong behavior for " + "version 1.");
            }
        } catch (FileNotFoundException ex) {
            if (version == 2) {
                Assert.fail("Commit failed after retry: wrong behavior for" + " version 2.");
            }
            assertTrue(ex.getMessage().contains(committer.getJobAttemptPath(jContext).toString() + " does not exist"));
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), FileNotFoundException (java.io.FileNotFoundException), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File)
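
CommitterWithFailedThenSucceed is a helper defined elsewhere in TestFileOutputCommitter; the snippet does not show it. A minimal sketch of the fail-once pattern it relies on, assuming the real helper differs in detail (FailOnceCommitter is a hypothetical name):

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

// Hypothetical stand-in for the CommitterWithFailedThenSucceed test helper.
class FailOnceCommitter extends FileOutputCommitter {
    private boolean failedOnce = false;

    FailOnceCommitter(Path outputPath, JobContext context) throws IOException {
        super(outputPath, context);
    }

    @Override
    public void commitJob(JobContext context) throws IOException {
        // Let the real commit run first, then fail once so the caller retries.
        super.commitJob(context);
        if (!failedOnce) {
            failedOnce = true;
            throw new IOException("injected failure after first commit");
        }
    }
}

This is what gives the test its version-dependent outcome: with algorithm version 1 the retry fails with FileNotFoundException because the first (successful) commit already removed the job attempt path, while with version 2 the task outputs were moved into place at commitTask time, so the retried commitJob succeeds.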

Example 33 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From the class TestFileOutputCommitter, method testCommitterInternal.

private void testCommitterInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File)
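
The test constructs FileOutputCommitter directly, but outside of tests the committer is normally obtained from the OutputFormat, which derives it from the configured output path. A minimal sketch, assuming FileOutputFormat.setOutputPath has already been called for the job (CommitterFromFormat is a hypothetical name):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class CommitterFromFormat {

    // Returns the committer the framework itself would use for this context.
    static OutputCommitter committerFor(TaskAttemptContext context) throws IOException {
        TextOutputFormat<LongWritable, Text> format = new TextOutputFormat<>();
        // FileOutputFormat derives a FileOutputCommitter from the output
        // directory configured in the context (set via FileOutputFormat.setOutputPath).
        return format.getOutputCommitter(context);
    }
}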

Example 34 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From the class TestFileOutputCommitter, method testInvalidVersionNumber.

@Test
public void testInvalidVersionNumber() throws IOException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 3);
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    try {
        new FileOutputCommitter(outDir, tContext);
        fail("should've thrown an exception!");
    } catch (IOException e) {
    // expected: only algorithm versions 1 and 2 are supported
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job)
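
Only algorithm versions 1 and 2 are accepted; the constructor rejects anything else with an IOException, which is what the test above asserts. A minimal sketch of selecting version 2 explicitly (SelectCommitAlgorithm is a hypothetical name; the property key behind the constant is mapreduce.fileoutputcommitter.algorithm.version):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

public class SelectCommitAlgorithm {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Valid values are 1 and 2; anything else makes the
        // FileOutputCommitter constructor throw an IOException.
        conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 2);
        System.out.println(conf.getInt(
            FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1));
    }
}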

Example 35 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

From the class TestFileOutputCommitter, method testConcurrentCommitTaskWithSubDir. Two task attempts commit concurrently into the same output subdirectory; the test verifies that the merged result does not end up nested as sub_dir/sub_dir.

private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
    final Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    final Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
    FileSystem.closeAll();
    final JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    final FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);
    amCommitter.setupJob(jContext);
    final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
    taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
    taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
    final TextOutputFormat[] tof = new TextOutputFormat[2];
    for (int i = 0; i < tof.length; i++) {
        tof[i] = new TextOutputFormat() {

            // Redirect this task's work file into SUB_DIR under the task's work path.
            @Override
            public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
                final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context);
                return new Path(new Path(foc.getWorkPath(), SUB_DIR), getUniqueFile(context, getOutputName(context), extension));
            }
        };
    }
    final ExecutorService executor = HadoopExecutors.newFixedThreadPool(2);
    try {
        for (int i = 0; i < taCtx.length; i++) {
            final int taskIdx = i;
            executor.submit(new Callable<Void>() {

                @Override
                public Void call() throws IOException, InterruptedException {
                    final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
                    outputCommitter.setupTask(taCtx[taskIdx]);
                    final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
                    writeOutput(rw, taCtx[taskIdx]);
                    outputCommitter.commitTask(taCtx[taskIdx]);
                    return null;
                }
            });
        }
    } finally {
        executor.shutdown();
        while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
            LOG.info("Awaiting thread termination!");
        }
    }
    amCommitter.commitJob(jContext);
    final RawLocalFileSystem lfs = new RawLocalFileSystem();
    lfs.setConf(conf);
    assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
    // validate output
    validateContent(OUT_SUB_DIR);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: Path (org.apache.hadoop.fs.Path), OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), RawLocalFileSystem (org.apache.hadoop.fs.RawLocalFileSystem), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), ExecutorService (java.util.concurrent.ExecutorService), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File)

Aggregations

TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 47 uses
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 38 uses
Configuration (org.apache.hadoop.conf.Configuration): 35 uses
File (java.io.File): 25 uses
Job (org.apache.hadoop.mapreduce.Job): 23 uses
Path (org.apache.hadoop.fs.Path): 22 uses
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 22 uses
JobContext (org.apache.hadoop.mapreduce.JobContext): 21 uses
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21 uses
Test (org.junit.Test): 17 uses
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 12 uses
LongWritable (org.apache.hadoop.io.LongWritable): 11 uses
IOException (java.io.IOException): 10 uses
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10 uses
MapFile (org.apache.hadoop.io.MapFile): 9 uses
NullWritable (org.apache.hadoop.io.NullWritable): 6 uses
ArrayList (java.util.ArrayList): 5 uses
HashSet (java.util.HashSet): 5 uses
FileAttribute (java.nio.file.attribute.FileAttribute): 4 uses
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 4 uses