Example 1 with JobContextImpl

Use of org.apache.hadoop.mapreduce.task.JobContextImpl in project hadoop by apache.

From the class TestContextFactory, method setUp.

@Before
public void setUp() throws Exception {
    // Build a JobContextImpl from a fresh Configuration and a synthetic JobID.
    conf = new Configuration();
    jobId = new JobID("test", 1);
    jobContext = new JobContextImpl(conf, jobId);
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), Before (org.junit.Before)
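
This fixture is the minimal recipe for obtaining a JobContext in a unit test without running a real job. As a hedged sketch, a test method built on the conf, jobId, and jobContext fields above might look like the following (the test name and assertions are illustrative, not taken from the Hadoop source):

@Test
public void testContextExposesJobId() {
    // JobContextImpl hands back the JobID it was constructed with.
    assertEquals(jobId, jobContext.getJobID());
    assertEquals("test", jobContext.getJobID().getJtIdentifier());
    assertEquals(1, jobContext.getJobID().getId());
    // The configuration view is initialized from the Configuration passed in.
    assertNotNull(jobContext.getConfiguration());
}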

Example 2 with JobContextImpl

Use of org.apache.hadoop.mapreduce.task.JobContextImpl in project hadoop by apache.

From the class TestMRCJCFileOutputCommitter, method testCommitter.

@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    File expectedFile = new File(new Path(outDir, partFile).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    // assertEquals expects (expected, actual).
    assertEquals(expectedOutput.toString(), output);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job)
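
The writeOutput helper is not part of the excerpt. A plausible reconstruction, inferred from the expected output the test builds above (NullWritable stands in for whichever field is missing on the single-field lines; the real helper may differ in detail):

// Hypothetical reconstruction of the writeOutput helper.
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
        throws IOException, InterruptedException {
    NullWritable nullWritable = NullWritable.get();
    try {
        theRecordWriter.write(key1, val1);         // "key1\tval1"
        theRecordWriter.write(nullWritable, val1); // "val1"
        theRecordWriter.write(nullWritable, val2); // "val2"
        theRecordWriter.write(key2, nullWritable); // "key2"
        theRecordWriter.write(key1, nullWritable); // "key1"
        theRecordWriter.write(key2, val2);         // "key2\tval2"
    } finally {
        theRecordWriter.close(context);
    }
}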

Example 3 with JobContextImpl

Use of org.apache.hadoop.mapreduce.task.JobContextImpl in project hadoop by apache.

From the class TestMRCJCFileOutputCommitter, method testAbort.

@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());
    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job)
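
Examples 2 and 3 together cover both branches of the OutputCommitter protocol. A condensed sketch of the lifecycle, assuming the same committer, contexts, and helpers as in the examples above (the error handling is simplified for illustration):

committer.setupJob(jContext);
committer.setupTask(tContext);
try {
    writeOutput(theRecordWriter, tContext); // task output lands under committer.getWorkPath()
    committer.commitTask(tContext);  // promote the attempt's output toward the job directory
    committer.commitJob(jContext);   // publish final output and clean up temporary dirs
} catch (Exception e) {
    committer.abortTask(tContext);   // discard this attempt's temporary output
    committer.abortJob(jContext, JobStatus.State.FAILED); // remove the pending job directory
}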

Example 4 with JobContextImpl

Use of org.apache.hadoop.mapreduce.task.JobContextImpl in project hadoop by apache.

From the class TestUniformSizeInputFormat, method testGetSplits.

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);
    int sizePerMap = totalFileSize / nMaps;
    checkSplits(listFile, splits);
    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        RecordReader<Text, CopyListingFileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split, null);
        StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, 0);
        final TaskAttemptContext taskAttemptContext = stubContext.getContext();
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus[] fileStatus = fs.listStatus(sourcePath);
            if (fileStatus.length > 1) {
                // More than one entry means sourcePath is a directory;
                // only regular files count toward the split's byte total.
                continue;
            }
            currentSplitSize += fileStatus[0].getLen();
        }
        // Consecutive splits should differ by less than 10% of the ideal
        // bytes-per-map, except possibly for the final, remainder-bearing split.
        Assert.assertTrue(previousSplitSize == -1 || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }
    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
Also used: Path (org.apache.hadoop.fs.Path), JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), FileStatus (org.apache.hadoop.fs.FileStatus), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), Configuration (org.apache.hadoop.conf.Configuration), Text (org.apache.hadoop.io.Text), DistCpOptions (org.apache.hadoop.tools.DistCpOptions), StubContext (org.apache.hadoop.tools.StubContext), FileSystem (org.apache.hadoop.fs.FileSystem)
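
The same drive-it-by-hand pattern applies to any InputFormat: obtain splits from a JobContext, then pump each split through a RecordReader under a TaskAttemptContext. A generic sketch of that skeleton, using TaskAttemptContextImpl directly instead of DistCp's StubContext helper (the helper name drainInputFormat is illustrative):

// Generic helper: run every split of an InputFormat through its RecordReader.
static <K, V> void drainInputFormat(InputFormat<K, V> inputFormat, Configuration conf)
        throws IOException, InterruptedException {
    JobContext jobContext = new JobContextImpl(conf, new JobID());
    for (InputSplit split : inputFormat.getSplits(jobContext)) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        RecordReader<K, V> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        while (reader.nextKeyValue()) {
            // Inspect reader.getCurrentKey() / reader.getCurrentValue() here.
        }
        reader.close();
    }
}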

Example 5 with JobContextImpl

Use of org.apache.hadoop.mapreduce.task.JobContextImpl in project hadoop by apache.

From the class TestDynamicInputFormat, method testGetSplits.

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"), options);
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<Text, CopyListingFileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);
    int nFiles = 0;
    int taskId = 0;
    for (InputSplit split : splits) {
        StubContext stubContext = new StubContext(jobContext.getConfiguration(), null, taskId);
        final TaskAttemptContext taskAttemptContext = stubContext.getContext();
        RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        stubContext.setReader(recordReader);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            CopyListingFileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);
        ++taskId;
    }
    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
Also used: Path (org.apache.hadoop.fs.Path), JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), Text (org.apache.hadoop.io.Text), DistCpOptions (org.apache.hadoop.tools.DistCpOptions), CopyListingFileStatus (org.apache.hadoop.tools.CopyListingFileStatus), StubContext (org.apache.hadoop.tools.StubContext), Test (org.junit.Test)
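
DynamicInputFormat apportions work differently from UniformSizeInputFormat: instead of pre-assigning byte ranges of roughly equal size, it breaks the copy listing into chunks that map tasks claim at run time, which is why this test checks per-reader progress monotonicity rather than split sizes. A hedged sketch of choosing the strategy through DistCpOptions (setCopyStrategy mirrors DistCp's -strategy flag; treat the setter as an assumption, since newer Hadoop releases build options through DistCpOptions.Builder instead):

DistCpOptions options = getOptions();
// "dynamic" selects DynamicInputFormat; "uniformsize" is DistCp's default strategy.
options.setCopyStrategy("dynamic");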

Aggregations

JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 32 usages
Configuration (org.apache.hadoop.conf.Configuration): 29 usages
Job (org.apache.hadoop.mapreduce.Job): 22 usages
JobContext (org.apache.hadoop.mapreduce.JobContext): 22 usages
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 21 usages
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 21 usages
Path (org.apache.hadoop.fs.Path): 17 usages
File (java.io.File): 16 usages
IOException (java.io.IOException): 11 usages
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 10 usages
MapFile (org.apache.hadoop.io.MapFile): 9 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 8 usages
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 8 usages
LongWritable (org.apache.hadoop.io.LongWritable): 7 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
Test (org.junit.Test): 6 usages
DistCpOptions (org.apache.hadoop.tools.DistCpOptions): 5 usages
FileAttribute (java.nio.file.attribute.FileAttribute): 4 usages
ArrayList (java.util.ArrayList): 4 usages
HashSet (java.util.HashSet): 4 usages