use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
the class TestFileOutputCommitter method testMapFileOutputCommitterInternal.
private void testMapFileOutputCommitterInternal(int version) throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeMapFileOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
committer.commitJob(jContext);
// hidden filenames (_ or . prefixes)
try {
MapFileOutputFormat.getReaders(outDir, conf);
} catch (Exception e) {
fail("Fail to read from MapFileOutputFormat: " + e);
e.printStackTrace();
}
// validate output
validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir);
FileUtil.fullyDelete(new File(outDir.toString()));
}
use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
the class TestFileOutputCommitter method testRecoveryInternal.
private void testRecoveryInternal(int commitVersion, int recoveryVersion) throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, commitVersion);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
TextOutputFormat theOutputFormat = new TextOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
Path jobTempDir1 = committer.getCommittedTaskPath(tContext);
File jtd = new File(jobTempDir1.toUri().getPath());
if (commitVersion == 1) {
assertTrue("Version 1 commits to temporary dir " + jtd, jtd.exists());
validateContent(jtd);
} else {
assertFalse("Version 2 commits to output dir " + jtd, jtd.exists());
}
//now while running the second app attempt,
//recover the task output from first attempt
Configuration conf2 = job.getConfiguration();
conf2.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf2.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 2);
conf2.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, recoveryVersion);
JobContext jContext2 = new JobContextImpl(conf2, taskID.getJobID());
TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID);
FileOutputCommitter committer2 = new FileOutputCommitter(outDir, tContext2);
committer2.setupJob(tContext2);
Path jobTempDir2 = committer2.getCommittedTaskPath(tContext2);
File jtd2 = new File(jobTempDir2.toUri().getPath());
committer2.recoverTask(tContext2);
if (recoveryVersion == 1) {
assertTrue("Version 1 recovers to " + jtd2, jtd2.exists());
validateContent(jtd2);
} else {
assertFalse("Version 2 commits to output dir " + jtd2, jtd2.exists());
if (commitVersion == 1) {
assertTrue("Version 2 recovery moves to output dir from " + jtd, jtd.list().length == 0);
}
}
committer2.commitJob(jContext2);
validateContent(outDir);
FileUtil.fullyDelete(new File(outDir.toString()));
}
use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
the class TestFileOutputCommitter method testCommitterWithDuplicatedCommitInternal.
private void testCommitterWithDuplicatedCommitInternal(int version) throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
TextOutputFormat theOutputFormat = new TextOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
committer.commitJob(jContext);
// validate output
validateContent(outDir);
// commit job again on a successful commit job.
try {
committer.commitJob(jContext);
if (version == 1) {
Assert.fail("Duplicate commit success: wrong behavior for version 1.");
}
} catch (IOException e) {
if (version == 2) {
Assert.fail("Duplicate commit failed: wrong behavior for version 2.");
}
}
FileUtil.fullyDelete(new File(outDir.toString()));
}
use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
the class TestFileOutputCommitter method testAbortInternal.
private void testAbortInternal(int version) throws IOException, InterruptedException {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
// do setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
TextOutputFormat theOutputFormat = new TextOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeOutput(theRecordWriter, tContext);
// do abort
committer.abortTask(tContext);
File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
assertFalse("task temp dir still exists", expectedFile.exists());
committer.abortJob(jContext, JobStatus.State.FAILED);
expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
assertFalse("job temp dir still exists", expectedFile.exists());
assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
FileUtil.fullyDelete(new File(outDir.toString()));
}
use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
the class TestFileOutputCommitter method testCommitterWithFailureInternal.
private void testCommitterWithFailureInternal(int version, int maxAttempts) throws Exception {
Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, maxAttempts);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
// setup
committer.setupJob(jContext);
committer.setupTask(tContext);
// write output
TextOutputFormat theOutputFormat = new TextOutputFormat();
RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
writeOutput(theRecordWriter, tContext);
// do commit
committer.commitTask(tContext);
try {
committer.commitJob(jContext);
// (1,1), (1,2), (2,1) shouldn't reach to here.
if (version == 1 || maxAttempts <= 1) {
Assert.fail("Commit successful: wrong behavior for version 1.");
}
} catch (IOException e) {
// (2,2) shouldn't reach to here.
if (version == 2 && maxAttempts > 2) {
Assert.fail("Commit failed: wrong behavior for version 2.");
}
}
FileUtil.fullyDelete(new File(outDir.toString()));
}
Aggregations