use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.
the class TestJobImpl method testCheckJobCompleteSuccess.
@Test(timeout = 20000)
public void testCheckJobCompleteSuccess() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
AsyncDispatcher dispatcher = new AsyncDispatcher();
dispatcher.init(conf);
dispatcher.start();
CyclicBarrier syncBarrier = new CyclicBarrier(2);
OutputCommitter committer = new TestingOutputCommitter(syncBarrier, true);
CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null);
completeJobTasks(job);
assertJobState(job, JobStateInternal.COMMITTING);
job.handle(new JobEvent(job.getID(), JobEventType.JOB_TASK_ATTEMPT_COMPLETED));
assertJobState(job, JobStateInternal.COMMITTING);
job.handle(new JobEvent(job.getID(), JobEventType.JOB_MAP_TASK_RESCHEDULED));
assertJobState(job, JobStateInternal.COMMITTING);
// let the committer complete and verify the job succeeds
syncBarrier.await();
assertJobState(job, JobStateInternal.SUCCEEDED);
job.handle(new JobEvent(job.getID(), JobEventType.JOB_TASK_ATTEMPT_COMPLETED));
assertJobState(job, JobStateInternal.SUCCEEDED);
job.handle(new JobEvent(job.getID(), JobEventType.JOB_MAP_TASK_RESCHEDULED));
assertJobState(job, JobStateInternal.SUCCEEDED);
dispatcher.stop();
commitHandler.stop();
}
use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.
the class TestJobImpl method testKilledDuringFailAbort.
@Test(timeout = 20000)
public void testKilledDuringFailAbort() throws Exception {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
AsyncDispatcher dispatcher = new AsyncDispatcher();
dispatcher.init(conf);
dispatcher.start();
OutputCommitter committer = new StubbedOutputCommitter() {
@Override
public void setupJob(JobContext jobContext) throws IOException {
throw new IOException("forced failure");
}
@Override
public synchronized void abortJob(JobContext jobContext, State state) throws IOException {
while (!Thread.interrupted()) {
try {
wait();
} catch (InterruptedException e) {
}
}
}
};
CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
JobId jobId = job.getID();
job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
assertJobState(job, JobStateInternal.INITED);
job.handle(new JobStartEvent(jobId));
assertJobState(job, JobStateInternal.FAIL_ABORT);
job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
assertJobState(job, JobStateInternal.KILLED);
dispatcher.stop();
commitHandler.stop();
}
use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.
the class TestFileOutputCommitter method testConcurrentCommitTaskWithSubDir.
private void testConcurrentCommitTaskWithSubDir(int version) throws Exception {
final Job job = Job.getInstance();
FileOutputFormat.setOutputPath(job, outDir);
final Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
conf.setClass("fs.file.impl", RLFS.class, FileSystem.class);
FileSystem.closeAll();
final JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
final FileOutputCommitter amCommitter = new FileOutputCommitter(outDir, jContext);
amCommitter.setupJob(jContext);
final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2];
taCtx[0] = new TaskAttemptContextImpl(conf, taskID);
taCtx[1] = new TaskAttemptContextImpl(conf, taskID1);
final TextOutputFormat[] tof = new TextOutputFormat[2];
for (int i = 0; i < tof.length; i++) {
tof[i] = new TextOutputFormat() {
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context);
return new Path(new Path(foc.getWorkPath(), SUB_DIR), getUniqueFile(context, getOutputName(context), extension));
}
};
}
final ExecutorService executor = HadoopExecutors.newFixedThreadPool(2);
try {
for (int i = 0; i < taCtx.length; i++) {
final int taskIdx = i;
executor.submit(new Callable<Void>() {
@Override
public Void call() throws IOException, InterruptedException {
final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]);
outputCommitter.setupTask(taCtx[taskIdx]);
final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]);
writeOutput(rw, taCtx[taskIdx]);
outputCommitter.commitTask(taCtx[taskIdx]);
return null;
}
});
}
} finally {
executor.shutdown();
while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
LOG.info("Awaiting thread termination!");
}
}
amCommitter.commitJob(jContext);
final RawLocalFileSystem lfs = new RawLocalFileSystem();
lfs.setConf(conf);
assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR)));
// validate output
validateContent(OUT_SUB_DIR);
FileUtil.fullyDelete(new File(outDir.toString()));
}
use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.
the class TestGridMixClasses method testSleepMapper.
/*
* test SleepMapper
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
@Test(timeout = 30000)
public void testSleepMapper() throws Exception {
SleepJob.SleepMapper test = new SleepJob.SleepMapper();
Configuration conf = new Configuration();
conf.setInt(JobContext.NUM_REDUCES, 2);
CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
TaskAttemptID taskId = new TaskAttemptID();
FakeRecordLLReader reader = new FakeRecordLLReader();
LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
OutputCommitter committer = new CustomOutputCommitter();
StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
SleepSplit split = getSleepSplit();
MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext = new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(conf, taskId, reader, writer, committer, reporter, split);
Context context = new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>().getMapContext(mapcontext);
long start = System.currentTimeMillis();
LOG.info("start:" + start);
LongWritable key = new LongWritable(start + 2000);
LongWritable value = new LongWritable(start + 2000);
// should slip 2 sec
test.map(key, value, context);
LOG.info("finish:" + System.currentTimeMillis());
assertTrue(System.currentTimeMillis() >= (start + 2000));
test.cleanup(context);
assertEquals(1, writer.getData().size());
}
use of org.apache.hadoop.mapreduce.OutputCommitter in project cdap by caskdata.
the class MultipleOutputsCommitter method setupJob.
@Override
public void setupJob(JobContext jobContext) throws IOException {
for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
committer.getValue().setupJob(namedJobContext);
}
}
Aggregations