Example 6 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

From the class TestFileOutputCommitter, method testCommitterWithDuplicatedCommitInternal.

private void testCommitterWithDuplicatedCommitInternal(int version) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    validateContent(outDir);
    // commit the job again after a successful commitJob(): v1 must throw, v2 must succeed as a no-op
    try {
        committer.commitJob(jContext);
        if (version == 1) {
            Assert.fail("Duplicate commit success: wrong behavior for version 1.");
        }
    } catch (IOException e) {
        if (version == 2) {
            Assert.fail("Duplicate commit failed: wrong behavior for version 2.");
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File).
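
The property exercised above is the public constant FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION. A minimal sketch of how a regular job would select the v2 algorithm outside the test harness; the output path /tmp/out is a placeholder assumption:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CommitAlgorithmConfig {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // placeholder output path for illustration only
        FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));
        // v1 promotes task output through a job-level temp dir, so a second
        // commitJob() fails; v2 renames task output directly into the
        // destination, so a duplicate commitJob() succeeds as a no-op.
        job.getConfiguration().setInt(
            FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 2);
    }
}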

Example 7 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

From the class TestFileOutputCommitter, method testAbortInternal.

private void testAbortInternal(int version) throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());
    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: Path (org.apache.hadoop.fs.Path), JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File).
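
For reference, the two directories this abort path must remove can be derived from public FileOutputCommitter members. A small sketch under that assumption; the helper class AbortPaths is ours, not Hadoop's:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

class AbortPaths {
    // abortTask() deletes the per-attempt work dir; abortJob() deletes the
    // job-level _temporary dir under the output path, leaving outDir empty.
    static Path[] dirsRemovedOnAbort(FileOutputCommitter committer, Path outDir)
            throws IOException {
        Path taskWorkDir = committer.getWorkPath();
        Path jobPendingDir = new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME);
        return new Path[] { taskWorkDir, jobPendingDir };
    }
}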

Example 8 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

From the class TestFileOutputCommitter, method testCommitterWithFailureInternal.

private void testCommitterWithFailureInternal(int version, int maxAttempts) throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, maxAttempts);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new CommitterWithFailedThenSucceed(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    try {
        committer.commitJob(jContext);
        // (version, maxAttempts) pairs (1,1), (1,2), and (2,1) should not reach this point.
        if (version == 1 || maxAttempts <= 1) {
            Assert.fail("Commit successful: wrong behavior for version 1.");
        }
    } catch (IOException e) {
        // (2,2) should not reach this point.
        if (version == 2 && maxAttempts > 2) {
            Assert.fail("Commit failed: wrong behavior for version 2.");
        }
    }
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), JobContext (org.apache.hadoop.mapreduce.JobContext), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job), MapFile (org.apache.hadoop.io.MapFile), File (java.io.File).
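
The (version, maxAttempts) matrix in the comments is compact; here is a hypothetical helper (not part of Hadoop) spelling out the expectation for a committer whose first commit attempt fails and later ones succeed:

class CommitExpectation {
    // (version=1, any maxAttempts)  -> commitJob throws: v1 failures are fatal
    // (version=2, maxAttempts <= 1) -> commitJob throws: no retry budget left
    // (version=2, maxAttempts > 1)  -> commitJob succeeds on a later attempt
    static boolean commitShouldSucceed(int version, int maxAttempts) {
        return version == 2 && maxAttempts > 1;
    }
}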

Example 9 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

From the class TestDbClasses, method testDataDrivenDBInputFormat.

@Test(timeout = 10000)
public void testDataDrivenDBInputFormat() throws Exception {
    JobContext jobContext = mock(JobContext.class);
    Configuration configuration = new Configuration();
    configuration.setInt(MRJobConfig.NUM_MAPS, 1);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    DataDrivenDBInputFormat<NullDBWritable> format = new DataDrivenDBInputFormat<NullDBWritable>();
    List<InputSplit> splits = format.getSplits(jobContext);
    assertEquals(1, splits.size());
    DataDrivenDBInputSplit split = (DataDrivenDBInputSplit) splits.get(0);
    assertEquals("1=1", split.getLowerClause());
    assertEquals("1=1", split.getUpperClause());
    // repeat with two map tasks and an explicit bounding query
    configuration.setInt(MRJobConfig.NUM_MAPS, 2);
    DataDrivenDBInputFormat.setBoundingQuery(configuration, "query");
    assertEquals("query", configuration.get(DBConfiguration.INPUT_BOUNDING_QUERY));
    Job job = mock(Job.class);
    when(job.getConfiguration()).thenReturn(configuration);
    DataDrivenDBInputFormat.setInput(job, NullDBWritable.class, "query", "Bounding Query");
    assertEquals("Bounding Query", configuration.get(DBConfiguration.INPUT_BOUNDING_QUERY));
}
Also used: DataDrivenDBInputSplit (org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit), Configuration (org.apache.hadoop.conf.Configuration), NullDBWritable (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), DBInputSplit (org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit), Test (org.junit.Test).
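
The mocks above only exercise split generation. A sketch of configuring DataDrivenDBInputFormat against a real JDBC source; the driver class, URL, credentials, table, and query text are placeholder assumptions:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable;
import org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat;

public class DbInputSetup {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // connection details below are placeholders
        DBConfiguration.configureDB(job.getConfiguration(),
            "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/db", "user", "pass");
        // The $CONDITIONS token is replaced per split with a range predicate;
        // the bounding query returns (MIN, MAX) of the split column.
        DataDrivenDBInputFormat.setInput(job, NullDBWritable.class,
            "SELECT id, name FROM t WHERE $CONDITIONS",
            "SELECT MIN(id), MAX(id) FROM t");
        job.setInputFormatClass(DataDrivenDBInputFormat.class);
    }
}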

Example 10 with JobContext

Use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

From the class TestJobImpl, method testRebootedDuringSetup.

@Test(timeout = 20000)
public void testRebootedDuringSetup() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    OutputCommitter committer = new StubbedOutputCommitter() {

        @Override
        public synchronized void setupJob(JobContext jobContext) throws IOException {
            while (!Thread.interrupted()) {
                try {
                    wait();
                } catch (InterruptedException e) {
                    // ignored: loop back and re-check the interrupt flag,
                    // keeping the job parked in the SETUP state
                }
            }
        }
    };
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    AppContext mockContext = mock(AppContext.class);
    when(mockContext.isLastAMRetry()).thenReturn(false);
    JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext);
    JobId jobId = job.getID();
    job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
    job.handle(new JobStartEvent(jobId));
    assertJobState(job, JobStateInternal.SETUP);
    job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT));
    assertJobState(job, JobStateInternal.REBOOT);
    // return the external state as RUNNING since otherwise JobClient will
    // exit when it polls the AM for job state
    Assert.assertEquals(JobState.RUNNING, job.getState());
    dispatcher.stop();
    commitHandler.stop();
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Configuration (org.apache.hadoop.conf.Configuration), AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher), JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent), JobStartEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent), AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext), CommitterEventHandler (org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler), JobContext (org.apache.hadoop.mapreduce.JobContext), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), Test (org.junit.Test).
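
The anonymous committer above depends on the test-local StubbedOutputCommitter. A self-contained sketch of the same blocking pattern against the public OutputCommitter API; the class name BlockingSetupCommitter is our invention:

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// setupJob() blocks indefinitely, which pins the job in the SETUP state so a
// test can deliver events (such as JOB_AM_REBOOT) at a deterministic point.
class BlockingSetupCommitter extends OutputCommitter {
    @Override
    public synchronized void setupJob(JobContext jobContext) throws IOException {
        while (!Thread.interrupted()) {
            try {
                wait();
            } catch (InterruptedException e) {
                // ignored: loop back and re-check the interrupt flag
            }
        }
    }
    @Override public void setupTask(TaskAttemptContext c) { }
    @Override public boolean needsTaskCommit(TaskAttemptContext c) { return false; }
    @Override public void commitTask(TaskAttemptContext c) { }
    @Override public void abortTask(TaskAttemptContext c) { }
}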

Aggregations

JobContext (org.apache.hadoop.mapreduce.JobContext) - 85 uses
Configuration (org.apache.hadoop.conf.Configuration) - 41 uses
Job (org.apache.hadoop.mapreduce.Job) - 35 uses
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext) - 34 uses
Test (org.junit.Test) - 31 uses
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl) - 29 uses
InputSplit (org.apache.hadoop.mapreduce.InputSplit) - 28 uses
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) - 25 uses
Path (org.apache.hadoop.fs.Path) - 24 uses
IOException (java.io.IOException) - 22 uses
File (java.io.File) - 19 uses
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID) - 16 uses
ArrayList (java.util.ArrayList) - 13 uses
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter) - 11 uses
JobConf (org.apache.hadoop.mapred.JobConf) - 10 uses
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter) - 10 uses
LongWritable (org.apache.hadoop.io.LongWritable) - 9 uses
MapFile (org.apache.hadoop.io.MapFile) - 9 uses
JobID (org.apache.hadoop.mapreduce.JobID) - 7 uses
FileSystem (org.apache.hadoop.fs.FileSystem) - 6 uses