
Example 11 with OutputCommitter

use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.

the class TestJobImpl method testKilledDuringKillAbort.

@Test(timeout = 20000)
public void testKilledDuringKillAbort() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    // not initializing dispatcher to avoid potential race condition between
    // the dispatcher thread & test thread - see MAPREDUCE-6831
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    OutputCommitter committer = new StubbedOutputCommitter() {

        @Override
        public synchronized void abortJob(JobContext jobContext, State state) throws IOException {
            // Block the abort indefinitely so the job stays in KILL_ABORT
            // until the second JOB_KILL arrives - see the assertions below.
            while (!Thread.interrupted()) {
                try {
                    wait();
                } catch (InterruptedException e) {
                    // swallow the interrupt and re-check the flag
                }
            }
        }
    };
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
    JobId jobId = job.getID();
    job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
    job.handle(new JobStartEvent(jobId));
    assertJobState(job, JobStateInternal.SETUP);
    job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILL_ABORT);
    job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILLED);
    dispatcher.stop();
    commitHandler.stop();
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) State(org.apache.hadoop.mapreduce.JobStatus.State) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) NodeState(org.apache.hadoop.yarn.api.records.NodeState) JobState(org.apache.hadoop.mapreduce.v2.api.records.JobState) JobStartEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent) CommitterEventHandler(org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler) JobContext(org.apache.hadoop.mapreduce.JobContext) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
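
The StubbedOutputCommitter that the anonymous subclass above extends is a helper inside TestJobImpl and is not shown on this page. A minimal sketch of such a no-op committer, assuming it only has to satisfy the abstract methods of org.apache.hadoop.mapreduce.OutputCommitter (the real test helper may stub more than this):

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical no-op committer: every lifecycle hook does nothing, so a test
// can override just the one method whose behavior it wants to control.
class StubbedOutputCommitter extends OutputCommitter {

    @Override
    public void setupJob(JobContext jobContext) throws IOException {
    }

    @Override
    public void setupTask(TaskAttemptContext taskContext) throws IOException {
    }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        // nothing to commit per task
        return false;
    }

    @Override
    public void commitTask(TaskAttemptContext taskContext) throws IOException {
    }

    @Override
    public void abortTask(TaskAttemptContext taskContext) throws IOException {
    }
}

With every hook inert, the test above only has to override abortJob to hold the job in KILL_ABORT.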

Example 12 with OutputCommitter

use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.

the class TestJobImpl method testCommitJobFailsJob.

@Test(timeout = 20000)
public void testCommitJobFailsJob() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    CyclicBarrier syncBarrier = new CyclicBarrier(2);
    OutputCommitter committer = new TestingOutputCommitter(syncBarrier, false);
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null);
    completeJobTasks(job);
    assertJobState(job, JobStateInternal.COMMITTING);
    // let the committer fail and verify the job fails
    syncBarrier.await();
    assertJobState(job, JobStateInternal.FAILED);
    dispatcher.stop();
    commitHandler.stop();
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) CommitterEventHandler(org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler) CyclicBarrier(java.util.concurrent.CyclicBarrier) Test(org.junit.Test)
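
TestingOutputCommitter is likewise a TestJobImpl helper that is not reproduced here. A plausible sketch, assuming the CyclicBarrier lets the test rendezvous with the commit attempt and the boolean decides whether commitJob succeeds (the test passes false, so the commit throws and the job fails):

import java.io.IOException;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
import org.apache.hadoop.mapreduce.JobContext;

// Hypothetical barrier-synchronized committer built on the no-op sketch above.
class TestingOutputCommitter extends StubbedOutputCommitter {

    private final CyclicBarrier syncBarrier;
    private final boolean shouldSucceed;

    TestingOutputCommitter(CyclicBarrier syncBarrier, boolean shouldSucceed) {
        this.syncBarrier = syncBarrier;
        this.shouldSucceed = shouldSucceed;
    }

    @Override
    public void commitJob(JobContext jobContext) throws IOException {
        try {
            // let the test thread proceed past its own syncBarrier.await()
            syncBarrier.await();
        } catch (InterruptedException | BrokenBarrierException e) {
            throw new IOException(e);
        }
        if (!shouldSucceed) {
            throw new IOException("forced commit failure");
        }
    }
}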

Example 13 with OutputCommitter

use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.

the class TestRecovery method testRecoveryAllAttemptsKilled.

@Test
public void testRecoveryAllAttemptsKilled() {
    LOG.info("--- START:  testRecoveryAllAttemptsKilled ---");
    long clusterTimestamp = System.currentTimeMillis();
    EventHandler mockEventHandler = mock(EventHandler.class);
    MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, mockEventHandler);
    TaskId taskId = recoverMapTask.getID();
    JobID jobID = new JobID(Long.toString(clusterTimestamp), 1);
    TaskID taskID = new TaskID(jobID, org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId());
    // Mock up the TaskAttempts
    Map<TaskAttemptID, TaskAttemptInfo> mockTaskAttempts = new HashMap<TaskAttemptID, TaskAttemptInfo>();
    TaskAttemptID taId1 = new TaskAttemptID(taskID, 2);
    TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, TaskAttemptState.KILLED);
    mockTaskAttempts.put(taId1, mockTAinfo1);
    TaskAttemptID taId2 = new TaskAttemptID(taskID, 1);
    TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, TaskAttemptState.KILLED);
    mockTaskAttempts.put(taId2, mockTAinfo2);
    OutputCommitter mockCommitter = mock(OutputCommitter.class);
    TaskInfo mockTaskInfo = mock(TaskInfo.class);
    when(mockTaskInfo.getTaskStatus()).thenReturn("KILLED");
    when(mockTaskInfo.getTaskId()).thenReturn(taskID);
    when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts);
    recoverMapTask.handle(new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true));
    ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class);
    verify(mockEventHandler, atLeast(1)).handle((org.apache.hadoop.yarn.event.Event) arg.capture());
    Map<TaskAttemptID, TaskAttemptState> finalAttemptStates = new HashMap<TaskAttemptID, TaskAttemptState>();
    finalAttemptStates.put(taId1, TaskAttemptState.KILLED);
    finalAttemptStates.put(taId2, TaskAttemptState.KILLED);
    List<EventType> jobHistoryEvents = new ArrayList<EventType>();
    jobHistoryEvents.add(EventType.TASK_STARTED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_KILLED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED);
    jobHistoryEvents.add(EventType.MAP_ATTEMPT_KILLED);
    jobHistoryEvents.add(EventType.TASK_FAILED);
    recoveryChecker(recoverMapTask, TaskState.KILLED, finalAttemptStates, arg, jobHistoryEvents, 2L, 0L);
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) TaskId(org.apache.hadoop.mapreduce.v2.api.records.TaskId) TaskID(org.apache.hadoop.mapreduce.TaskID) HashMap(java.util.HashMap) TaskAttemptEventType(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType) EventType(org.apache.hadoop.mapreduce.jobhistory.EventType) TaskEventType(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) ArrayList(java.util.ArrayList) EventHandler(org.apache.hadoop.yarn.event.EventHandler) JobHistoryEventHandler(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler) MapTaskImpl(org.apache.hadoop.mapreduce.v2.app.job.impl.MapTaskImpl) TaskInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo) TaskAttemptState(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState) TaskAttemptInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) TaskAttemptContainerLaunchedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent) Event(org.apache.hadoop.mapreduce.jobhistory.Event) TaskRecoverEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent) JobTaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) JobCounterUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent) ContainerLauncherEvent(org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent) TaskAttemptEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent) TaskEvent(org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)
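
getMockTaskAttemptInfo is a private helper in TestRecovery that wires up a Mockito mock of JobHistoryParser.TaskAttemptInfo. A reduced sketch, assuming only the attempt id, status string, and task type matter for this recovery path (the real helper stubs more getters, such as container id and timestamps):

// Hypothetical reduced mock builder; the real TestRecovery helper stubs more.
private TaskAttemptInfo getMockTaskAttemptInfo(TaskAttemptID attemptId, TaskAttemptState state) {
    TaskAttemptInfo info = mock(TaskAttemptInfo.class);
    when(info.getAttemptId()).thenReturn(attemptId);
    when(info.getTaskStatus()).thenReturn(state.toString());
    when(info.getTaskType()).thenReturn(org.apache.hadoop.mapreduce.TaskType.MAP);
    return info;
}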

Example 14 with OutputCommitter

use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.

the class TestMRSequenceFileAsBinaryOutputFormat method testBinary.

@Test
public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    FileOutputFormat.setOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.nextKeyValue()) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                iwritable = reader.getCurrentKey();
                dwritable = reader.getCurrentValue();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used : Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) BytesWritable(org.apache.hadoop.io.BytesWritable) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Random(java.util.Random) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
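
The write loop above repeats one pattern twice per record: serialize a Writable into a DataOutputBuffer, then wrap the buffer's valid bytes in a BytesWritable. A small hypothetical helper (not part of the test) that captures the pattern:

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

// Hypothetical helper: turn any Writable into the raw BytesWritable form
// that SequenceFileAsBinaryOutputFormat writes.
static BytesWritable toRawBytes(Writable w, DataOutputBuffer buf, BytesWritable out) throws IOException {
    // discard the previous record's bytes
    buf.reset();
    // serialize the Writable into the buffer
    w.write(buf);
    // wrap only the valid prefix of the backing array
    out.set(buf.getData(), 0, buf.getLength());
    return out;
}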

Example 15 with OutputCommitter

use of org.apache.hadoop.mapreduce.OutputCommitter in project hadoop by apache.

the class TestGridMixClasses method testSleepReducer.

/*
   * test SleepReducer
   */
@Test(timeout = 3000)
public void testSleepReducer() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(JobContext.NUM_REDUCES, 2);
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(FileOutputFormat.COMPRESS, true);
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    TaskAttemptID taskId = new TaskAttemptID();
    RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();
    Counter counter = new GenericCounter();
    Counter inputValueCounter = new GenericCounter();
    RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();
    OutputCommitter committer = new CustomOutputCommitter();
    StatusReporter reporter = new DummyReporter();
    RawComparator<GridmixKey> comparator = new FakeRawComparator();
    ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reducecontext = new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(conf, taskId, input, counter, inputValueCounter, output, committer, reporter, comparator, GridmixKey.class, NullWritable.class);
    org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context = new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>().getReducerContext(reducecontext);
    SleepReducer test = new SleepReducer();
    long start = System.currentTimeMillis();
    test.setup(context);
    long sleeper = context.getCurrentKey().getReduceOutputBytes();
    // status has been changed
    assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
    // should sleep 0.9 sec
    assertTrue(System.currentTimeMillis() >= (start + sleeper));
    test.cleanup(context);
    // status has been changed again
    assertEquals("Slept for " + sleeper, context.getStatus());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) GenericCounter(org.apache.hadoop.mapreduce.counters.GenericCounter) Counter(org.apache.hadoop.mapreduce.Counter) CustomOutputCommitter(org.apache.hadoop.CustomOutputCommitter) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) DummyReporter(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter) NullWritable(org.apache.hadoop.io.NullWritable) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator) SleepReducer(org.apache.hadoop.mapred.gridmix.SleepJob.SleepReducer) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) StatusReporter(org.apache.hadoop.mapreduce.StatusReporter) Test(org.junit.Test)
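
FakeRawComparator is another unshown helper inside TestGridMixClasses. A plausible sketch, assuming the reduce context only needs a byte-wise ordering consistent with GridmixKey's own comparison; the body below is illustrative, not Hadoop's actual test code:

import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical raw comparator: lexicographic byte order for serialized keys,
// delegating to compareTo for deserialized ones.
class FakeRawComparator implements RawComparator<GridmixKey> {

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2);
    }

    @Override
    public int compare(GridmixKey o1, GridmixKey o2) {
        return o1.compareTo(o2);
    }
}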

Aggregations

OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)47 Test (org.junit.Test)29 Configuration (org.apache.hadoop.conf.Configuration)23 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)18 JobContext (org.apache.hadoop.mapreduce.JobContext)13 CommitterEventHandler (org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler)13 JobEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent)11 AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher)11 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)10 IOException (java.io.IOException)8 JobTaskEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent)8 HashMap (java.util.HashMap)7 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)7 ArrayList (java.util.ArrayList)6 Map (java.util.Map)6 NullWritable (org.apache.hadoop.io.NullWritable)6 TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId)6 AppContext (org.apache.hadoop.mapreduce.v2.app.AppContext)6 JobStartEvent (org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent)6 TaskAttemptEvent (org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent)6