Example 11 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

the class TestCommitterEventHandler method testBasic.

@Test
public void testBasic() throws Exception {
    AppContext mockContext = mock(AppContext.class);
    OutputCommitter mockCommitter = mock(OutputCommitter.class);
    Clock mockClock = mock(Clock.class);
    CommitterEventHandler handler = new CommitterEventHandler(mockContext, mockCommitter, new TestingRMHeartbeatHandler());
    YarnConfiguration conf = new YarnConfiguration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    JobContext mockJobContext = mock(JobContext.class);
    ApplicationAttemptId attemptid = ApplicationAttemptId.fromString("appattempt_1234567890000_0001_0");
    JobId jobId = TypeConverter.toYarn(TypeConverter.fromYarn(attemptid.getApplicationId()));
    WaitForItHandler waitForItHandler = new WaitForItHandler();
    when(mockContext.getApplicationID()).thenReturn(attemptid.getApplicationId());
    when(mockContext.getApplicationAttemptId()).thenReturn(attemptid);
    when(mockContext.getEventHandler()).thenReturn(waitForItHandler);
    when(mockContext.getClock()).thenReturn(mockClock);
    handler.init(conf);
    handler.start();
    try {
        handler.handle(new CommitterJobCommitEvent(jobId, mockJobContext));
        String user = UserGroupInformation.getCurrentUser().getShortUserName();
        Path startCommitFile = MRApps.getStartJobCommitFile(conf, user, jobId);
        Path endCommitSuccessFile = MRApps.getEndJobCommitSuccessFile(conf, user, jobId);
        Path endCommitFailureFile = MRApps.getEndJobCommitFailureFile(conf, user, jobId);
        Event e = waitForItHandler.getAndClearEvent();
        assertNotNull(e);
        assertTrue(e instanceof JobCommitCompletedEvent);
        FileSystem fs = FileSystem.get(conf);
        assertTrue(startCommitFile.toString(), fs.exists(startCommitFile));
        assertTrue(endCommitSuccessFile.toString(), fs.exists(endCommitSuccessFile));
        assertFalse(endCommitFailureFile.toString(), fs.exists(endCommitFailureFile));
        verify(mockCommitter).commitJob(any(JobContext.class));
    } finally {
        handler.stop();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) AppContext(org.apache.hadoop.mapreduce.v2.app.AppContext) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Clock(org.apache.hadoop.yarn.util.Clock) SystemClock(org.apache.hadoop.yarn.util.SystemClock) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) JobCommitCompletedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCommitCompletedEvent) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) Event(org.apache.hadoop.yarn.event.Event) JobCommitFailedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCommitFailedEvent) JobCommitCompletedEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobCommitCompletedEvent) JobContext(org.apache.hadoop.mapreduce.JobContext) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Example 12 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project hadoop by apache.

the class TestJobImpl method testKilledDuringKillAbort.

@Test(timeout = 20000)
public void testKilledDuringKillAbort() throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
    // not initializing dispatcher to avoid potential race condition between
    // the dispatcher thread & test thread - see MAPREDUCE-6831
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    OutputCommitter committer = new StubbedOutputCommitter() {

        @Override
        public synchronized void abortJob(JobContext jobContext, State state) throws IOException {
            while (!Thread.interrupted()) {
                try {
                    wait();
                } catch (InterruptedException e) {
                }
            }
        }
    };
    CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer);
    commitHandler.init(conf);
    commitHandler.start();
    JobImpl job = createStubbedJob(conf, dispatcher, 2, null);
    JobId jobId = job.getID();
    job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
    job.handle(new JobStartEvent(jobId));
    assertJobState(job, JobStateInternal.SETUP);
    job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILL_ABORT);
    job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    assertJobState(job, JobStateInternal.KILLED);
    dispatcher.stop();
    commitHandler.stop();
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) State(org.apache.hadoop.mapreduce.JobStatus.State) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) NodeState(org.apache.hadoop.yarn.api.records.NodeState) JobState(org.apache.hadoop.mapreduce.v2.api.records.JobState) JobStartEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent) CommitterEventHandler(org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler) JobContext(org.apache.hadoop.mapreduce.JobContext) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)
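
Both tests above exercise JobContext through the OutputCommitter API (commitJob and abortJob). For orientation, below is a minimal no-op committer sketch showing where JobContext, as opposed to TaskAttemptContext, appears in that lifecycle. NoOpOutputCommitter is an illustrative name introduced here; it is not the StubbedOutputCommitter used by TestJobImpl.

import java.io.IOException;

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical no-op committer, analogous in spirit to the stubbed committers above,
// showing which lifecycle methods receive a JobContext.
class NoOpOutputCommitter extends OutputCommitter {

    @Override
    public void setupJob(JobContext jobContext) throws IOException {
        // Job-wide setup, e.g. creating a temporary output directory.
    }

    @Override
    public void commitJob(JobContext jobContext) throws IOException {
        // Job-wide commit; this is the call Example 11 verifies via Mockito.
    }

    @Override
    public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
        // Job-wide abort; Example 12 overrides this to simulate a hung abort.
    }

    @Override
    public void setupTask(TaskAttemptContext taskContext) throws IOException {
    }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        return false;
    }

    @Override
    public void commitTask(TaskAttemptContext taskContext) throws IOException {
    }

    @Override
    public void abortTask(TaskAttemptContext taskContext) throws IOException {
    }
}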

Example 13 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project beam by apache.

the class HadoopInputFormatIOTest method testComputeSplitsIfGetSplitsReturnsEmptyList.

/**
   * This test validates behavior of
   * {@link HadoopInputFormatBoundedSource#computeSplitsIfNecessary() computeSplits()} when Hadoop
   * InputFormat's {@link InputFormat#getSplits(JobContext)} returns an empty list.
   */
@Test
public void testComputeSplitsIfGetSplitsReturnsEmptyList() throws Exception {
    InputFormat<?, ?> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    SerializableSplit mockInputSplit = Mockito.mock(SerializableSplit.class);
    Mockito.when(mockInputFormat.getSplits(Mockito.any(JobContext.class))).thenReturn(new ArrayList<InputSplit>());
    HadoopInputFormatBoundedSource<Text, Employee> hifSource =
        new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf,
            WritableCoder.of(Text.class),
            AvroCoder.of(Employee.class),
            null, // No key translation required.
            null, // No value translation required.
            mockInputSplit);
    thrown.expect(IOException.class);
    thrown.expectMessage("Error in computing splits, getSplits() returns a empty list");
    hifSource.setInputFormatObj(mockInputFormat);
    hifSource.computeSplitsIfNecessary();
}
Also used : HadoopInputFormatBoundedSource(org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.HadoopInputFormatBoundedSource) SerializableSplit(org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIO.SerializableSplit) Text(org.apache.hadoop.io.Text) JobContext(org.apache.hadoop.mapreduce.JobContext) InputSplit(org.apache.hadoop.mapreduce.InputSplit) NewObjectsEmployeeInputSplit(org.apache.beam.sdk.io.hadoop.inputformat.EmployeeInputFormat.NewObjectsEmployeeInputSplit) Test(org.junit.Test)
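
The assertion above targets a specific error path: computeSplitsIfNecessary() must fail fast when the wrapped InputFormat reports no splits. Below is a minimal sketch of that kind of guard, using a hypothetical validateSplits helper rather than Beam's actual HadoopInputFormatBoundedSource internals.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

// Hypothetical helper illustrating the validation the test exercises;
// not Beam's actual implementation.
final class SplitValidation {

    static List<InputSplit> validateSplits(InputFormat<?, ?> inputFormat, JobContext context)
            throws IOException, InterruptedException {
        List<InputSplit> splits = inputFormat.getSplits(context);
        if (splits == null || splits.isEmpty()) {
            // Mirrors the message asserted in the test above.
            throw new IOException("Error in computing splits, getSplits() returns a empty list");
        }
        return splits;
    }
}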

Example 14 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project jena by apache.

the class AbstractBlankNodeTests method blank_node_divergence_02.

/**
     * Test that starts with two mentions of the same blank node in a single
     * file, splits them across two files, and shows that the mentions diverge
     * in the subsequent job when the JENA-820 workaround is not enabled.
     * 
     * @throws IOException
     * @throws InterruptedException
     */
@Test
public void blank_node_divergence_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
    // Temporary files
    File a = File.createTempFile("bnode_divergence", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_divergence", new FileAttribute[0]).toFile();
    try {
        // Prepare the input data
        // Two mentions of the same blank node in the same file
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(a, tuples);
        // Set up fake job which will process the file as a single split
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());
        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            // Copy the input to the output - each triple goes to a separate
            // output file
            // This is how we force multiple files to be produced
            int taskID = 1;
            while (reader.nextKeyValue()) {
                // Prepare the output writing
                OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
                TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, ++taskID, 1));
                RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
                writer.close(outputTaskContext);
            }
        }
        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);
        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // As described in JENA-820, at this point the blank nodes are
        // consistent. However, when we read them back from different files
        // they are by default treated as distinct nodes, so the blank nodes
        // diverge, which is incorrect and undesirable behaviour in
        // multi-stage pipelines. It is nevertheless the default behaviour
        // because when we start from external inputs we want blank nodes to
        // be file scoped.
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        // Make sure JENA-820 flag is disabled
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, false);
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());
        // Expect the blank node mentions to have diverged into two distinct nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes should have diverged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Node(org.apache.jena.graph.Node) ArrayList(java.util.ArrayList) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) LongWritable(org.apache.hadoop.io.LongWritable) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) File(java.io.File) FileAttribute(java.nio.file.attribute.FileAttribute) Test(org.junit.Test)
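
For contrast, the JENA-820 workaround that this test deliberately disables is turned on through the same configuration flag. A minimal sketch follows, assuming RdfIOConstants is importable from the Jena Elephas I/O module that the test itself uses.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.jena.hadoop.rdf.io.RdfIOConstants;

final class GlobalBNodeIdentityExample {

    // Sketch: build a JobContext with the JENA-820 workaround enabled, so the
    // same blank node label read from different files resolves to one node.
    static JobContext contextWithGlobalBNodeIdentity(Configuration config) throws IOException {
        Job job = Job.getInstance(config);
        job.getConfiguration().setBoolean(RdfIOConstants.GLOBAL_BNODE_IDENTITY, true);
        return new JobContextImpl(job.getConfiguration(), job.getJobID());
    }
}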

Example 15 with JobContext

use of org.apache.hadoop.mapreduce.JobContext in project jena by apache.

the class AbstractBlankNodeTests method blank_node_identity_01.

/**
     * Test that starts with two blank nodes in two different files and checks
     * that writing them to a single file does not conflate them
     * 
     * @throws IOException
     * @throws InterruptedException
     */
@Test
public void blank_node_identity_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();
    try {
        // Prepare the input data
        // Different blank nodes in different files
        List<T> tuples = new ArrayList<>();
        Node bnode1 = NodeFactory.createBlankNode();
        Node bnode2 = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);
        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());
        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            // Copy the input to the output - all tuples go to the single output file
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);
        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);
        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // The Blank nodes should have been given separate identities so we
        // should not be conflating them, this is the opposite problem to
        // that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());
        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Node(org.apache.jena.graph.Node) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) LongWritable(org.apache.hadoop.io.LongWritable) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) FileAttribute(java.nio.file.attribute.FileAttribute) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

JobContext (org.apache.hadoop.mapreduce.JobContext): 85 usages
Configuration (org.apache.hadoop.conf.Configuration): 41 usages
Job (org.apache.hadoop.mapreduce.Job): 35 usages
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 34 usages
Test (org.junit.Test): 31 usages
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 29 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 28 usages
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 25 usages
Path (org.apache.hadoop.fs.Path): 24 usages
IOException (java.io.IOException): 22 usages
File (java.io.File): 19 usages
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 16 usages
ArrayList (java.util.ArrayList): 13 usages
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 11 usages
JobConf (org.apache.hadoop.mapred.JobConf): 10 usages
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10 usages
LongWritable (org.apache.hadoop.io.LongWritable): 9 usages
MapFile (org.apache.hadoop.io.MapFile): 9 usages
JobID (org.apache.hadoop.mapreduce.JobID): 7 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
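
Taken together, these aggregations reflect one recurring pattern: a JobContext is usually obtained by wrapping a Job's configuration in a JobContextImpl and then handed to an InputFormat to compute splits. Below is a minimal, self-contained sketch of that flow, using Hadoop's built-in TextInputFormat purely for illustration; the input directory is a placeholder parameter.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

final class JobContextSketch {

    // Sketch: build a JobContext from a Job and ask an InputFormat for its splits.
    static List<InputSplit> splitsFor(String inputDir) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(inputDir));

        // JobContextImpl is the concrete JobContext used throughout the examples above.
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        return new TextInputFormat().getSplits(context);
    }
}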