
Example 46 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMultipleInputs method testAddInputPathWithFormat.

@Test
public void testAddInputPathWithFormat() throws IOException {
    final Job conf = Job.getInstance();
    MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class);
    MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class);
    final Map<Path, InputFormat> inputs = MultipleInputs.getInputFormatMap(conf);
    assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
    assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")).getClass());
}
Also used : Path(org.apache.hadoop.fs.Path) InputFormat(org.apache.hadoop.mapreduce.InputFormat) KeyValueTextInputFormat(org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
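
The test above only verifies the Path-to-InputFormat mapping; it does not show a complete driver. The sketch below is not taken from the Hadoop sources: it illustrates how the same MultipleInputs API is typically wired in a job driver using the four-argument addInputPath overload that also binds a Mapper per path. The input paths, output path, job name, and both mapper classes are illustrative assumptions.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MultipleInputsDriver {

    // Mapper for plain text lines (TextInputFormat keys are byte offsets).
    public static class LineCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(new Text("lines"), ONE);
        }
    }

    // Mapper for tab-separated records (KeyValueTextInputFormat keys are the first column).
    public static class RecordCountMapper extends Mapper<Text, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            context.write(new Text("records"), ONE);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multiple-inputs-example");
        job.setJarByClass(MultipleInputsDriver.class);
        // Each input path gets its own InputFormat and Mapper.
        MultipleInputs.addInputPath(job, new Path("/foo"), TextInputFormat.class, LineCountMapper.class);
        MultipleInputs.addInputPath(job, new Path("/bar"), KeyValueTextInputFormat.class, RecordCountMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path("/out"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}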

Example 47 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMapReduceJobControl method createDependencies.

/**
   * This is the main function for testing the JobControl class.
   * It requires 4 jobs:
   *      Job 1: passed as a parameter. input: indir  output: outdir_1
   *      Job 2: copy data from indir to outdir_2
   *      Job 3: copy data from outdir_1 and outdir_2 to outdir_3
   *      Job 4: copy data from outdir_3 to outdir_4
   * Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2.
   * Job 4 depends on job 3.
   *
   * It then creates a JobControl object, adds the 4 jobs to it, and finally
   * starts a thread to run the JobControl object.
   */
private JobControl createDependencies(Configuration conf, Job job1) throws Exception {
    List<ControlledJob> dependingJobs = null;
    cjob1 = new ControlledJob(job1, dependingJobs);
    Job job2 = MapReduceTestUtil.createCopyJob(conf, outdir_2, indir);
    cjob2 = new ControlledJob(job2, dependingJobs);
    Job job3 = MapReduceTestUtil.createCopyJob(conf, outdir_3, outdir_1, outdir_2);
    dependingJobs = new ArrayList<ControlledJob>();
    dependingJobs.add(cjob1);
    dependingJobs.add(cjob2);
    cjob3 = new ControlledJob(job3, dependingJobs);
    Job job4 = MapReduceTestUtil.createCopyJob(conf, outdir_4, outdir_3);
    dependingJobs = new ArrayList<ControlledJob>();
    dependingJobs.add(cjob3);
    cjob4 = new ControlledJob(job4, dependingJobs);
    JobControl theControl = new JobControl("Test");
    theControl.addJob(cjob1);
    theControl.addJob(cjob2);
    theControl.addJob(cjob3);
    theControl.addJob(cjob4);
    Thread theController = new Thread(theControl);
    theController.start();
    return theControl;
}
Also used : Job(org.apache.hadoop.mapreduce.Job)
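
createDependencies starts a controller thread before returning, so a caller only has to wait for the jobs to drain. The helper below is a minimal sketch, not part of the Hadoop test, of how such a caller might poll the returned JobControl and report failures; the method name and sleep interval are illustrative.

// Illustrative helper: blocks until every job in an already-running JobControl
// reaches a terminal state, then shuts the control loop down.
private void waitForJobControl(JobControl control) throws InterruptedException {
    while (!control.allFinished()) {
        // The JobControl's own thread advances READY jobs; we only poll.
        Thread.sleep(1000);
    }
    // Let the JobControl run() loop exit.
    control.stop();
    for (ControlledJob failed : control.getFailedJobList()) {
        System.err.println(failed.getJobName() + " failed: " + failed.getMessage());
    }
}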

Example 48 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMapReduceJobControlWithMocks method testErrorWhileSubmitting.

@Test
public void testErrorWhileSubmitting() throws Exception {
    JobControl jobControl = new JobControl("Test");
    Job mockJob = mock(Job.class);
    ControlledJob job1 = new ControlledJob(mockJob, null);
    when(mockJob.getConfiguration()).thenReturn(new Configuration());
    doThrow(new IncompatibleClassChangeError("This is a test")).when(mockJob).submit();
    jobControl.addJob(job1);
    runJobControl(jobControl);
    try {
        assertEquals("Success list", 0, jobControl.getSuccessfulJobList().size());
        assertEquals("Failed list", 1, jobControl.getFailedJobList().size());
        assertTrue(job1.getJobState() == ControlledJob.State.FAILED);
    } finally {
        jobControl.stop();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
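
The runJobControl helper called above belongs to the test class but is not shown in this excerpt. A plausible reconstruction, assuming it simply runs the JobControl on its own thread (JobControl implements Runnable) and spins until every ControlledJob has been processed, looks like this; the real helper may differ.

// Hypothetical reconstruction of the unshown helper.
private void runJobControl(JobControl jobControl) {
    Thread controller = new Thread(jobControl);
    controller.start();
    while (!jobControl.allFinished()) {
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            break;
        }
    }
}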

Example 49 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMapReduceJobControlWithMocks method createJob.

private Job createJob(boolean complete, boolean successful) throws IOException, InterruptedException {
    // Create a stub Job that responds in a controlled way
    Job mockJob = mock(Job.class);
    when(mockJob.getConfiguration()).thenReturn(new Configuration());
    when(mockJob.isComplete()).thenReturn(complete);
    when(mockJob.isSuccessful()).thenReturn(successful);
    return mockJob;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job)
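
A short sketch of how such stubs are typically combined with ControlledJob and JobControl in these mocked tests; the test name, job wiring, and assertions below are illustrative assumptions rather than code from the excerpt.

@Test
public void testDependentMockJobs() throws Exception {
    // Two stubbed jobs, the second depending on the first; both stubs report
    // themselves complete and successful, so the whole chain should succeed.
    ControlledJob cjob1 = new ControlledJob(createJob(true, true), null);
    ControlledJob cjob2 = new ControlledJob(createJob(true, true), java.util.Arrays.asList(cjob1));
    JobControl jobControl = new JobControl("Test");
    jobControl.addJob(cjob1);
    jobControl.addJob(cjob2);
    runJobControl(jobControl);
    try {
        assertEquals("Success list", 2, jobControl.getSuccessfulJobList().size());
        assertEquals("Failed list", 0, jobControl.getFailedJobList().size());
    } finally {
        jobControl.stop();
    }
}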

Example 50 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestSpeculativeExecution method runSpecTest.

private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException {
    Path first = createTempFile("specexec_map_input1", "a\nz");
    Path secnd = createTempFile("specexec_map_input2", "a\nz");
    Configuration conf = mrCluster.getConfig();
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapspec);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redspec);
    conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class);
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestSpeculativeExecution.class);
    job.setMapperClass(SpeculativeMapper.class);
    job.setReducerClass(SpeculativeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);
    FileInputFormat.setInputPaths(job, first);
    FileInputFormat.addInputPath(job, secnd);
    FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
    // Delete output directory if it exists.
    try {
        localFs.delete(TEST_OUT_DIR, true);
    } catch (IOException e) {
        // ignore
    }
    // Add the AppMaster jar itself to the job's classpath.
    job.addFileToClassPath(APP_JAR);
    job.setMaxMapAttempts(2);
    job.submit();
    return job;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job)
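
runSpecTest only configures and submits the job; verifying speculation is left to the caller. The fragment below is a hedged sketch, not the actual test body, of how the returned Job can be monitored and how standard org.apache.hadoop.mapreduce.JobCounter values can be read; the assertion and printed message are illustrative.

// Illustrative caller: the job is already submitted, so waitForCompletion only monitors it.
Job job = runSpecTest(true, false);
boolean succeeded = job.waitForCompletion(true);
assertTrue("Job should have completed", succeeded);
Counters counters = job.getCounters();
long launchedMaps = counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS).getValue();
// With two one-line input files, map speculation may launch more than two map attempts.
System.out.println("Launched map attempts: " + launchedMaps);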

Aggregations

Job (org.apache.hadoop.mapreduce.Job): 886
Path (org.apache.hadoop.fs.Path): 498
Configuration (org.apache.hadoop.conf.Configuration): 434
Test (org.junit.Test): 259
IOException (java.io.IOException): 135
FileSystem (org.apache.hadoop.fs.FileSystem): 128
File (java.io.File): 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 58
ArrayList (java.util.ArrayList): 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 55
Scan (org.apache.hadoop.hbase.client.Scan): 45
FileStatus (org.apache.hadoop.fs.FileStatus): 44
NutchJob (org.apache.nutch.util.NutchJob): 43
JobConf (org.apache.hadoop.mapred.JobConf): 42
Text (org.apache.hadoop.io.Text): 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration): 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 35
JobContext (org.apache.hadoop.mapreduce.JobContext): 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 35
CommandLine (org.apache.commons.cli.CommandLine): 33