
Example 61 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestMRJobs, method runFailingMapperJob.

protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration myConf = new Configuration(mrCluster.getConfig());
    myConf.setInt(MRJobConfig.NUM_MAPS, 1);
    // Reduce the number of map attempts so the failing mapper fails the job quickly.
    myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job job = Job.getInstance(myConf);
    job.setJarByClass(FailingMapper.class);
    job.setJobName("failmapper");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(FailingMapper.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output"));
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    return job;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob)
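
A caller of this helper typically just re-checks the job's terminal state. A minimal sketch, assuming a hypothetical test name; getJobState() and JobStatus.State are the standard org.apache.hadoop.mapreduce API:

@Test(timeout = 300000)
public void testFailingMapper() throws Exception {
    // The helper submits the job, waits for completion, and asserts it failed.
    Job job = runFailingMapperJob();
    // Confirm the terminal state the framework reports for the job.
    Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
}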

Example 62 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestMRJobsWithHistoryService, method testJobHistoryData.

@Test(timeout = 90000)
public void testJobHistoryData() throws IOException, InterruptedException, AvroRemoteException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    SleepJob sleepJob = new SleepJob();
    sleepJob.setConf(mrCluster.getConfig());
    // Job with 3 maps and 2 reduces
    Job job = sleepJob.createJob(3, 2, 1000, 1, 500, 1);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.waitForCompletion(true);
    Counters counterMR = job.getCounters();
    JobId jobId = TypeConverter.toYarn(job.getJobID());
    ApplicationId appID = jobId.getAppId();
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    Assert.assertEquals(RMAppState.FINISHED, mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState());
    Counters counterHS = job.getCounters();
    // By this point the job has finished, so getCounters() is served by the history server.
    // TODO: the whole-object assertEquals below passes, but decide whether to compare
    // each counter field individually or convert to V2 counters before comparing.
    LOG.info("CounterHS " + counterHS);
    LOG.info("CounterMR " + counterMR);
    Assert.assertEquals(counterHS, counterMR);
    HSClientProtocol historyClient = instantiateHistoryProxy();
    GetJobReportRequest gjReq = Records.newRecord(GetJobReportRequest.class);
    gjReq.setJobId(jobId);
    JobReport jobReport = historyClient.getJobReport(gjReq).getJobReport();
    verifyJobReport(jobReport, jobId);
}
Also used : HSClientProtocol(org.apache.hadoop.mapreduce.v2.api.HSClientProtocol) SleepJob(org.apache.hadoop.mapreduce.SleepJob) Counters(org.apache.hadoop.mapreduce.Counters) SleepJob(org.apache.hadoop.mapreduce.SleepJob) Job(org.apache.hadoop.mapreduce.Job) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) GetJobReportRequest(org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest) JobReport(org.apache.hadoop.mapreduce.v2.api.records.JobReport) Test(org.junit.Test)
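
instantiateHistoryProxy() is defined elsewhere in the test class. A plausible sketch, assuming the standard JobHistoryServer RPC pattern (the exact implementation may differ): resolve the history server address from the cluster config and open an HSClientProtocol proxy over YARN RPC.

private HSClientProtocol instantiateHistoryProxy() {
    // Resolve the JobHistoryServer RPC address from the mini-cluster config.
    Configuration conf = mrCluster.getConfig();
    InetSocketAddress address =
            NetUtils.createSocketAddr(conf.get(JHAdminConfig.MR_HISTORY_ADDRESS));
    // Open an HSClientProtocol proxy over YARN RPC.
    YarnRPC rpc = YarnRPC.create(conf);
    return (HSClientProtocol) rpc.getProxy(HSClientProtocol.class, address, conf);
}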

Example 63 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestInputSampler, method testSplitSampler.

/**
   * Verify the SplitSampler contract: an equal number of records is taken
   * from each of the first splits.
   */
@Test
// IntWritable.Comparator is a raw type, hence the unchecked suppression
@SuppressWarnings("unchecked")
public void testSplitSampler() throws Exception {
    final int TOT_SPLITS = 15;
    final int NUM_SPLITS = 5;
    final int STEP_SAMPLE = 5;
    final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i * STEP_SAMPLE;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used : Job(org.apache.hadoop.mapreduce.Job) NullWritable(org.apache.hadoop.io.NullWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
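
Outside of tests, these samplers usually feed a TotalOrderPartitioner. A sketch of that standard pattern, with an illustrative partition-file path and sample sizes:

// Sample the job's input and write split points for a total-order sort.
Job job = Job.getInstance(new Configuration(), "total-order-sort");
job.setPartitionerClass(TotalOrderPartitioner.class);
// Illustrative location for the partition file the partitioner will read.
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/partitions"));
InputSampler.Sampler<IntWritable, NullWritable> sampler =
        new InputSampler.SplitSampler<IntWritable, NullWritable>(25, 5);
// Runs the sampler over the job's configured input and writes the cut points.
InputSampler.writePartitionFile(job, sampler);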

Example 64 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestInputSampler, method testIntervalSampler.

/**
   * Verify the IntervalSampler contract: samples are taken at regular
   * intervals from the given splits.
   */
@Test
// IntWritable.Comparator is a raw type, hence the unchecked suppression
@SuppressWarnings("unchecked")
public void testIntervalSampler() throws Exception {
    final int TOT_SPLITS = 16;
    final int PER_SPLIT_SAMPLE = 4;
    final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
    final double FREQ = 1.0 / TOT_SPLITS;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Job(org.apache.hadoop.mapreduce.Job) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
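
For completeness, the third built-in sampler, InputSampler.RandomSampler, takes a sampling frequency, a cap on the number of samples, and a limit on how many splits to read; the values below are illustrative:

// Sample roughly 10% of records, keeping at most 100 samples from at most 10 splits.
InputSampler.Sampler<IntWritable, NullWritable> random =
        new InputSampler.RandomSampler<IntWritable, NullWritable>(0.1, 100, 10);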

Example 65 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestMRKeyFieldBasedComparator, method testComparator.

private void testComparator(String keySpec, int expect) throws Exception {
    String root = System.getProperty("test.build.data", "/tmp");
    Path inDir = new Path(root, "test_cmp/in");
    Path outDir = new Path(root, "test_cmp/out");
    conf.set("mapreduce.partition.keycomparator.options", keySpec);
    conf.set("mapreduce.partition.keypartitioner.options", "-k1.1,1.1");
    conf.set(MRJobConfig.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1, line1 + "\n" + line2 + "\n");
    job.setMapperClass(InverseMapper.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(KeyFieldBasedComparator.class);
    job.setPartitionerClass(KeyFieldBasedPartitioner.class);
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    // validate output
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    if (outputFiles.length > 0) {
        InputStream is = getFileSystem().open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line = reader.readLine();
        // The expected order of the two output lines depends on the key spec.
        if (expect == 1) {
            assertTrue(line.startsWith(line1));
        } else if (expect == 2) {
            assertTrue(line.startsWith(line2));
        }
        line = reader.readLine();
        if (expect == 1) {
            assertTrue(line.startsWith(line2));
        } else if (expect == 2) {
            assertTrue(line.startsWith(line1));
        }
        reader.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) InputStreamReader(java.io.InputStreamReader) Utils(org.apache.hadoop.mapred.Utils) InputStream(java.io.InputStream) BufferedReader(java.io.BufferedReader) Job(org.apache.hadoop.mapreduce.Job)
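
Callers pass keySpec in sort(1)-style syntax, and expect says which of line1/line2 (defined elsewhere in the test class) should sort first. A sketch of a driver, with expectations that are illustrative since the test data is not shown here:

public void testBasicUnixComparator() throws Exception {
    // "-k1,1" sorts ascending on field 1; assume line1 sorts first.
    testComparator("-k1,1", 1);
    // "-k2,2nr" sorts field 2 numerically in reverse, flipping the order.
    testComparator("-k2,2nr", 2);
}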

Aggregations

Job (org.apache.hadoop.mapreduce.Job): 886
Path (org.apache.hadoop.fs.Path): 498
Configuration (org.apache.hadoop.conf.Configuration): 434
Test (org.junit.Test): 259
IOException (java.io.IOException): 135
FileSystem (org.apache.hadoop.fs.FileSystem): 128
File (java.io.File): 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 58
ArrayList (java.util.ArrayList): 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 55
Scan (org.apache.hadoop.hbase.client.Scan): 45
FileStatus (org.apache.hadoop.fs.FileStatus): 44
NutchJob (org.apache.nutch.util.NutchJob): 43
JobConf (org.apache.hadoop.mapred.JobConf): 42
Text (org.apache.hadoop.io.Text): 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration): 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 35
JobContext (org.apache.hadoop.mapreduce.JobContext): 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 35
CommandLine (org.apache.commons.cli.CommandLine): 33