Example 51 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestSpeculativeExecution, method testSpeculativeExecution.

@Test
public void testSpeculativeExecution() throws Exception {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    /*------------------------------------------------------------------
     * Test that Map/Red does not speculate if MAP_SPECULATIVE and 
     * REDUCE_SPECULATIVE are both false.
     * -----------------------------------------------------------------
     */
    Job job = runSpecTest(false, false);
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Counters counters = job.getCounters();
    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS).getValue());
    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES).getValue());
    Assert.assertEquals(0, counters.findCounter(JobCounter.NUM_FAILED_MAPS).getValue());
    /*----------------------------------------------------------------------
     * Test that Mapper speculates if MAP_SPECULATIVE is true and
     * REDUCE_SPECULATIVE is false.
     * ---------------------------------------------------------------------
     */
    job = runSpecTest(true, false);
    succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    counters = job.getCounters();
    // The long-running map will be killed and a new one started.
    Assert.assertEquals(3, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS).getValue());
    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES).getValue());
    Assert.assertEquals(0, counters.findCounter(JobCounter.NUM_FAILED_MAPS).getValue());
    Assert.assertEquals(1, counters.findCounter(JobCounter.NUM_KILLED_MAPS).getValue());
    /*----------------------------------------------------------------------
     * Test that Reducer speculates if REDUCE_SPECULATIVE is true and
     * MAP_SPECULATIVE is false.
     * ---------------------------------------------------------------------
     */
    job = runSpecTest(false, true);
    succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    counters = job.getCounters();
    // The long-running reduce will be killed and a new one started.
    Assert.assertEquals(2, counters.findCounter(JobCounter.TOTAL_LAUNCHED_MAPS).getValue());
    Assert.assertEquals(3, counters.findCounter(JobCounter.TOTAL_LAUNCHED_REDUCES).getValue());
}
Also used: Counters (org.apache.hadoop.mapreduce.Counters), Job (org.apache.hadoop.mapreduce.Job), File (java.io.File), Test (org.junit.Test)
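
The helper runSpecTest is not shown on this page; the essential part is that it flips the two speculation switches on the job configuration before submitting. Below is a minimal sketch of that wiring, with a hypothetical helper name (the real helper also points the job at the test's MiniMRYarnCluster and presumably stalls one task attempt so the speculator has a candidate):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

// Hypothetical sketch of how a helper like runSpecTest configures speculation.
public class SpeculationJobSketch {
    static Job newSpeculationJob(boolean mapSpec, boolean redSpec) throws Exception {
        Configuration conf = new Configuration();
        // MRJobConfig.MAP_SPECULATIVE and REDUCE_SPECULATIVE resolve to the
        // "mapreduce.map.speculative" / "mapreduce.reduce.speculative" keys.
        conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapSpec);
        conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redSpec);
        Job job = Job.getInstance(conf, "speculation-test");
        // The assertions above expect two reduces per job.
        job.setNumReduceTasks(2);
        return job;
    }
}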

Example 52 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class CombinerTest, method getJob.

protected static Job getJob(String jobname, Configuration inputConf, String inputpath, String outputpath) throws Exception {
    final Configuration conf = new Configuration(inputConf);
    conf.set("fileoutputpath", outputpath);
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputpath))) {
        fs.delete(new Path(outputpath), true);
    }
    fs.close();
    final Job job = Job.getInstance(conf, jobname);
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, new Path(outputpath));
    return job;
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ScenarioConfiguration (org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job)
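
getJob plugs in TokenizerMapper and IntSumReducer without showing them. They follow the shape of the canonical Hadoop WordCount classes; here is a sketch for reference, assuming the standard shapes (the nativetask test copies may differ in detail):

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// In the canonical example these are static nested classes of the WordCount driver.
public class WordCountSketch {

    // Splits each input value into tokens and emits (token, 1).
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts for each word; usable as both combiner and reducer.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
}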

Example 53 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class CombinerTest, method testWordCountCombiner.

@Test
public void testWordCountCombiner() throws Exception {
    final Configuration nativeConf = ScenarioConfiguration.getNativeConfiguration();
    nativeConf.addResource(TestConstants.COMBINER_CONF_PATH);
    final Job nativejob = getJob("nativewordcount", nativeConf, inputpath, nativeoutputpath);
    final Configuration commonConf = ScenarioConfiguration.getNormalConfiguration();
    commonConf.addResource(TestConstants.COMBINER_CONF_PATH);
    final Job normaljob = getJob("normalwordcount", commonConf, inputpath, hadoopoutputpath);
    assertTrue(nativejob.waitForCompletion(true));
    assertTrue(normaljob.waitForCompletion(true));
    assertTrue(ResultVerifier.verify(nativeoutputpath, hadoopoutputpath));
    ResultVerifier.verifyCounters(normaljob, nativejob, true);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), ScenarioConfiguration (org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration), Job (org.apache.hadoop.mapreduce.Job), Test (org.junit.Test)
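
ResultVerifier.verify presumably checks that the native and normal jobs produced identical output. A hypothetical sketch of the idea (not the real utility): concatenate the part files under each output directory in name order and compare bytes, which assumes both jobs used the same partitioner and sort so that corresponding part files line up:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical output comparison; the real ResultVerifier may work differently.
class OutputCompareSketch {

    static boolean sameOutput(Configuration conf, String dirA, String dirB) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return Arrays.equals(readParts(fs, new Path(dirA)), readParts(fs, new Path(dirB)));
    }

    // Concatenates all part-* files under dir, in file-name order.
    static byte[] readParts(FileSystem fs, Path dir) throws IOException {
        FileStatus[] parts = fs.listStatus(dir, p -> p.getName().startsWith("part-"));
        Arrays.sort(parts, Comparator.comparing((FileStatus s) -> s.getPath().getName()));
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (FileStatus part : parts) {
            try (FSDataInputStream in = fs.open(part.getPath())) {
                IOUtils.copyBytes(in, out, 4096, false);
            }
        }
        return out.toByteArray();
    }
}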

Example 54 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class LargeKVCombinerTest, method testLargeValueCombiner.

@Test
public void testLargeValueCombiner() throws Exception {
    final Configuration normalConf = ScenarioConfiguration.getNormalConfiguration();
    final Configuration nativeConf = ScenarioConfiguration.getNativeConfiguration();
    normalConf.addResource(TestConstants.COMBINER_CONF_PATH);
    nativeConf.addResource(TestConstants.COMBINER_CONF_PATH);
    // Default maximum KV size: 4 MB.
    final int defaultKvSizeMaximum = 1 << 22;
    final int kvSizeMaximum = normalConf.getInt(TestConstants.NATIVETASK_KVSIZE_MAX_LARGEKV_TEST, defaultKvSizeMaximum);
    final String inputPath = TestConstants.NATIVETASK_COMBINER_TEST_INPUTDIR + "/largeKV";
    final String nativeOutputPath = TestConstants.NATIVETASK_COMBINER_TEST_NATIVE_OUTPUTDIR + "/nativeLargeKV";
    final String hadoopOutputPath = TestConstants.NATIVETASK_COMBINER_TEST_NORMAL_OUTPUTDIR + "/normalLargeKV";
    final FileSystem fs = FileSystem.get(normalConf);
    for (int i = 65536; i <= kvSizeMaximum; i *= 4) {
        int max = i;
        int min = Math.max(i / 4, max - 10);
        LOG.info("===KV Size Test: min size: " + min + ", max size: " + max);
        normalConf.set(TestConstants.NATIVETASK_KVSIZE_MIN, String.valueOf(min));
        normalConf.set(TestConstants.NATIVETASK_KVSIZE_MAX, String.valueOf(max));
        nativeConf.set(TestConstants.NATIVETASK_KVSIZE_MIN, String.valueOf(min));
        nativeConf.set(TestConstants.NATIVETASK_KVSIZE_MAX, String.valueOf(max));
        fs.delete(new Path(inputPath), true);
        new TestInputFile(normalConf.getInt(TestConstants.NATIVETASK_COMBINER_WORDCOUNT_FILESIZE, 1000000), IntWritable.class.getName(), Text.class.getName(), normalConf).createSequenceTestFile(inputPath, 1);
        final Job normaljob = CombinerTest.getJob("normalwordcount", normalConf, inputPath, hadoopOutputPath);
        final Job nativejob = CombinerTest.getJob("nativewordcount", nativeConf, inputPath, nativeOutputPath);
        assertTrue(nativejob.waitForCompletion(true));
        assertTrue(normaljob.waitForCompletion(true));
        final boolean compareRet = ResultVerifier.verify(nativeOutputPath, hadoopOutputPath);
        final String reason = "LargeKVCombinerTest failed with min size: " + min + ", max size: " + max + ", normal out: " + hadoopOutputPath + ", native out: " + nativeOutputPath;
        assertTrue(reason, compareRet);
        ResultVerifier.verifyCounters(normaljob, nativejob, true);
    }
    fs.close();
}
Also used: Path (org.apache.hadoop.fs.Path), ScenarioConfiguration (org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration), Configuration (org.apache.hadoop.conf.Configuration), TestInputFile (org.apache.hadoop.mapred.nativetask.kvtest.TestInputFile), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job), Test (org.junit.Test)
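
A note on the loop geometry: i quadruples from 65536 up to the 1 << 22 cap, so four size bands are exercised (64 KB, 256 KB, 1 MB, 4 MB). Because max - 10 exceeds i / 4 for every i in that range, min is always max - 10, so each pass generates keys and values within ten bytes of the target size.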

Example 55 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestMRJobs, method testSleepJobWithSecurityOn.

//@Test (timeout = 60000)
public void testSleepJobWithSecurityOn() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testSleepJobWithSecurityOn().");
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        return;
    }
    mrCluster.getConfig().set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    mrCluster.getConfig().set(YarnConfiguration.RM_KEYTAB, "/etc/krb5.keytab");
    mrCluster.getConfig().set(YarnConfiguration.NM_KEYTAB, "/etc/krb5.keytab");
    mrCluster.getConfig().set(YarnConfiguration.RM_PRINCIPAL, "rm/sightbusy-lx@LOCALHOST");
    mrCluster.getConfig().set(YarnConfiguration.NM_PRINCIPAL, "nm/sightbusy-lx@LOCALHOST");
    UserGroupInformation.setConfiguration(mrCluster.getConfig());
    // Keep it in here instead of after RM/NM as multiple user logins happen in
    // the same JVM.
    UserGroupInformation user = UserGroupInformation.getCurrentUser();
    LOG.info("User name is " + user.getUserName());
    for (Token<? extends TokenIdentifier> str : user.getTokens()) {
        LOG.info("Token is " + str.encodeToUrlString());
    }
    user.doAs(new PrivilegedExceptionAction<Void>() {

        @Override
        public Void run() throws Exception {
            SleepJob sleepJob = new SleepJob();
            sleepJob.setConf(mrCluster.getConfig());
            Job job = sleepJob.createJob(3, 0, 10000, 1, 0, 0);
            // //Job with reduces
            // Job job = sleepJob.createJob(3, 2, 10000, 1, 10000, 1);
            // The AppMaster jar itself.
            job.addFileToClassPath(APP_JAR);
            job.submit();
            String trackingUrl = job.getTrackingURL();
            String jobId = job.getJobID().toString();
            job.waitForCompletion(true);
            Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
            Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
            return null;
        }
    });
// TODO later:  add explicit "isUber()" checks of some sort
}
Also used: SleepJob (org.apache.hadoop.mapreduce.SleepJob), RunningJob (org.apache.hadoop.mapred.RunningJob), Job (org.apache.hadoop.mapreduce.Job), RandomTextWriterJob (org.apache.hadoop.RandomTextWriterJob), File (java.io.File), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
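
A note on ordering: UserGroupInformation keeps login state in process-wide statics, which is why the test calls setConfiguration before the first getCurrentUser and why the comment warns about multiple logins in one JVM. Outside a test, the standalone pattern looks roughly like this (hypothetical principal and keytab path):

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

// Hypothetical sketch: log in from a keytab, then act as that principal.
public class KerberosDoAsSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(conf); // must precede any UGI lookup
        UserGroupInformation ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
                "user/host@EXAMPLE.COM", "/etc/krb5.keytab");
        ugi.doAs((PrivilegedExceptionAction<Void>) () -> {
            // Submit jobs or access HDFS here as the Kerberos principal.
            return null;
        });
    }
}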

Aggregations

Job (org.apache.hadoop.mapreduce.Job): 886
Path (org.apache.hadoop.fs.Path): 498
Configuration (org.apache.hadoop.conf.Configuration): 434
Test (org.junit.Test): 259
IOException (java.io.IOException): 135
FileSystem (org.apache.hadoop.fs.FileSystem): 128
File (java.io.File): 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 58
ArrayList (java.util.ArrayList): 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 55
Scan (org.apache.hadoop.hbase.client.Scan): 45
FileStatus (org.apache.hadoop.fs.FileStatus): 44
NutchJob (org.apache.nutch.util.NutchJob): 43
JobConf (org.apache.hadoop.mapred.JobConf): 42
Text (org.apache.hadoop.io.Text): 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration): 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 35
JobContext (org.apache.hadoop.mapreduce.JobContext): 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 35
CommandLine (org.apache.commons.cli.CommandLine): 33