Examples with Job - org.apache.hadoop.mapreduce.Job

Example 56 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMRJobs method testJobWithChangePriority.

/**
 * Submits a sleep job and changes its priority several times after
 * submission, checking after each change that the job reports the expected
 * priority, and finally that the job still completes successfully.
 *
 * The test is skipped (returns early) when the MR application jar is not
 * present on disk.
 *
 * @throws Exception if job creation, submission, or monitoring fails
 */
@Test(timeout = 3000000)
public void testJobWithChangePriority() throws Exception {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    Configuration sleepConf = new Configuration(mrCluster.getConfig());
    // set master address to local to test that local mode applied if framework
    // equals local
    sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
    // NOTE(review): presumably a short AM->RM heartbeat so priority changes
    // propagate quickly -- confirm
    sleepConf.setInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS, 5);
    SleepJob sleepJob = new SleepJob();
    sleepJob.setConf(sleepConf);
    Job job = sleepJob.createJob(1, 1, 1000, 20, 50, 1);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(SleepJob.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();

    // Set the priority to HIGH and verify it from the job itself.
    // JUnit's assertEquals takes (expected, actual); keeping that order makes
    // a failure message report the values under the right labels.
    job.setPriority(JobPriority.HIGH);
    waitForPriorityToUpdate(job, JobPriority.HIGH);
    Assert.assertEquals(JobPriority.HIGH, job.getPriority());

    // Change priority to NORMAL (3) with the integer api and verify it from
    // the job itself.
    job.setPriorityAsInteger(3);
    waitForPriorityToUpdate(job, JobPriority.NORMAL);
    Assert.assertEquals(JobPriority.NORMAL, job.getPriority());

    // Change priority to a high integer value with the integer api; the test
    // expects the job to report UNDEFINED_PRIORITY for it.
    job.setPriorityAsInteger(89);
    waitForPriorityToUpdate(job, JobPriority.UNDEFINED_PRIORITY);
    Assert.assertEquals(JobPriority.UNDEFINED_PRIORITY, job.getPriority());

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
}

Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) SleepJob(org.apache.hadoop.mapreduce.SleepJob) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) File(java.io.File) Test(org.junit.Test)

Example 57 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMRJobs method testConfVerification.

/**
 * Runs a small sleep job with {@link ConfVerificationMapper} as the mapper
 * and asserts that the job succeeds, under the combination of options given.
 *
 * The test is skipped (returns early) when the MR application jar is not
 * present on disk.
 *
 * @param useJobClassLoader value stored under
 *        {@code MRJobConfig.MAPREDUCE_JOB_CLASSLOADER}
 * @param useCustomClasses when true, excludes {@code CustomOutputFormat} and
 *        {@code CustomSpeculator} from the job classloader's system classes
 *        and configures both on the job (with map speculation enabled)
 * @param useJobClientForMonitring when true and not in local mode, the job is
 *        monitored through a {@code JobClient}/{@code RunningJob}, and the
 *        running job's {@code COUNTER_GROUPS_MAX_KEY} value is verified
 * @param useLocal when true, uses a fresh {@code Configuration} with the
 *        framework name set to local instead of the mini cluster's config
 * @throws Exception if job creation, submission, or monitoring fails
 */
private void testConfVerification(boolean useJobClassLoader, boolean useCustomClasses, boolean useJobClientForMonitring, boolean useLocal) throws Exception {
    LOG.info("\n\n\nStarting testConfVerification()" + " jobClassloader=" + useJobClassLoader + " customClasses=" + useCustomClasses + " jobClient=" + useJobClientForMonitring + " localMode=" + useLocal);
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    final Configuration clusterConfig;
    if (useLocal) {
        clusterConfig = new Configuration();
        // The local framework must be selected on the configuration this test
        // actually uses: both the JobClient and the job's configuration below
        // are derived from clusterConfig.
        clusterConfig.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
    } else {
        clusterConfig = mrCluster.getConfig();
    }
    final JobClient jc = new JobClient(clusterConfig);
    final Configuration sleepConf = new Configuration(clusterConfig);
    // set master address to local to test that local mode applied iff framework == local
    sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
    sleepConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, useJobClassLoader);
    if (useCustomClasses) {
        // to test AM loading user classes such as output format class, we want
        // to blacklist them from the system classes (they need to be prepended
        // as the first match wins)
        String systemClasses = ApplicationClassLoader.SYSTEM_CLASSES_DEFAULT;
        // exclude the custom classes from system classes
        systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" + CustomSpeculator.class.getName() + "," + systemClasses;
        sleepConf.set(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES, systemClasses);
    }
    sleepConf.set(MRJobConfig.IO_SORT_MB, TEST_IO_SORT_MB);
    sleepConf.set(MRJobConfig.MR_AM_LOG_LEVEL, Level.ALL.toString());
    sleepConf.set(MRJobConfig.MAP_LOG_LEVEL, Level.ALL.toString());
    sleepConf.set(MRJobConfig.REDUCE_LOG_LEVEL, Level.ALL.toString());
    sleepConf.set(MRJobConfig.MAP_JAVA_OPTS, "-verbose:class");
    sleepConf.set(MRJobConfig.COUNTER_GROUPS_MAX_KEY, TEST_GROUP_MAX);
    final SleepJob sleepJob = new SleepJob();
    sleepJob.setConf(sleepConf);
    final Job job = sleepJob.createJob(1, 1, 10, 1, 10, 1);
    job.setMapperClass(ConfVerificationMapper.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(SleepJob.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    if (useCustomClasses) {
        // set custom output format class and speculator class
        job.setOutputFormatClass(CustomOutputFormat.class);
        final Configuration jobConf = job.getConfiguration();
        jobConf.setClass(MRJobConfig.MR_AM_JOB_SPECULATOR, CustomSpeculator.class, Speculator.class);
        // speculation needs to be enabled for the speculator to be loaded
        jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
    }
    job.submit();
    final boolean succeeded;
    if (useJobClientForMonitring && !useLocal) {
        // We can't use getJobID in useLocal case because JobClient and Job
        // point to different instances of LocalJobRunner
        //
        final JobID mapredJobID = JobID.downgrade(job.getJobID());
        RunningJob runningJob = null;
        // Poll until the JobClient can see the submitted job.
        do {
            Thread.sleep(10);
            runningJob = jc.getJob(mapredJobID);
        } while (runningJob == null);
        Assert.assertEquals("Unexpected RunningJob's " + MRJobConfig.COUNTER_GROUPS_MAX_KEY, TEST_GROUP_MAX, runningJob.getConfiguration().get(MRJobConfig.COUNTER_GROUPS_MAX_KEY));
        runningJob.waitForCompletion();
        succeeded = runningJob.isSuccessful();
    } else {
        succeeded = job.waitForCompletion(true);
    }
    Assert.assertTrue("Job status: " + job.getStatus().getFailureInfo(), succeeded);
}

Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) RunningJob(org.apache.hadoop.mapred.RunningJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) File(java.io.File) JobClient(org.apache.hadoop.mapred.JobClient) JobID(org.apache.hadoop.mapred.JobID)

Example 58 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMRJobs method testSleepJobInternal.

/**
 * Shared driver for the sleep-job tests: builds a sleep job from the given
 * configuration, submits it, and checks either that the job runs to
 * successful completion or that submission is rejected with the message
 * matching the expected resource violation.
 *
 * The test is skipped (returns early) when the MR application jar is not
 * present on disk.
 *
 * @param sleepConf configuration for the sleep job; this method sets
 *        {@code MRConfig.MASTER_ADDRESS} on it
 * @param useRemoteJar when true, the job jar is referenced through a
 *        {@code viewfs:///jobjars/} link instead of being set by class
 * @param jobSubmissionShouldSucceed whether {@code job.submit()} is expected
 *        to succeed
 * @param violation the violation expected when submission should fail; must
 *        be non-null in that case
 * @throws Exception if job creation or monitoring fails
 */
private void testSleepJobInternal(Configuration sleepConf, boolean useRemoteJar, boolean jobSubmissionShouldSucceed, ResourceViolation violation) throws Exception {
    LOG.info("\n\n\nStarting testSleepJob: useRemoteJar=" + useRemoteJar);
    if (violation == null && !jobSubmissionShouldSucceed) {
        Assert.fail("Test is misconfigured. jobSubmissionShouldSucceed is set" + " to false and a ResourceViolation is not specified.");
    }
    final File appJar = new File(MiniMRYarnCluster.APPJAR);
    if (!appJar.exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    // set master address to local to test that local mode applied iff framework == local
    sleepConf.set(MRConfig.MASTER_ADDRESS, "local");
    final SleepJob sleeper = new SleepJob();
    sleeper.setConf(sleepConf);
    // job with 3 maps (10s) and numReduces reduces (5s), 1 "record" each:
    final Job job = sleeper.createJob(3, numSleepReducers, 10000, 1, 5000, 1);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    if (!useRemoteJar) {
        job.setJarByClass(SleepJob.class);
    } else {
        final Path jarOnLocalFs = new Path(ClassUtil.findContainingJar(SleepJob.class));
        ConfigUtil.addLink(job.getConfiguration(), "/jobjars", localFs.makeQualified(jarOnLocalFs.getParent()).toUri());
        job.setJar("viewfs:///jobjars/" + jarOnLocalFs.getName());
    }
    // speed up failures
    job.setMaxMapAttempts(1);
    try {
        job.submit();
        Assert.assertTrue("JobSubmission succeeded when it should have failed.", jobSubmissionShouldSucceed);
    } catch (IOException submitFailure) {
        if (jobSubmissionShouldSucceed) {
            Assert.fail("Job submission failed when it should have succeeded: " + submitFailure);
        }
        // Pick the message fragment that the expected violation must produce;
        // an unrecognised violation has none and always fails the test.
        final String expectedFragment;
        switch(violation) {
            case NUMBER_OF_RESOURCES:
                expectedFragment = "This job has exceeded the maximum number of" + " submitted resources";
                break;
            case TOTAL_RESOURCE_SIZE:
                expectedFragment = "This job has exceeded the maximum size of submitted resources";
                break;
            case SINGLE_RESOURCE_SIZE:
                expectedFragment = "This job has exceeded the maximum size of a single submitted";
                break;
            default:
                expectedFragment = null;
                break;
        }
        if (expectedFragment == null || !submitFailure.getMessage().contains(expectedFragment)) {
            Assert.fail("Test failed unexpectedly: " + submitFailure);
        }
        // we are done with the test (job submission failed)
        return;
    }
    // Capture URL and id before waiting, then verify after completion.
    final String trackingUrl = job.getTrackingURL();
    final String jobId = job.getJobID().toString();
    final boolean completedOk = job.waitForCompletion(true);
    Assert.assertTrue(completedOk);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    // The tracking URL must end with the job id's final "_NNNN" part plus "/".
    final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(expectedUrlTail));
    verifySleepJobCounters(job);
    verifyTaskProgress(job);
// TODO later:  add explicit "isUber()" checks of some sort (extend
// JobStatus?)--compare against MRJobConfig.JOB_UBERTASK_ENABLE value
}

Also used : Path(org.apache.hadoop.fs.Path) SleepJob(org.apache.hadoop.mapreduce.SleepJob) IOException(java.io.IOException) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) File(java.io.File)

Example 59 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMRJobs method testRandomWriter.

/**
 * Runs a {@code RandomTextWriterJob} on the mini cluster and verifies that it
 * succeeds, that its tracking URL matches the job id, that exactly three
 * output files (other than the success marker) were written, and that its
 * counters pass {@code verifyRandomWriterCounters}.
 *
 * The test is skipped (returns early) when the MR application jar is not
 * present on disk.
 */
@Test(timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testRandomWriter().");
    final File appJar = new File(MiniMRYarnCluster.APPJAR);
    if (!appJar.exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    final RandomTextWriterJob writerJob = new RandomTextWriterJob();
    // 3072 total bytes at 1024 bytes per map; the test expects three output
    // files below.
    mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
    mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
    final Job job = writerJob.createJob(mrCluster.getConfig());
    final Path randomOutput = new Path(OUTPUT_ROOT_DIR, "random-output");
    FileOutputFormat.setOutputPath(job, randomOutput);
    job.setSpeculativeExecution(false);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(RandomTextWriterJob.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();

    // Capture URL and id before waiting, then verify after completion.
    final String trackingUrl = job.getTrackingURL();
    final String jobId = job.getJobID().toString();
    final boolean completedOk = job.waitForCompletion(true);
    Assert.assertTrue(completedOk);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    // The tracking URL must end with the job id's final "_NNNN" part plus "/".
    final String expectedUrlTail = jobId.substring(jobId.lastIndexOf("_")) + "/";
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(expectedUrlTail));

    // Make sure there are three files in the output-dir
    int partFiles = 0;
    for (RemoteIterator<FileStatus> entries = FileContext.getFileContext(mrCluster.getConfig()).listStatus(randomOutput); entries.hasNext(); ) {
        final String entryName = entries.next().getPath().getName();
        if (entryName.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
            // the success marker is not a part file
            continue;
        }
        partFiles++;
    }
    Assert.assertEquals("Number of part files is wrong!", 3, partFiles);
    verifyRandomWriterCounters(job);
// TODO later:  add explicit "isUber()" checks of some sort
}

Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) File(java.io.File) Test(org.junit.Test)

Example 60 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMRJobs method testThreadDumpOnTaskTimeout.

/**
 * Runs a sleep job whose single map sleeps far longer than the configured
 * task timeout, then inspects the container logs on every node manager to
 * check which containers wrote a "Full thread dump" to stdout.
 *
 * Each syslog found is classified as an AppMaster log (it mentions
 * {@code MRAppMaster}) or a map task log. Map task containers must contain a
 * thread dump; the AppMaster container must contain one only when this test
 * runs as {@code TestUberAM}.
 *
 * The test is skipped (returns early) when the MR application jar is not
 * present on disk.
 */
@Test(timeout = 120000)
public void testThreadDumpOnTaskTimeout() throws IOException, InterruptedException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    final SleepJob sleepJob = new SleepJob();
    final JobConf sleepConf = new JobConf(mrCluster.getConfig());
    // 3 second task timeout, single map attempt
    sleepConf.setLong(MRJobConfig.TASK_TIMEOUT, 3 * 1000L);
    sleepConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
    sleepJob.setConf(sleepConf);
    if (this instanceof TestUberAM) {
        sleepConf.setInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS, 30 * 1000);
    }
    // one map sleeping for 10 minutes (no reduces), far beyond the 3 second
    // task timeout, to trigger a kill with thread dump
    final Job job = sleepJob.createJob(1, 0, 10 * 60 * 1000L, 1, 0L, 0);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    // The result is deliberately ignored: only the logs are verified below.
    job.waitForCompletion(true);
    final JobId jobId = TypeConverter.toYarn(job.getJobID());
    final ApplicationId appID = jobId.getAppId();
    // Wait (up to 60 seconds, polling once a second) for the RM to report the
    // application in a terminal state.
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    // Job finished, verify logs
    //
    final String appIdStr = appID.toString();
    final String appIdSuffix = appIdStr.substring("application_".length(), appIdStr.length());
    final String containerGlob = "container_" + appIdSuffix + "_*_*";
    final String syslogGlob = appIdStr + Path.SEPARATOR + containerGlob + Path.SEPARATOR + TaskLog.LogName.SYSLOG;
    int numAppMasters = 0;
    int numMapTasks = 0;
    for (int i = 0; i < NUM_NODE_MGRS; i++) {
        final Configuration nmConf = mrCluster.getNodeManager(i).getConfig();
        for (String logDir : nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) {
            final Path absSyslogGlob = new Path(logDir + Path.SEPARATOR + syslogGlob);
            LOG.info("Checking for glob: " + absSyslogGlob);
            for (FileStatus syslog : localFs.globStatus(absSyslogGlob)) {
                boolean foundAppMaster = false;
                boolean foundThreadDump = false;
                // Determine the container type
                final BufferedReader syslogReader = new BufferedReader(new InputStreamReader(localFs.open(syslog.getPath())));
                try {
                    for (String line; (line = syslogReader.readLine()) != null; ) {
                        if (line.contains(MRAppMaster.class.getName())) {
                            foundAppMaster = true;
                            break;
                        }
                    }
                } finally {
                    syslogReader.close();
                }
                // Check for thread dump in stdout
                final Path stdoutPath = new Path(syslog.getPath().getParent(), TaskLog.LogName.STDOUT.toString());
                final BufferedReader stdoutReader = new BufferedReader(new InputStreamReader(localFs.open(stdoutPath)));
                try {
                    for (String line; (line = stdoutReader.readLine()) != null; ) {
                        if (line.contains("Full thread dump")) {
                            foundThreadDump = true;
                            break;
                        }
                    }
                } finally {
                    stdoutReader.close();
                }
                if (foundAppMaster) {
                    numAppMasters++;
                    if (this instanceof TestUberAM) {
                        Assert.assertTrue("No thread dump", foundThreadDump);
                    } else {
                        Assert.assertFalse("Unexpected thread dump", foundThreadDump);
                    }
                } else {
                    numMapTasks++;
                    Assert.assertTrue("No thread dump", foundThreadDump);
                }
            }
        }
    }
    // Make sure we checked non-empty set
    //
    Assert.assertEquals("No AppMaster log found!", 1, numAppMasters);
    // Compare the counts by value: assertSame would compare the identity of
    // the autoboxed Integer objects, which only works by way of the
    // small-value Integer cache.
    if (sleepConf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false)) {
        Assert.assertEquals("MapTask log with uber found!", 0, numMapTasks);
    } else {
        Assert.assertEquals("No MapTask log found!", 1, numMapTasks);
    }
}

Also used : Path(org.apache.hadoop.fs.Path) MRAppMaster(org.apache.hadoop.mapreduce.v2.app.MRAppMaster) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) SleepJob(org.apache.hadoop.mapreduce.SleepJob) RunningJob(org.apache.hadoop.mapred.RunningJob) Job(org.apache.hadoop.mapreduce.Job) RandomTextWriterJob(org.apache.hadoop.RandomTextWriterJob) SleepJob(org.apache.hadoop.mapreduce.SleepJob) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) Test(org.junit.Test)

Aggregations

Job (org.apache.hadoop.mapreduce.Job)886 Path (org.apache.hadoop.fs.Path)498 Configuration (org.apache.hadoop.conf.Configuration)434 Test (org.junit.Test)259 IOException (java.io.IOException)135 FileSystem (org.apache.hadoop.fs.FileSystem)128 File (java.io.File)77 InputSplit (org.apache.hadoop.mapreduce.InputSplit)58 ArrayList (java.util.ArrayList)55 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)55 Scan (org.apache.hadoop.hbase.client.Scan)45 FileStatus (org.apache.hadoop.fs.FileStatus)44 NutchJob (org.apache.nutch.util.NutchJob)43 JobConf (org.apache.hadoop.mapred.JobConf)42 Text (org.apache.hadoop.io.Text)39 NutchConfiguration (org.apache.nutch.util.NutchConfiguration)36 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)35 JobContext (org.apache.hadoop.mapreduce.JobContext)35 GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)35 CommandLine (org.apache.commons.cli.CommandLine)33