
Example 51 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

the class OldAPICombinerTest method testWordCountCombinerWithOldAPI.

@Test
public void testWordCountCombinerWithOldAPI() throws Exception {
    final Configuration nativeConf = ScenarioConfiguration.getNativeConfiguration();
    nativeConf.addResource(TestConstants.COMBINER_CONF_PATH);
    final String nativeoutput = TestConstants.NATIVETASK_OLDAPI_COMBINER_TEST_NATIVE_OUTPUTPATH;
    final JobConf nativeJob = getOldAPIJobconf(nativeConf, "nativeCombinerWithOldAPI", inputpath, nativeoutput);
    RunningJob nativeRunning = JobClient.runJob(nativeJob);
    Counter nativeReduceGroups = nativeRunning.getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS);
    final Configuration normalConf = ScenarioConfiguration.getNormalConfiguration();
    normalConf.addResource(TestConstants.COMBINER_CONF_PATH);
    final String normaloutput = TestConstants.NATIVETASK_OLDAPI_COMBINER_TEST_NORMAL_OUTPUTPATH;
    final JobConf normalJob = getOldAPIJobconf(normalConf, "normalCombinerWithOldAPI", inputpath, normaloutput);
    RunningJob normalRunning = JobClient.runJob(normalJob);
    Counter normalReduceGroups = normalRunning.getCounters().findCounter(TaskCounter.REDUCE_INPUT_RECORDS);
    final boolean compareRet = ResultVerifier.verify(nativeoutput, normaloutput);
    assertEquals("file compare result: if they are the same ,then return true", true, compareRet);
    assertEquals("The input reduce record count must be same", nativeReduceGroups.getValue(), normalReduceGroups.getValue());
}
Also used: Counter(org.apache.hadoop.mapreduce.Counter) TaskCounter(org.apache.hadoop.mapreduce.TaskCounter) Configuration(org.apache.hadoop.conf.Configuration) ScenarioConfiguration(org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
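The test relies on a private getOldAPIJobconf helper and an inputpath field that the snippet does not show. For orientation, here is a hypothetical sketch of the shape such a helper typically has with the old mapred API; WordCountMapper and WordCountReducer are placeholder names, not classes from the original test.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical reconstruction; the real helper lives in OldAPICombinerTest.
static JobConf getOldAPIJobconf(Configuration conf, String name,
                                String input, String output) {
    JobConf job = new JobConf(conf);
    job.setJobName(name);
    job.setMapperClass(WordCountMapper.class); // placeholder class
    // The combiner pre-aggregates map output before the shuffle, so the
    // native and normal jobs should report identical REDUCE_INPUT_RECORDS
    // counters when both combiner implementations behave the same.
    job.setCombinerClass(WordCountReducer.class); // placeholder class
    job.setReducerClass(WordCountReducer.class); // placeholder class
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    return job;
}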

Example 52 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

the class TestMiniMRProxyUser method mrRun.

private void mrRun() throws Exception {
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();
    Path outputDir = new Path("output", "output");
    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    assertTrue(runJob.isComplete());
    assertTrue(runJob.isSuccessful());
}
Also used: Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) OutputStreamWriter(java.io.OutputStreamWriter) JobConf(org.apache.hadoop.mapred.JobConf) JobClient(org.apache.hadoop.mapred.JobClient) Writer(java.io.Writer)
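Note the contrast with Example 51: JobClient.runJob(conf) blocks until the job finishes and throws an IOException on failure, while submitJob(conf) returns immediately and the caller waits and checks success itself. A minimal sketch of the two styles, assuming conf is an already configured JobConf:

// Blocking style: submits, polls to completion, throws on job failure.
RunningJob done = JobClient.runJob(conf);

// Non-blocking style: returns immediately; completion and success are
// the caller's responsibility, as in mrRun() above.
JobClient client = new JobClient(conf);
RunningJob handle = client.submitJob(conf);
handle.waitForCompletion();
boolean ok = handle.isSuccessful();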

Example 53 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

the class TestNonExistentJob method testGetInvalidJob.

@Test
public void testGetInvalidJob() throws Exception {
    RunningJob runJob = new JobClient(getJobConf()).getJob(JobID.forName("job_0_0"));
    assertNull(runJob);
}
Also used: RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient) Test(org.junit.Test)
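JobClient.getJob returns null for an unknown job ID rather than throwing, so callers must null-check the returned handle before using it; a small sketch, assuming conf is a valid JobConf:

JobClient client = new JobClient(conf);
RunningJob job = client.getJob(JobID.forName("job_0_0"));
if (job == null) {
    // The cluster simply has no record of this ID; an RPC or
    // configuration problem would have thrown IOException instead.
    System.err.println("no such job");
}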

Example 54 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

the class DataJoinJob method runJob.

/**
 * Submit/run a map/reduce job.
 *
 * @param job the job configuration to submit
 * @return true for success
 * @throws IOException if submitting or monitoring the job fails
 */
public static boolean runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    boolean success = true;
    RunningJob running = null;
    try {
        running = jc.submitJob(job);
        JobID jobId = running.getID();
        System.out.println("Job " + jobId + " is submitted");
        while (!running.isComplete()) {
            System.out.println("Job " + jobId + " is still running.");
            try {
                Thread.sleep(60000);
            } catch (InterruptedException e) {
                // Deliberately ignore the interrupt and keep polling; the
                // sketch after this example shows a variant that propagates it.
            }
            running = jc.getJob(jobId);
        }
        success = running.isSuccessful();
    } finally {
        if (!success && (running != null)) {
            running.killJob();
        }
        jc.close();
    }
    return success;
}
Also used: RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient) JobID(org.apache.hadoop.mapred.JobID)
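The empty catch block above intentionally swallows interruption so the polling loop keeps waiting. A variant of the loop that propagates the interrupt instead, shown here as a sketch rather than the actual DataJoinJob behavior:

while (!running.isComplete()) {
    try {
        Thread.sleep(60000);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and surface the interruption to
        // the caller instead of silently continuing to poll.
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for job " + jobId, e);
    }
    running = jc.getJob(jobId);
}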

Example 55 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hive by apache.

the class ColumnTruncateTask method execute.

/**
 * Start a new map-reduce job to do the truncation, almost the same as ExecDriver.
 */
@Override
public int execute(DriverContext driverContext) {
    HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName());
    success = true;
    HiveFileFormatUtils.prepareJobOutput(job);
    job.setOutputFormat(HiveOutputFormatImpl.class);
    job.setMapperClass(work.getMapperClass());
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    try {
        if (ctx == null) {
            ctx = new Context(job);
            ctxCreated = true;
        }
    } catch (IOException e) {
        e.printStackTrace();
        console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 5;
    }
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    if (work.getNumMapTasks() != null) {
        job.setNumMapTasks(work.getNumMapTasks());
    }
    // zero reducers
    job.setNumReduceTasks(0);
    if (work.getMinSplitSize() != null) {
        HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
    }
    if (work.getInputformat() != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
    }
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    LOG.info("Using " + inpFormat);
    try {
        job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    Path outputPath = this.work.getOutputDir();
    Path tempOutPath = Utilities.toTempPath(outputPath);
    try {
        FileSystem fs = tempOutPath.getFileSystem(job);
        if (!fs.exists(tempOutPath)) {
            fs.mkdirs(tempOutPath);
        }
    } catch (IOException e) {
        console.printError("Can't make path " + outputPath + " : " + e.getMessage());
        return 6;
    }
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    int returnVal = 0;
    RunningJob rj = null;
    boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
    String jobName = null;
    if (noName && this.getQueryPlan() != null) {
        int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
        jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
    }
    if (noName) {
        // This is for a special case to ensure unit tests pass
        job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
    }
    try {
        addInputPaths(job, work);
        MapredWork mrWork = new MapredWork();
        mrWork.setMapWork(work);
        Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
        // Remove the password from the conf so that the job tracker does not
        // show it in its logs.
        String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
        if (pwd != null) {
            HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
        }
        JobClient jc = new JobClient(job);
        String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
        if (!addedJars.isEmpty()) {
            job.set("tmpjars", addedJars);
        }
        // Make this client wait if the job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        // Finally SUBMIT the JOB!
        rj = jc.submitJob(job);
        this.jobID = rj.getJobID();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        e.printStackTrace();
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        try {
            if (ctxCreated) {
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
            }
            ColumnTruncateMapper.jobClose(outputPath, success, job, console, work.getDynPartCtx(), null);
        } catch (Exception e) {
            LOG.warn("Failed while cleaning up ", e);
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    return (returnVal);
}
Also used: Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Path(org.apache.hadoop.fs.Path) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) IOException(java.io.IOException) JobClient(org.apache.hadoop.mapred.JobClient) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob)
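Stripped of the Hive-specific setup, the method follows the same submit / wait / kill-on-failure skeleton as the earlier examples. A condensed sketch, with Hive's jobExecHelper.progress() call replaced by a plain polling loop for illustration:

static int runAndCleanUp(JobConf job) {
    RunningJob rj = null;
    int returnVal = 0;
    try {
        JobClient jc = new JobClient(job);
        rj = jc.submitJob(job);
        while (!rj.isComplete()) {
            Thread.sleep(1000); // poll until the job finishes
        }
        returnVal = rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        returnVal = 1;
    } finally {
        try {
            if (rj != null && returnVal != 0) {
                // Kill the job so a failed run does not leave an
                // orphaned job on the cluster.
                rj.killJob();
            }
        } catch (IOException e) {
            // Best effort; nothing more to do if the kill fails.
        }
    }
    return returnVal;
}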

Aggregations

RunningJob (org.apache.hadoop.mapred.RunningJob): 93 usages
JobConf (org.apache.hadoop.mapred.JobConf): 65 usages
Path (org.apache.hadoop.fs.Path): 49 usages
JobClient (org.apache.hadoop.mapred.JobClient): 33 usages
IOException (java.io.IOException): 28 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 28 usages
DMLConfig (org.apache.sysml.conf.DMLConfig): 27 usages
Group (org.apache.hadoop.mapred.Counters.Group): 26 usages
Counters (org.apache.hadoop.mapred.Counters): 17 usages
Configuration (org.apache.hadoop.conf.Configuration): 14 usages
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups): 13 usages
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 10 usages
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 8 usages
File (java.io.File): 6 usages
TaggedMatrixBlock (org.apache.sysml.runtime.matrix.data.TaggedMatrixBlock): 6 usages
DataOutputStream (java.io.DataOutputStream): 5 usages
URI (java.net.URI): 5 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5 usages
Context (org.apache.hadoop.hive.ql.Context): 5 usages
Text (org.apache.hadoop.io.Text): 5 usages