Example 31 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project incubator-systemml by apache.

the class CSVReblockMR method runAssignRowIDMRJob.

public static AssignRowIDMRReturn runAssignRowIDMRJob(String[] inputs, InputInfo[] inputInfos, int[] brlens, int[] bclens, String reblockInstructions, int replication, String[] smallestFiles, boolean transform, String naStrings, String spec) throws Exception {
    AssignRowIDMRReturn ret = new AssignRowIDMRReturn();
    JobConf job;
    job = new JobConf(CSVReblockMR.class);
    job.setJobName("Assign-RowID-MR");
    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++) realIndexes[b] = b;
    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false, ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);
    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);
    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    //set up custom map/reduce configurations 
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);
    //set up the number of reducers
    job.setNumReduceTasks(1);
    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //  inst.printCompleteMRJobInstruction();
    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVAssignRowIDMapper.class);
    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(OffsetCount.class);
    //configure reducer
    job.setReducerClass(CSVAssignRowIDReducer.class);
    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);
    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    //set up the output file
    ret.counterFile = new Path(MRJobConfiguration.constructTempOutputFilename());
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, ret.counterFile);
    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(OffsetCount.class);
    // setup properties relevant to transform
    job.setBoolean(MRJobConfiguration.TF_TRANSFORM, transform);
    if (transform) {
        if (naStrings != null)
            // Adding "dummy" string to handle the case of na_strings = ""
            job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(naStrings));
        job.set(MRJobConfiguration.TF_SPEC, spec);
    }
    RunningJob runjob = JobClient.runJob(job);
    /* Process different counters */
    Group rgroup = runjob.getCounters().getGroup(NUM_ROWS_IN_MATRIX);
    Group cgroup = runjob.getCounters().getGroup(NUM_COLS_IN_MATRIX);
    ret.rlens = new long[inputs.length];
    ret.clens = new long[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        // number of rows and columns per input, as reported by the job counters
        ret.rlens[i] = rgroup.getCounter(Integer.toString(i));
        ret.clens[i] = cgroup.getCounter(Integer.toString(i));
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.hadoop.mapred.Counters.Group) DMLConfig(org.apache.sysml.conf.DMLConfig) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
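
The pattern above reduces to: submit the configured JobConf with JobClient.runJob (which blocks until completion), then read per-input statistics from named counter groups on the returned RunningJob. A minimal sketch; the group name "MyCounters" and the wrapper class are placeholders, not SystemML constants:

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

// Sketch: run a configured job, then read one value per input back out
// of a named counter group, keyed by input index as in the example above.
public class CounterReadSketch {
    public static long[] runAndCollect(JobConf job, int numInputs) throws Exception {
        // runJob blocks until the job finishes (and throws if it fails)
        RunningJob runjob = JobClient.runJob(job);
        Counters.Group group = runjob.getCounters().getGroup("MyCounters");
        long[] values = new long[numInputs];
        for (int i = 0; i < numInputs; i++)
            values[i] = group.getCounter(Integer.toString(i));
        return values;
    }
}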

Example 32 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project compiler by boalang.

the class BoaOutputCommitter method abortJob.

@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);
    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob((org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));
    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
        switch (event.getTaskStatus()) {
            case SUCCEEDED:
                break;
            case FAILED:
            case KILLED:
            case OBSOLETE:
            case TIPFAILED:
                diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
                for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                    diag += s + "\n";
                diag += "\n";
                break;
        }
    }
    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
Also used : TaskCompletionEvent(org.apache.hadoop.mapred.TaskCompletionEvent) RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient) JobConf(org.apache.hadoop.mapred.JobConf)
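
Stripped of the Boa-specific status bookkeeping, the diagnostics-gathering idiom is a short loop over task-completion events. A minimal sketch, using StringBuilder in place of the string concatenation above:

import java.io.IOException;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;

// Sketch: collect diagnostics for every unsuccessful task attempt of a job.
public class DiagnosticsSketch {
    public static String collectDiagnostics(RunningJob job) throws IOException {
        StringBuilder diag = new StringBuilder();
        for (TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
            if (event.getTaskStatus() == TaskCompletionEvent.Status.SUCCEEDED)
                continue;
            diag.append("Diagnostics for: ").append(event.getTaskTrackerHttp()).append('\n');
            for (String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag.append(s).append('\n');
            diag.append('\n');
        }
        return diag.toString();
    }
}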

Example 33 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project cdap by caskdata.

the class MRJobClient method getMRJobInfo.

/**
   * @param runId for which information will be returned.
   * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
   * @throws IOException if there is a failure to communicate through the JobClient.
   * @throws NotFoundException if a Job with the given runId is not found.
   */
public MRJobInfo getMRJobInfo(Id.Run runId) throws IOException, NotFoundException {
    Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));
    JobClient jobClient = new JobClient(hConf);
    JobStatus[] jobs = jobClient.getAllJobs();
    JobStatus thisJob = findJobForRunId(jobs, runId.toEntityId());
    RunningJob runningJob = jobClient.getJob(thisJob.getJobID());
    if (runningJob == null) {
        throw new IllegalStateException(String.format("JobClient returned null for RunId: '%s', JobId: '%s'", runId, thisJob.getJobID()));
    }
    Counters counters = runningJob.getCounters();
    TaskReport[] mapTaskReports = jobClient.getMapTaskReports(thisJob.getJobID());
    TaskReport[] reduceTaskReports = jobClient.getReduceTaskReports(thisJob.getJobID());
    return new MRJobInfo(runningJob.mapProgress(), runningJob.reduceProgress(), groupToMap(counters.getGroup(TaskCounter.class.getName())), toMRTaskInfos(mapTaskReports), toMRTaskInfos(reduceTaskReports), true);
}
Also used : JobStatus(org.apache.hadoop.mapred.JobStatus) MRJobInfo(co.cask.cdap.proto.MRJobInfo) TaskReport(org.apache.hadoop.mapred.TaskReport) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) JobClient(org.apache.hadoop.mapred.JobClient) TaskCounter(org.apache.hadoop.mapreduce.TaskCounter)
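
A RunningJob handle is also what you poll for live progress; mapProgress and reduceProgress, used above to build the MRJobInfo, each return a fraction in [0, 1]. A minimal sketch (the five-second poll interval is an arbitrary choice):

import org.apache.hadoop.mapred.RunningJob;

// Sketch: poll a live job for map/reduce progress until it completes.
public class ProgressPollSketch {
    public static void waitAndReport(RunningJob job) throws Exception {
        while (!job.isComplete()) {
            // both calls return a fraction between 0.0f and 1.0f
            System.out.printf("map %.0f%%, reduce %.0f%%%n",
                    job.mapProgress() * 100, job.reduceProgress() * 100);
            Thread.sleep(5000);
        }
        System.out.println(job.isSuccessful() ? "job succeeded" : "job failed");
    }
}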

Example 34 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hbase by apache.

the class TestTableMapReduceUtil method shoudBeValidMapReduceEvaluation.

@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
    Configuration cfg = UTIL.getConfiguration();
    JobConf jobConf = new JobConf(cfg);
    try {
        jobConf.setJobName("process row task");
        jobConf.setNumReduceTasks(1);
        TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY), ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
        TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
        RunningJob job = JobClient.runJob(jobConf);
        assertTrue(job.isSuccessful());
    } finally {
        if (jobConf != null)
            FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) Test(org.junit.Test)
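
The assertion works because JobClient.runJob is synchronous: it submits the job and blocks until it finishes, so isSuccessful() reflects the final state. The same idiom in isolation (jobConf is assumed to be fully configured; runJob also throws IOException if the job fails):

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

// Sketch: synchronous submission followed by an explicit success check.
public class RunAndCheckSketch {
    public static void runOrFail(JobConf jobConf) throws Exception {
        // runJob blocks until the job completes
        RunningJob job = JobClient.runJob(jobConf);
        if (!job.isSuccessful())
            throw new IllegalStateException("job failed: " + job.getJobName());
    }
}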

Example 35 with RunningJob

use of org.apache.hadoop.mapred.RunningJob in project hive by apache.

the class LogRetriever method logJob.

private void logJob(String logDir, String jobID, PrintWriter listWriter) throws IOException {
    RunningJob rj = jobClient.getJob(JobID.forName(jobID));
    String jobURLString = rj.getTrackingURL();
    Path jobDir = new Path(logDir, jobID);
    fs.mkdirs(jobDir);
    // Log the job configuration (job.xml.html)
    try {
        logJobConf(jobID, jobURLString, jobDir.toString());
    } catch (IOException e) {
        System.err.println("Cannot retrieve job.xml.html for " + jobID);
        e.printStackTrace();
    }
    listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + "," + "status=" + JobStatus.getJobRunState(rj.getJobState()) + ")");
    // Get completed attempts
    List<AttemptInfo> attempts = new ArrayList<AttemptInfo>();
    for (String type : new String[] { "map", "reduce", "setup", "cleanup" }) {
        try {
            List<AttemptInfo> successAttempts = getCompletedAttempts(jobID, jobURLString, type);
            attempts.addAll(successAttempts);
        } catch (IOException e) {
            System.err.println("Cannot retrieve " + type + " tasks for " + jobID);
            e.printStackTrace();
        }
    }
    // Get failed attempts
    try {
        List<AttemptInfo> failedAttempts = getFailedAttempts(jobID, jobURLString);
        attempts.addAll(failedAttempts);
    } catch (IOException e) {
        System.err.println("Cannot retrieve failed attempts for " + jobID);
        e.printStackTrace();
    }
    // Log each attempt
    for (AttemptInfo attempt : attempts) {
        try {
            logAttempt(jobID, attempt, jobDir.toString());
            listWriter.println("  attempt:" + attempt.id + "(" + "type=" + attempt.type + "," + "status=" + attempt.status + "," + "starttime=" + attempt.startTime + "," + "endtime=" + attempt.endTime + ")");
        } catch (IOException e) {
            System.err.println("Cannot log attempt " + attempt.id);
            e.printStackTrace();
        }
    }
    listWriter.println();
}
Also used : Path(org.apache.hadoop.fs.Path) RunningJob(org.apache.hadoop.mapred.RunningJob) ArrayList(java.util.ArrayList) IOException(java.io.IOException)
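
Resolving a RunningJob from its string ID and reading its metadata, as logJob does before fetching the logs, reduces to a few calls (a sketch; the null check matters because getJob returns null for unknown IDs):

import java.io.IOException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.RunningJob;

// Sketch: look up a job by its string ID and print basic metadata.
public class JobLookupSketch {
    public static void describe(JobClient jobClient, String jobIDString) throws IOException {
        RunningJob rj = jobClient.getJob(JobID.forName(jobIDString));
        if (rj == null)
            throw new IOException("no such job: " + jobIDString);
        System.out.println("name=" + rj.getJobName()
                + ", state=" + JobStatus.getJobRunState(rj.getJobState())
                + ", url=" + rj.getTrackingURL());
    }
}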

Aggregations

RunningJob (org.apache.hadoop.mapred.RunningJob) 61
JobConf (org.apache.hadoop.mapred.JobConf) 45
Path (org.apache.hadoop.fs.Path) 35
FileSystem (org.apache.hadoop.fs.FileSystem) 24
JobClient (org.apache.hadoop.mapred.JobClient) 20
IOException (java.io.IOException) 15
Counters (org.apache.hadoop.mapred.Counters) 14
Group (org.apache.hadoop.mapred.Counters.Group) 13
DMLConfig (org.apache.sysml.conf.DMLConfig) 13
Configuration (org.apache.hadoop.conf.Configuration) 7
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups) 7
DataOutputStream (java.io.DataOutputStream) 6
File (java.io.File) 5
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 5
FileStatus (org.apache.hadoop.fs.FileStatus) 5
Text (org.apache.hadoop.io.Text) 5
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 5
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo) 5
Test (org.junit.Test) 5
URI (java.net.URI) 4