use of org.apache.hadoop.mapred.RunningJob in project incubator-systemml by apache.
the class CSVReblockMR method runAssignRowIDMRJob.
public static AssignRowIDMRReturn runAssignRowIDMRJob(String[] inputs, InputInfo[] inputInfos, int[] brlens, int[] bclens, String reblockInstructions, int replication, String[] smallestFiles, boolean transform, String naStrings, String spec) throws Exception {
  AssignRowIDMRReturn ret = new AssignRowIDMRReturn();
  JobConf job;
  job = new JobConf(CSVReblockMR.class);
  job.setJobName("Assign-RowID-MR");
  byte[] realIndexes = new byte[inputs.length];
  for (byte b = 0; b < realIndexes.length; b++)
    realIndexes[b] = b;
  //set up the input files and their format information
  MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false, ConvertTarget.CELL);
  job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);
  //set up the aggregate instructions that will happen in the combiner and reducer
  MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);
  //set up the replication factor for the results
  job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
  //set up custom map/reduce configurations
  DMLConfig config = ConfigurationManager.getDMLConfig();
  MRJobConfiguration.setupCustomMRConfigurations(job, config);
  //set up the number of reducers
  job.setNumReduceTasks(1);
  // Print the complete instruction
  //if (LOG.isTraceEnabled())
  //inst.printCompelteMRJobInstruction();
  // configure mapper and the mapper output key value pairs
  job.setMapperClass(CSVAssignRowIDMapper.class);
  job.setMapOutputKeyClass(ByteWritable.class);
  job.setMapOutputValueClass(OffsetCount.class);
  //configure reducer
  job.setReducerClass(CSVAssignRowIDReducer.class);
  //turn off adaptivemr
  job.setBoolean("adaptivemr.map.enable", false);
  //set unique working dir
  MRJobConfiguration.setUniqueWorkingDir(job);
  //set up the output file
  ret.counterFile = new Path(MRJobConfiguration.constructTempOutputFilename());
  job.setOutputFormat(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, ret.counterFile);
  job.setOutputKeyClass(ByteWritable.class);
  job.setOutputValueClass(OffsetCount.class);
  // setup properties relevant to transform
  job.setBoolean(MRJobConfiguration.TF_TRANSFORM, transform);
  if (transform) {
    if (naStrings != null)
      // Adding "dummy" string to handle the case of na_strings = ""
      job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(naStrings));
    job.set(MRJobConfiguration.TF_SPEC, spec);
  }
  RunningJob runjob = JobClient.runJob(job);
  /* Process different counters */
  Group rgroup = runjob.getCounters().getGroup(NUM_ROWS_IN_MATRIX);
  Group cgroup = runjob.getCounters().getGroup(NUM_COLS_IN_MATRIX);
  ret.rlens = new long[inputs.length];
  ret.clens = new long[inputs.length];
  for (int i = 0; i < inputs.length; i++) {
    // number of rows and columns per input, reported via the job counters
    ret.rlens[i] = rgroup.getCounter(Integer.toString(i));
    ret.clens[i] = cgroup.getCounter(Integer.toString(i));
  }
  return ret;
}
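For context, a hedged sketch of how a caller might consume the returned AssignRowIDMRReturn; the argument variables and surrounding setup are assumed, and only the method signature and field names (rlens, clens, counterFile) come from the snippet above.
// Hypothetical caller: run the row-ID assignment pass, then read the per-input dimensions.
AssignRowIDMRReturn ids = CSVReblockMR.runAssignRowIDMRJob(inputs, inputInfos, brlens, bclens,
    reblockInstructions, /*replication*/ 1, smallestFiles, /*transform*/ false, null, null);
for (int i = 0; i < inputs.length; i++)
  System.out.println("input " + i + ": " + ids.rlens[i] + " x " + ids.clens[i]);
// ids.counterFile points to the SequenceFile of per-split offsets written by the single reducer.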
use of org.apache.hadoop.mapred.RunningJob in project compiler by boalang.
the class BoaOutputCommitter method abortJob.
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
  super.abortJob(context, runState);
  final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
  final RunningJob job = jobClient.getJob((org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));
  String diag = "";
  for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
    switch (event.getTaskStatus()) {
      case SUCCEEDED:
        break;
      case FAILED:
      case KILLED:
      case OBSOLETE:
      case TIPFAILED:
        diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
        for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
          diag += s + "\n";
        diag += "\n";
        break;
    }
  updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
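One caveat: RunningJob.getTaskCompletionEvents(int startFrom) returns events in batches rather than all at once, so the loop above only inspects the first batch. A minimal sketch of paging through every event; the loop is illustrative and not taken from the Boa source.
// Illustrative: drain all task completion events of a RunningJob, batch by batch.
int from = 0;
TaskCompletionEvent[] events;
while ((events = job.getTaskCompletionEvents(from)).length > 0) {
  for (final TaskCompletionEvent event : events) {
    // inspect event.getTaskStatus(), event.getTaskAttemptId(), ...
  }
  from += events.length;
}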
use of org.apache.hadoop.mapred.RunningJob in project cdap by caskdata.
the class MRJobClient method getMRJobInfo.
/**
 * @param runId the program run for which information will be returned.
 * @return a {@link MRJobInfo} containing information about a particular MapReduce program run.
 * @throws IOException if there is a failure to communicate through the JobClient.
 * @throws NotFoundException if a job with the given runId is not found.
 */
public MRJobInfo getMRJobInfo(Id.Run runId) throws IOException, NotFoundException {
  Preconditions.checkArgument(ProgramType.MAPREDUCE.equals(runId.getProgram().getType()));
  JobClient jobClient = new JobClient(hConf);
  JobStatus[] jobs = jobClient.getAllJobs();
  JobStatus thisJob = findJobForRunId(jobs, runId.toEntityId());
  RunningJob runningJob = jobClient.getJob(thisJob.getJobID());
  if (runningJob == null) {
    throw new IllegalStateException(String.format("JobClient returned null for RunId: '%s', JobId: '%s'", runId, thisJob.getJobID()));
  }
  Counters counters = runningJob.getCounters();
  TaskReport[] mapTaskReports = jobClient.getMapTaskReports(thisJob.getJobID());
  TaskReport[] reduceTaskReports = jobClient.getReduceTaskReports(thisJob.getJobID());
  return new MRJobInfo(runningJob.mapProgress(), runningJob.reduceProgress(), groupToMap(counters.getGroup(TaskCounter.class.getName())), toMRTaskInfos(mapTaskReports), toMRTaskInfos(reduceTaskReports), true);
}
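As a side note, mapProgress() and reduceProgress() return floats in [0, 1], which is what MRJobInfo receives above. A minimal sketch of reading the same values directly from the Hadoop API, outside of CDAP's wrapper types; the configuration variable and job id string are hypothetical.
// Illustrative: look up a RunningJob by id and report its progress.
JobClient jobClient = new JobClient(new JobConf(conf));
RunningJob rj = jobClient.getJob(JobID.forName("job_1234567890123_0001")); // hypothetical id
if (rj != null) {
  System.out.printf("map %.0f%%, reduce %.0f%%, complete=%s%n",
      rj.mapProgress() * 100, rj.reduceProgress() * 100, rj.isComplete());
}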
use of org.apache.hadoop.mapred.RunningJob in project hbase by apache.
the class TestTableMapReduceUtil method shoudBeValidMapReduceEvaluation.
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY), ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
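JobClient.runJob blocks until the job finishes, so the isSuccessful() assertion above never races with a still-running job. For contrast, a hedged sketch of the non-blocking path via submitJob; the polling interval is arbitrary.
// Illustrative: submitJob returns immediately; poll the RunningJob until it completes.
RunningJob job = new JobClient(jobConf).submitJob(jobConf);
while (!job.isComplete()) {
  Thread.sleep(1000); // arbitrary polling interval
}
assertTrue(job.isSuccessful());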
use of org.apache.hadoop.mapred.RunningJob in project hive by apache.
the class LogRetriever method logJob.
private void logJob(String logDir, String jobID, PrintWriter listWriter) throws IOException {
  RunningJob rj = jobClient.getJob(JobID.forName(jobID));
  String jobURLString = rj.getTrackingURL();
  Path jobDir = new Path(logDir, jobID);
  fs.mkdirs(jobDir);
  // Log jobconf
  try {
    logJobConf(jobID, jobURLString, jobDir.toString());
  } catch (IOException e) {
    System.err.println("Cannot retrieve job.xml.html for " + jobID);
    e.printStackTrace();
  }
  listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + "," + "status=" + JobStatus.getJobRunState(rj.getJobState()) + ")");
  // Get completed attempts
  List<AttemptInfo> attempts = new ArrayList<AttemptInfo>();
  for (String type : new String[] { "map", "reduce", "setup", "cleanup" }) {
    try {
      List<AttemptInfo> successAttempts = getCompletedAttempts(jobID, jobURLString, type);
      attempts.addAll(successAttempts);
    } catch (IOException e) {
      System.err.println("Cannot retrieve " + type + " tasks for " + jobID);
      e.printStackTrace();
    }
  }
  // Get failed attempts
  try {
    List<AttemptInfo> failedAttempts = getFailedAttempts(jobID, jobURLString);
    attempts.addAll(failedAttempts);
  } catch (IOException e) {
    System.err.println("Cannot retrieve failed attempts for " + jobID);
    e.printStackTrace();
  }
  // Log attempts
  for (AttemptInfo attempt : attempts) {
    try {
      logAttempt(jobID, attempt, jobDir.toString());
      listWriter.println(" attempt:" + attempt.id + "(" + "type=" + attempt.type + "," + "status=" + attempt.status + "," + "starttime=" + attempt.startTime + "," + "endtime=" + attempt.endTime + ")");
    } catch (IOException e) {
      System.err.println("Cannot log attempt " + attempt.id);
      e.printStackTrace();
    }
  }
  listWriter.println();
}
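The JobStatus.getJobRunState call above maps RunningJob.getJobState()'s integer code to a human-readable name. A small illustrative use of that mapping, assuming the same rj variable; the state constants are from org.apache.hadoop.mapred.JobStatus.
// Illustrative: translate a RunningJob's integer state into the string logged above.
int state = rj.getJobState(); // e.g. JobStatus.RUNNING, SUCCEEDED, FAILED, PREP, KILLED
if (state == JobStatus.SUCCEEDED) {
  System.out.println("job " + rj.getID() + " finished as " + JobStatus.getJobRunState(state));
}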