Use of org.apache.hadoop.mapreduce.TaskReport in project hadoop by apache.
The class JobClientUnitTest, method testCleanupTaskReportsWithNullJob.
@Test
public void testCleanupTaskReportsWithNullJob() throws Exception {
  TestJobClient client = new TestJobClient(new JobConf());
  Cluster mockCluster = mock(Cluster.class);
  client.setCluster(mockCluster);
  JobID id = new JobID("test", 0);
  when(mockCluster.getJob(id)).thenReturn(null);
  TaskReport[] result = client.getCleanupTaskReports(id);
  assertEquals(0, result.length);
  verify(mockCluster).getJob(id);
}
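For context, the guard this test exercises can be sketched as follows. This is an illustrative shape only, not a verbatim copy of Hadoop's JobClient; in particular, the lookup helper named getJobUsingCluster here is an assumption.

// Illustrative sketch: a missing job should yield an empty report array, not null.
public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException {
  try {
    Job job = getJobUsingCluster(jobId); // assumed helper that resolves the job via the Cluster
    if (job == null) {
      return new TaskReport[0]; // empty array rather than null, which is what the test asserts
    }
    return job.getTaskReports(TaskType.JOB_CLEANUP);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}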
Use of org.apache.hadoop.mapreduce.TaskReport in project cdap by caskdata.
The class MapReduceMetricsWriter, method reportMapredStats.
// job level stats from counters built in to mapreduce
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
  JobStatus jobStatus = jobConf.getStatus();
  // map stats
  float mapProgress = jobStatus.getMapProgress();
  int runningMappers = 0;
  int runningReducers = 0;
  for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
    runningMappers += tr.getRunningTaskAttemptIds().size();
  }
  for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
    runningReducers += tr.getRunningTaskAttemptIds().size();
  }
  int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
  int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB);
  long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
  long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
  long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);
  mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
  mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
  mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
  mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
  mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
  mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);
  LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})", (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);
  // reduce stats
  float reduceProgress = jobStatus.getReduceProgress();
  long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
  long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);
  reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
  reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
  reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
  reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
  reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);
  LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})", (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}
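The getTaskCounter helper referenced above is not shown in this excerpt. A plausible sketch (the actual CDAP implementation may differ) reads a single TaskCounter value and treats a missing counter as zero:

private long getTaskCounter(Counters jobCounters, TaskCounter taskCounter) {
  // findCounter(Enum) is part of the mapreduce Counters API; a null result is treated as 0
  Counter counter = jobCounters.findCounter(taskCounter);
  return counter == null ? 0L : counter.getValue();
}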
Use of org.apache.hadoop.mapreduce.TaskReport in project druid by druid-io.
The class HadoopConverterJob, method run.
public List<DataSegment> run() throws IOException {
  final JobConf jobConf = new JobConf();
  jobConf.setKeepFailedTaskFiles(false);
  for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
    jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
  }
  final List<DataSegment> segments = converterConfig.getSegments();
  if (segments.isEmpty()) {
    throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
  }
  converterConfigIntoConfiguration(converterConfig, segments, jobConf);
  // Map only. Number of map tasks determined by input format
  jobConf.setNumReduceTasks(0);
  jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
  setJobName(jobConf, segments);
  if (converterConfig.getJobPriority() != null) {
    jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
  }
  final Job job = Job.getInstance(jobConf);
  job.setInputFormatClass(ConfigInputFormat.class);
  job.setMapperClass(ConvertingMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapSpeculativeExecution(false);
  job.setOutputFormatClass(ConvertingOutputFormat.class);
  JobHelper.setupClasspath(
      JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
      JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
      job
  );
  Throwable throwable = null;
  try {
    job.submit();
    log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
    final boolean success = job.waitForCompletion(true);
    if (!success) {
      final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
      if (reports != null) {
        for (final TaskReport report : reports) {
          log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
        }
      }
      return null;
    }
    try {
      loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
      writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
    } catch (IOException ex) {
      log.error(ex, "Could not fetch counters");
    }
    final JobID jobID = job.getJobID();
    final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
    final List<Path> goodPaths = new ArrayList<>();
    while (it.hasNext()) {
      final LocatedFileStatus locatedFileStatus = it.next();
      if (locatedFileStatus.isFile()) {
        final Path myPath = locatedFileStatus.getPath();
        if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
          goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
        }
      }
    }
    if (goodPaths.isEmpty()) {
      log.warn("No good data found at [%s]", jobDir);
      return null;
    }
    final List<DataSegment> returnList = ImmutableList.copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
      @Nullable
      @Override
      public DataSegment apply(final Path input) {
        try {
          if (!fs.exists(input)) {
            throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]", ConvertingOutputFormat.DATA_SUCCESS_KEY, ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
          }
        } catch (final IOException e) {
          throw Throwables.propagate(e);
        }
        try (final InputStream stream = fs.open(input)) {
          return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
        } catch (final IOException e) {
          throw Throwables.propagate(e);
        }
      }
    }));
    if (returnList.size() == segments.size()) {
      return returnList;
    } else {
      throw new ISE("Tasks reported success but result length did not match! Expected %d found %d at path [%s]", segments.size(), returnList.size(), jobDir);
    }
  } catch (InterruptedException | ClassNotFoundException e) {
    RuntimeException exception = Throwables.propagate(e);
    throwable = exception;
    throw exception;
  } catch (Throwable t) {
    throwable = t;
    throw t;
  } finally {
    try {
      cleanup(job);
    } catch (IOException e) {
      if (throwable != null) {
        throwable.addSuppressed(e);
      } else {
        log.error(e, "Could not clean up job [%s]", job.getJobID());
      }
    }
  }
}
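Note that run() signals failure by returning null after logging the per-task diagnostics, so callers are expected to check the result. A hedged usage sketch, assuming the constructor simply takes the converter config:

HadoopConverterJob converterJob = new HadoopConverterJob(converterConfig); // constructor argument is an assumption
List<DataSegment> convertedSegments = converterJob.run();
if (convertedSegments == null) {
  // the map tasks' diagnostics have already been logged by run()
  throw new ISE("Converter job failed for datasource [%s]", converterConfig.getDataSource());
}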
Use of org.apache.hadoop.mapreduce.TaskReport in project hadoop by apache.
The class CLI, method displayTasks.
/**
* Display the information about a job's tasks, of a particular type and
* in a particular state
*
* @param job the job
* @param type the type of the task (map/reduce/setup/cleanup)
* @param state the state of the task
* (pending/running/completed/failed/killed)
* @throws IOException when there is an error communicating with the master
* @throws InterruptedException
* @throws IllegalArgumentException if an invalid type/state is passed
*/
protected void displayTasks(Job job, String type, String state) throws IOException, InterruptedException {
  TaskReport[] reports = job.getTaskReports(TaskType.valueOf(org.apache.hadoop.util.StringUtils.toUpperCase(type)));
  for (TaskReport report : reports) {
    TIPStatus status = report.getCurrentStatus();
    if ((state.equalsIgnoreCase("pending") && status == TIPStatus.PENDING)
        || (state.equalsIgnoreCase("running") && status == TIPStatus.RUNNING)
        || (state.equalsIgnoreCase("completed") && status == TIPStatus.COMPLETE)
        || (state.equalsIgnoreCase("failed") && status == TIPStatus.FAILED)
        || (state.equalsIgnoreCase("killed") && status == TIPStatus.KILLED)) {
      printTaskAttempts(report);
    }
  }
}
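The printTaskAttempts helper is not included in this excerpt. A plausible sketch (not necessarily Hadoop's exact implementation) prints the successful attempt for completed tasks and the running attempt IDs otherwise, using accessors that exist on TaskReport:

protected void printTaskAttempts(TaskReport report) {
  if (report.getCurrentStatus() == TIPStatus.COMPLETE) {
    // a completed task has a single successful attempt
    System.out.println(report.getSuccessfulTaskAttemptId());
  } else if (report.getCurrentStatus() == TIPStatus.RUNNING) {
    // a running task may have several in-flight attempts (e.g. with speculative execution)
    for (TaskAttemptID attemptId : report.getRunningTaskAttemptIds()) {
      System.out.println(attemptId);
    }
  }
}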
Use of org.apache.hadoop.mapreduce.TaskReport in project hadoop by apache.
The class JobClientUnitTest, method testSetupTaskReportsWithNullJob.
@Test
public void testSetupTaskReportsWithNullJob() throws Exception {
  TestJobClient client = new TestJobClient(new JobConf());
  Cluster mockCluster = mock(Cluster.class);
  client.setCluster(mockCluster);
  JobID id = new JobID("test", 0);
  when(mockCluster.getJob(id)).thenReturn(null);
  TaskReport[] result = client.getSetupTaskReports(id);
  assertEquals(0, result.length);
  verify(mockCluster).getJob(id);
}