
Example 1 with TaskReport

Use of org.apache.hadoop.mapreduce.TaskReport in project hadoop by apache.

The class JobClientUnitTest, method testCleanupTaskReportsWithNullJob:

@Test
public void testCleanupTaskReportsWithNullJob() throws Exception {
    TestJobClient client = new TestJobClient(new JobConf());
    Cluster mockCluster = mock(Cluster.class);
    client.setCluster(mockCluster);
    JobID id = new JobID("test", 0);
    when(mockCluster.getJob(id)).thenReturn(null);
    TaskReport[] result = client.getCleanupTaskReports(id);
    assertEquals(0, result.length);
    verify(mockCluster).getJob(id);
}
Also used : TaskReport(org.apache.hadoop.mapreduce.TaskReport) Cluster(org.apache.hadoop.mapreduce.Cluster) Test(org.junit.Test)
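
The assertion above depends on the report getter returning an empty array, rather than null, when the cluster has no job for the given JobID. Below is a minimal sketch of that behavior, not the actual JobClient source; the surrounding cluster field and the mapping of cleanup reports to TaskType.JOB_CLEANUP are assumptions, and it also needs org.apache.hadoop.mapreduce.Job and org.apache.hadoop.mapreduce.TaskType.

public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException, InterruptedException {
    // Hedged sketch of the behavior the test exercises.
    Job job = cluster.getJob(jobId);
    if (job == null) {
        // Unknown job: return an empty array, never null, as the test asserts.
        return new TaskReport[0];
    }
    return job.getTaskReports(TaskType.JOB_CLEANUP);
}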

Example 2 with TaskReport

Use of org.apache.hadoop.mapreduce.TaskReport in project cdap by caskdata.

The class MapReduceMetricsWriter, method reportMapredStats:

// Job-level stats from the counters built into MapReduce
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
    JobStatus jobStatus = jobConf.getStatus();
    // map stats
    float mapProgress = jobStatus.getMapProgress();
    int runningMappers = 0;
    int runningReducers = 0;
    for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
        runningMappers += tr.getRunningTaskAttemptIds().size();
    }
    for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
        runningReducers += tr.getRunningTaskAttemptIds().size();
    }
    int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
    int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB);
    long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
    long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
    long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
    mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);
    LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})", (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);
    // reduce stats
    float reduceProgress = jobStatus.getReduceProgress();
    long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
    long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
    reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);
    LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})", (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}
Also used : JobStatus(org.apache.hadoop.mapreduce.JobStatus) TaskReport(org.apache.hadoop.mapreduce.TaskReport)
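
The helper getTaskCounter is not shown in this snippet. A plausible minimal implementation, assuming it simply reads the job's built-in TaskCounter group and treats a missing counter as zero (both the signature and the null handling are assumptions); it needs org.apache.hadoop.mapreduce.Counter, Counters, and TaskCounter in addition to the imports listed above.

private long getTaskCounter(Counters jobCounters, TaskCounter taskCounter) {
    // Hedged sketch: look up the built-in task counter by group and name.
    Counter counter = jobCounters.findCounter(TaskCounter.class.getName(), taskCounter.name());
    return counter == null ? 0L : counter.getValue();
}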

Example 3 with TaskReport

Use of org.apache.hadoop.mapreduce.TaskReport in project druid by druid-io.

The class HadoopConverterJob, method run:

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
    // Map only. Number of map tasks determined by input format
    jobConf.setNumReduceTasks(0);
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
    setJobName(jobConf, segments);
    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }
    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);
    JobHelper.setupClasspath(
            JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job
    );
    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList.copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {

            @Nullable
            @Override
            public DataSegment apply(final Path input) {
                try {
                    if (!fs.exists(input)) {
                        throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]", ConvertingOutputFormat.DATA_SUCCESS_KEY, ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                    }
                } catch (final IOException e) {
                    throw Throwables.propagate(e);
                }
                try (final InputStream stream = fs.open(input)) {
                    return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                } catch (final IOException e) {
                    throw Throwables.propagate(e);
                }
            }
        }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE("Tasks reported success but result length did not match! Expected %d found %d at path [%s]", segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
Also used : ArrayList(java.util.ArrayList) DataSegment(io.druid.timeline.DataSegment) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) Function(com.google.common.base.Function) FileSystem(org.apache.hadoop.fs.FileSystem) ISE(io.druid.java.util.common.ISE) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Path(org.apache.hadoop.fs.Path) TaskReport(org.apache.hadoop.mapreduce.TaskReport) InputStream(java.io.InputStream) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) IAE(io.druid.java.util.common.IAE) Map(java.util.Map) JobID(org.apache.hadoop.mapreduce.JobID)
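
The failure branch above illustrates the usual pattern for surfacing task-level diagnostics after a failed job. A standalone sketch of that pattern, using plain stderr instead of Druid's logger and an assumed helper name logFailedMapTasks; it also needs java.util.Arrays, org.apache.hadoop.mapred.TIPStatus, and org.apache.hadoop.mapreduce.TaskType.

private void logFailedMapTasks(Job job) throws IOException, InterruptedException {
    // Hedged sketch: after waitForCompletion(true) returns false, pull the map
    // TaskReports and print the diagnostics of each failed task.
    TaskReport[] reports = job.getTaskReports(TaskType.MAP);
    if (reports == null) {
        return;
    }
    for (TaskReport report : reports) {
        if (report.getCurrentStatus() == TIPStatus.FAILED) {
            System.err.printf("Task [%s] failed: %s%n",
                    report.getTaskId(), Arrays.toString(report.getDiagnostics()));
        }
    }
}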

Example 4 with TaskReport

Use of org.apache.hadoop.mapreduce.TaskReport in project hadoop by apache.

The class CLI, method displayTasks:

/**
   * Display the information about a job's tasks, of a particular type and
   * in a particular state
   * 
   * @param job the job
   * @param type the type of the task (map/reduce/setup/cleanup)
   * @param state the state of the task 
   * (pending/running/completed/failed/killed)
   * @throws IOException when there is an error communicating with the master
   * @throws InterruptedException
   * @throws IllegalArgumentException if an invalid type/state is passed
   */
protected void displayTasks(Job job, String type, String state) throws IOException, InterruptedException {
    TaskReport[] reports = null;
    reports = job.getTaskReports(TaskType.valueOf(org.apache.hadoop.util.StringUtils.toUpperCase(type)));
    for (TaskReport report : reports) {
        TIPStatus status = report.getCurrentStatus();
        if ((state.equalsIgnoreCase("pending") && status == TIPStatus.PENDING)
                || (state.equalsIgnoreCase("running") && status == TIPStatus.RUNNING)
                || (state.equalsIgnoreCase("completed") && status == TIPStatus.COMPLETE)
                || (state.equalsIgnoreCase("failed") && status == TIPStatus.FAILED)
                || (state.equalsIgnoreCase("killed") && status == TIPStatus.KILLED)) {
            printTaskAttempts(report);
        }
    }
}
Also used : TIPStatus(org.apache.hadoop.mapred.TIPStatus) TaskReport(org.apache.hadoop.mapreduce.TaskReport)
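
displayTasks delegates the per-report output to printTaskAttempts, which is not shown here. A plausible sketch of such a helper follows; the real CLI implementation may differ. It prints the successful attempt for completed tasks and lists the running attempts otherwise, and assumes org.apache.hadoop.mapreduce.TaskAttemptID is imported.

protected void printTaskAttempts(TaskReport report) {
    // Hedged sketch of a printTaskAttempts-style helper.
    if (report.getCurrentStatus() == TIPStatus.COMPLETE) {
        // A completed task has exactly one successful attempt.
        System.out.println(report.getSuccessfulTaskAttemptId());
    } else if (report.getCurrentStatus() == TIPStatus.RUNNING) {
        // A running task may have several attempts in flight (e.g. speculative execution).
        for (TaskAttemptID attempt : report.getRunningTaskAttemptIds()) {
            System.out.println(attempt);
        }
    }
}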

Example 5 with TaskReport

Use of org.apache.hadoop.mapreduce.TaskReport in project hadoop by apache.

The class JobClientUnitTest, method testSetupTaskReportsWithNullJob:

@Test
public void testSetupTaskReportsWithNullJob() throws Exception {
    TestJobClient client = new TestJobClient(new JobConf());
    Cluster mockCluster = mock(Cluster.class);
    client.setCluster(mockCluster);
    JobID id = new JobID("test", 0);
    when(mockCluster.getJob(id)).thenReturn(null);
    TaskReport[] result = client.getSetupTaskReports(id);
    assertEquals(0, result.length);
    verify(mockCluster).getJob(id);
}
Also used : TaskReport(org.apache.hadoop.mapreduce.TaskReport) Cluster(org.apache.hadoop.mapreduce.Cluster) Test(org.junit.Test)

Aggregations

TaskReport (org.apache.hadoop.mapreduce.TaskReport): 7 usages
Cluster (org.apache.hadoop.mapreduce.Cluster): 4 usages
Test (org.junit.Test): 4 usages
Function (com.google.common.base.Function): 1 usage
WindowedDataSegment (io.druid.indexer.hadoop.WindowedDataSegment): 1 usage
IAE (io.druid.java.util.common.IAE): 1 usage
ISE (io.druid.java.util.common.ISE): 1 usage
DataSegment (io.druid.timeline.DataSegment): 1 usage
IOException (java.io.IOException): 1 usage
InputStream (java.io.InputStream): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Map (java.util.Map): 1 usage
FileSystem (org.apache.hadoop.fs.FileSystem): 1 usage
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 1 usage
Path (org.apache.hadoop.fs.Path): 1 usage
JobConf (org.apache.hadoop.mapred.JobConf): 1 usage
TIPStatus (org.apache.hadoop.mapred.TIPStatus): 1 usage
Job (org.apache.hadoop.mapreduce.Job): 1 usage
JobID (org.apache.hadoop.mapreduce.JobID): 1 usage
JobStatus (org.apache.hadoop.mapreduce.JobStatus): 1 usage