Example 1 with TaskInfo

Use of org.apache.hadoop.tools.rumen.TaskInfo in project hadoop by apache.

The class JobFactory, method getNextJobFiltered:

protected JobStory getNextJobFiltered() throws IOException {
    JobStory job = getNextJobFromTrace();
    // These jobs are not yet supported in Gridmix
    while (job != null &&
           (job.getOutcome() != Pre21JobHistoryConstants.Values.SUCCESS ||
            job.getSubmissionTime() < 0 ||
            job.getNumberMaps() == 0)) {
        if (LOG.isDebugEnabled()) {
            List<String> reason = new ArrayList<String>();
            if (job.getOutcome() != Pre21JobHistoryConstants.Values.SUCCESS) {
                reason.add("STATE (" + job.getOutcome().name() + ")");
            }
            if (job.getSubmissionTime() < 0) {
                reason.add("SUBMISSION-TIME (" + job.getSubmissionTime() + ")");
            }
            if (job.getNumberMaps() == 0) {
                reason.add("ZERO-MAPS-JOB");
            }
            // TODO This should never happen. Probably we missed something!
            if (reason.size() == 0) {
                reason.add("N/A");
            }
            LOG.debug("Ignoring job " + job.getJobID() + " from the input trace." + " Reason: " + StringUtils.join(reason, ","));
        }
        job = getNextJobFromTrace();
    }
    return null == job ? null : new FilterJobStory(job) {

        @Override
        public TaskInfo getTaskInfo(TaskType taskType, int taskNumber) {
            TaskInfo info = this.job.getTaskInfo(taskType, taskNumber);
            if (info != null) {
                info = new MinTaskInfo(info);
            } else {
                // Trace has no data for this task; fall back to an all-zero
                // TaskInfo and let MinTaskInfo impose its minimums
                info = new MinTaskInfo(new TaskInfo(0, 0, 0, 0, 0));
            }
            return info;
        }
    };
}
Also used: TaskInfo (org.apache.hadoop.tools.rumen.TaskInfo), JobStory (org.apache.hadoop.tools.rumen.JobStory), TaskType (org.apache.hadoop.mapreduce.TaskType), ArrayList (java.util.ArrayList)
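
The null guard in getTaskInfo above protects against traces with missing task data: when the trace has no record for a task, the method substitutes an all-zero TaskInfo, and MinTaskInfo (a Gridmix wrapper whose source is not shown on this page) presumably clamps each statistic to a sane minimum. The sketch below shows such a clamping wrapper; it assumes TaskInfo's five-argument constructor shown above plus a getTaskMemory() accessor for the fifth argument, and the floor values are illustrative, not Gridmix's actual defaults.

import org.apache.hadoop.tools.rumen.TaskInfo;

// Illustrative stand-in for a MinTaskInfo-style wrapper: copies the wrapped
// TaskInfo's statistics and never reports less than a fixed floor for each.
class FlooredTaskInfo extends TaskInfo {
    FlooredTaskInfo(TaskInfo info) {
        // getTaskMemory() is assumed to expose the constructor's fifth argument
        super(info.getInputBytes(), info.getInputRecords(),
              info.getOutputBytes(), info.getOutputRecords(),
              info.getTaskMemory());
    }

    @Override
    public long getInputBytes() {
        // Report at least one byte so downstream ratio math stays finite
        return Math.max(1, super.getInputBytes());
    }

    @Override
    public int getInputRecords() {
        return Math.max(1, super.getInputRecords());
    }

    @Override
    public long getOutputBytes() {
        return Math.max(1, super.getOutputBytes());
    }

    @Override
    public int getOutputRecords() {
        return Math.max(1, super.getOutputRecords());
    }
}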

Example 2 with TaskInfo

Use of org.apache.hadoop.tools.rumen.TaskInfo in project hadoop by apache.

The class LoadJob, method buildSplits:

@Override
void buildSplits(FilePool inputDir) throws IOException {
    long mapInputBytesTotal = 0L;
    long mapOutputBytesTotal = 0L;
    long mapOutputRecordsTotal = 0L;
    final JobStory jobdesc = getJobDesc();
    if (null == jobdesc) {
        return;
    }
    final int maps = jobdesc.getNumberMaps();
    final int reds = jobdesc.getNumberReduces();
    // Sum input/output statistics across all map tasks in the trace
    for (int i = 0; i < maps; ++i) {
        final TaskInfo info = jobdesc.getTaskInfo(TaskType.MAP, i);
        mapInputBytesTotal += info.getInputBytes();
        mapOutputBytesTotal += info.getOutputBytes();
        mapOutputRecordsTotal += info.getOutputRecords();
    }
    // Each reducer's share of the total map output, by bytes and by records
    final double[] reduceRecordRatio = new double[reds];
    final double[] reduceByteRatio = new double[reds];
    for (int i = 0; i < reds; ++i) {
        final TaskInfo info = jobdesc.getTaskInfo(TaskType.REDUCE, i);
        reduceByteRatio[i] = info.getInputBytes() / (1.0 * mapOutputBytesTotal);
        reduceRecordRatio[i] = info.getInputRecords() / (1.0 * mapOutputRecordsTotal);
    }
    final InputStriper striper = new InputStriper(inputDir, mapInputBytesTotal);
    final List<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < maps; ++i) {
        // Reduce specs are assigned round-robin: map i carries the specs
        // for reducers i, i + maps, i + 2*maps, ...
        final int nSpec = reds / maps + ((reds % maps) > i ? 1 : 0);
        final long[] specBytes = new long[nSpec];
        final long[] specRecords = new long[nSpec];
        final ResourceUsageMetrics[] metrics = new ResourceUsageMetrics[nSpec];
        for (int j = 0; j < nSpec; ++j) {
            final TaskInfo info = jobdesc.getTaskInfo(TaskType.REDUCE, i + j * maps);
            specBytes[j] = info.getOutputBytes();
            specRecords[j] = info.getOutputRecords();
            metrics[j] = info.getResourceUsageMetrics();
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format(
                    "SPEC(%d) %d -> %d %d %d %d %d %d %d", id(), i, i + j * maps,
                    info.getOutputRecords(), info.getOutputBytes(),
                    info.getResourceUsageMetrics().getCumulativeCpuUsage(),
                    info.getResourceUsageMetrics().getPhysicalMemoryUsage(),
                    info.getResourceUsageMetrics().getVirtualMemoryUsage(),
                    info.getResourceUsageMetrics().getHeapUsage()));
            }
        }
        final TaskInfo info = jobdesc.getTaskInfo(TaskType.MAP, i);
        // The trace records the map's input size as read, which may be
        // compressed; scale to uncompressed bytes when emulating compression
        long possiblyCompressedInputBytes = info.getInputBytes();
        Configuration conf = job.getConfiguration();
        long uncompressedInputBytes = CompressionEmulationUtil.getUncompressedInputBytes(
            possiblyCompressedInputBytes, conf);
        splits.add(new LoadSplit(striper.splitFor(inputDir, uncompressedInputBytes, 3),
            maps, i, uncompressedInputBytes, info.getInputRecords(),
            info.getOutputBytes(), info.getOutputRecords(), reduceByteRatio,
            reduceRecordRatio, specBytes, specRecords,
            info.getResourceUsageMetrics(), metrics));
    }
    pushDescription(id(), splits);
}
Also used: ResourceUsageMetrics (org.apache.hadoop.tools.rumen.ResourceUsageMetrics), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), TaskInfo (org.apache.hadoop.tools.rumen.TaskInfo), JobStory (org.apache.hadoop.tools.rumen.JobStory), InputSplit (org.apache.hadoop.mapreduce.InputSplit)
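
The spec loop in buildSplits assigns reduce-side specifications round-robin across the map splits: map i carries the specs for reducers i, i + maps, i + 2*maps, and so on, which is why nSpec is reds / maps plus one extra for the first (reds % maps) map indices. The self-contained sketch below (plain Java, no Hadoop types; the class name and output format are invented for illustration) traces that arithmetic and shows every reducer covered exactly once.

// Demonstrates the round-robin assignment of reduce specs to map splits
// used by buildSplits above.
public class SpecAssignmentDemo {
    public static void main(String[] args) {
        final int maps = 3, reds = 7;
        for (int i = 0; i < maps; ++i) {
            // Same formula as buildSplits: base share, plus one extra spec
            // for the first (reds % maps) map indices.
            final int nSpec = reds / maps + ((reds % maps) > i ? 1 : 0);
            final StringBuilder owned = new StringBuilder();
            for (int j = 0; j < nSpec; ++j) {
                owned.append(i + j * maps).append(' ');
            }
            // Prints:
            //   map 0 -> reducers 0 3 6
            //   map 1 -> reducers 1 4
            //   map 2 -> reducers 2 5
            System.out.println("map " + i + " -> reducers " + owned.toString().trim());
        }
    }
}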

Aggregations

ArrayList (java.util.ArrayList): 2
JobStory (org.apache.hadoop.tools.rumen.JobStory): 2
TaskInfo (org.apache.hadoop.tools.rumen.TaskInfo): 2
Configuration (org.apache.hadoop.conf.Configuration): 1
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 1
TaskType (org.apache.hadoop.mapreduce.TaskType): 1
ResourceUsageMetrics (org.apache.hadoop.tools.rumen.ResourceUsageMetrics): 1