
Example 11 with JobStatus

use of org.apache.hadoop.mapreduce.JobStatus in project hadoop by apache.

the class StressJobFactory method checkLoadAndGetSlotsToBackfill.

/**
   * Uses a light-weight heuristic to estimate the current cluster load and to
   * determine how many map and reduce slots can still be backfilled.
   *
   * @throws java.io.IOException if cluster statistics cannot be obtained
   * @throws InterruptedException if the load check is interrupted
   */
protected void checkLoadAndGetSlotsToBackfill() throws IOException, InterruptedException {
    if (loadStatus.getJobLoad() <= 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(System.currentTimeMillis() + " [JobLoad] Overloaded is " + Boolean.TRUE.toString() + " NumJobsBackfill is " + loadStatus.getJobLoad());
        }
        // stop calculation because we know it is overloaded.
        return;
    }
    int mapCapacity = loadStatus.getMapCapacity();
    int reduceCapacity = loadStatus.getReduceCapacity();
    // return if the cluster status is not set
    if (mapCapacity < 0 || reduceCapacity < 0) {
        // a missing cluster status will result in job submission being blocked
        return;
    }
    // Determine the max permissible map & reduce task load
    int maxMapLoad = (int) (overloadMapTaskMapSlotRatio * mapCapacity);
    int maxReduceLoad = (int) (overloadReduceTaskReduceSlotRatio * reduceCapacity);
    // compute the total number of map & reduce tasks submitted
    int totalMapTasks = ClusterStats.getSubmittedMapTasks();
    int totalReduceTasks = ClusterStats.getSubmittedReduceTasks();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total submitted map tasks: " + totalMapTasks);
        LOG.debug("Total submitted reduce tasks: " + totalReduceTasks);
        LOG.debug("Max map load: " + maxMapLoad);
        LOG.debug("Max reduce load: " + maxReduceLoad);
    }
    // generate a pessimistic bound on the max running+pending map tasks
    // this check is to avoid the heavy-duty actual map load calculation
    int mapSlotsBackFill = (int) (maxMapLoad - totalMapTasks);
    // generate a pessimistic bound on the max running+pending reduce tasks
    // this check is to avoid the heavy-duty actual reduce load calculation
    int reduceSlotsBackFill = (int) (maxReduceLoad - totalReduceTasks);
    // maintain a list of seen job ids
    Set<JobID> seenJobIDs = new HashSet<JobID>();
    // check whether the total number of map & reduce tasks exceeds the permissible limit
    if (totalMapTasks > maxMapLoad || totalReduceTasks > maxReduceLoad) {
        // if yes, calculate the real load
        // include pending & running map tasks.
        float incompleteMapTasks = 0;
        // include pending & running reduce tasks
        float incompleteReduceTasks = 0;
        for (JobStats job : ClusterStats.getRunningJobStats()) {
            JobID id = job.getJob().getJobID();
            seenJobIDs.add(id);
            // skip jobs that have already been blacklisted (completed jobs are blacklisted below)
            if (blacklistedJobs.contains(id)) {
                LOG.warn("Ignoring blacklisted job: " + id);
                continue;
            }
            int noOfMaps = job.getNoOfMaps();
            int noOfReduces = job.getNoOfReds();
                // only consider jobs that actually have map or reduce tasks
            if (noOfMaps > 0 || noOfReduces > 0) {
                // get the job's status
                JobStatus status = job.getJobStatus();
                // blacklist completed jobs and continue
                if (status != null && status.isJobComplete()) {
                    LOG.warn("Blacklisting completed job: " + id);
                    blacklistedJobs.add(id);
                    continue;
                }
                // get the map and reduce tasks' progress
                float mapProgress = 0f;
                float reduceProgress = 0f;
                // check if the status is missing (this can happen for unpolled jobs)
                if (status != null) {
                    mapProgress = status.getMapProgress();
                    reduceProgress = status.getReduceProgress();
                }
                incompleteMapTasks += calcEffectiveIncompleteMapTasks(mapCapacity, noOfMaps, mapProgress);
                // bail out early
                int currentMapSlotsBackFill = (int) (maxMapLoad - incompleteMapTasks);
                if (currentMapSlotsBackFill <= 0) {
                    // reset the reduce task load since we are bailing out
                    incompleteReduceTasks = totalReduceTasks;
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Terminating overload check due to high map load.");
                    }
                    break;
                }
                // compute the real reduce load
                if (noOfReduces > 0) {
                    incompleteReduceTasks += calcEffectiveIncompleteReduceTasks(reduceCapacity, noOfReduces, reduceProgress);
                }
                // bail out early
                int currentReduceSlotsBackFill = (int) (maxReduceLoad - incompleteReduceTasks);
                if (currentReduceSlotsBackFill <= 0) {
                    // reset the map task load since we are bailing out
                    incompleteMapTasks = totalMapTasks;
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Terminating overload check due to high reduce load.");
                    }
                    break;
                }
            } else {
                LOG.warn("Blacklisting empty job: " + id);
                blacklistedJobs.add(id);
            }
        }
        // calculate the real map load on the cluster
        mapSlotsBackFill = (int) (maxMapLoad - incompleteMapTasks);
        // calculate the real reduce load on the cluster
        reduceSlotsBackFill = (int) (maxReduceLoad - incompleteReduceTasks);
        // clean up the blacklisted set to keep the memory footprint minimal
        // retain only the jobs that are seen in this cycle
        blacklistedJobs.retainAll(seenJobIDs);
        if (LOG.isDebugEnabled() && blacklistedJobs.size() > 0) {
            LOG.debug("Blacklisted jobs count: " + blacklistedJobs.size());
        }
    }
    // update
    loadStatus.updateMapLoad(mapSlotsBackFill);
    loadStatus.updateReduceLoad(reduceSlotsBackFill);
    if (loadStatus.getMapLoad() <= 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(System.currentTimeMillis() + " [MAP-LOAD] Overloaded is " + Boolean.TRUE.toString() + " MapSlotsBackfill is " + loadStatus.getMapLoad());
        }
        // stop calculation because we know it is overloaded.
        return;
    }
    if (loadStatus.getReduceLoad() <= 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(System.currentTimeMillis() + " [REDUCE-LOAD] Overloaded is " + Boolean.TRUE.toString() + " ReduceSlotsBackfill is " + loadStatus.getReduceLoad());
        }
        // stop calculation because we know it is overloaded.
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(System.currentTimeMillis() + " [OVERALL] Overloaded is " + Boolean.FALSE.toString() + " Current load status is " + loadStatus);
    }
}
Also used : JobStatus(org.apache.hadoop.mapreduce.JobStatus) JobID(org.apache.hadoop.mapreduce.JobID) HashSet(java.util.HashSet) JobStats(org.apache.hadoop.mapred.gridmix.Statistics.JobStats)
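
To make the throttling arithmetic concrete, here is a minimal standalone sketch using hypothetical capacities, overload ratios, and task counts (the numbers are illustrative, not Gridmix defaults):

// Hypothetical numbers only -- a sketch of the overload arithmetic above, not Gridmix defaults.
public class BackfillArithmeticSketch {
    public static void main(String[] args) {
        // assumed cluster capacity and ratio; in Gridmix these come from ClusterStats and configuration
        int mapCapacity = 200;
        float overloadMapTaskMapSlotRatio = 2.0f;
        // assumed number of submitted (running + pending) map tasks
        int totalMapTasks = 320;
        // max permissible map load and the pessimistic backfill bound, as in the method above
        int maxMapLoad = (int) (overloadMapTaskMapSlotRatio * mapCapacity);
        // 400 - 320 = 80 slots left to backfill; a value <= 0 would mark the cluster as overloaded
        int mapSlotsBackFill = maxMapLoad - totalMapTasks;
        System.out.println("mapSlotsBackFill = " + mapSlotsBackFill);
    }
}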

Example 12 with JobStatus

use of org.apache.hadoop.mapreduce.JobStatus in project hadoop by apache.

the class TestCLI method mockJob.

private Job mockJob(Cluster mockCluster, String jobId, State jobState) throws IOException, InterruptedException {
    Job mockJob = mock(Job.class);
    when(mockCluster.getJob(JobID.forName(jobId))).thenReturn(mockJob);
    JobStatus status = new JobStatus(null, 0, 0, 0, 0, jobState, JobPriority.HIGH, null, null, null, null);
    when(mockJob.getStatus()).thenReturn(status);
    return mockJob;
}
Also used : JobStatus(org.apache.hadoop.mapreduce.JobStatus) Job(org.apache.hadoop.mapreduce.Job)
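
A hedged usage sketch (not part of TestCLI itself): assuming Mockito's mock and JUnit's assertEquals are statically imported, the helper above could be exercised like this, with a made-up job id:

// Illustrative only; the test name and job id are hypothetical.
@Test
public void testMockedJobState() throws Exception {
    Cluster mockCluster = mock(Cluster.class);
    Job job = mockJob(mockCluster, "job_1234654654321_0001", State.RUNNING);
    assertEquals(State.RUNNING, job.getStatus().getState());
}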

Example 13 with JobStatus

use of org.apache.hadoop.mapreduce.JobStatus in project ignite by apache.

the class HadoopClientProtocolSelfTest method checkJobSubmit.

/**
     * Test job submission.
     *
     * @param noCombiners Whether the job should run without a combiner.
     * @param noReducers Whether the job should run without reducers.
     * @throws Exception If failed.
     */
public void checkJobSubmit(boolean noCombiners, boolean noReducers) throws Exception {
    IgniteFileSystem igfs = grid(0).fileSystem(HadoopAbstractSelfTest.igfsName);
    igfs.mkdirs(new IgfsPath(PATH_INPUT));
    try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(igfs.create(new IgfsPath(PATH_INPUT + "/test.file"), true)))) {
        bw.write("word");
    }
    Configuration conf = config(HadoopAbstractSelfTest.REST_PORT);
    final Job job = Job.getInstance(conf);
    try {
        job.setJobName(JOB_NAME);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        if (!noCombiners)
            job.setCombinerClass(TestCombiner.class);
        if (noReducers)
            job.setNumReduceTasks(0);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TestOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(PATH_INPUT));
        FileOutputFormat.setOutputPath(job, new Path(PATH_OUTPUT));
        job.submit();
        JobID jobId = job.getJobID();
        // Setup phase.
        JobStatus jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() >= 0.0f && jobStatus.getSetupProgress() < 1.0f;
        assert jobStatus.getMapProgress() == 0.0f;
        assert jobStatus.getReduceProgress() == 0.0f;
        U.sleep(2100);
        JobStatus recentJobStatus = job.getStatus();
        assert recentJobStatus.getSetupProgress() > jobStatus.getSetupProgress() : "Old=" + jobStatus.getSetupProgress() + ", new=" + recentJobStatus.getSetupProgress();
        // Transferring to map phase.
        setupLockFile.delete();
        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {

            @Override
            public boolean apply() {
                try {
                    return F.eq(1.0f, job.getStatus().getSetupProgress());
                } catch (Exception e) {
                    throw new RuntimeException("Unexpected exception.", e);
                }
            }
        }, 5000L);
        // Map phase.
        jobStatus = job.getStatus();
        checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() >= 0.0f && jobStatus.getMapProgress() < 1.0f;
        assert jobStatus.getReduceProgress() == 0.0f;
        U.sleep(2100);
        recentJobStatus = job.getStatus();
        assert recentJobStatus.getMapProgress() > jobStatus.getMapProgress() : "Old=" + jobStatus.getMapProgress() + ", new=" + recentJobStatus.getMapProgress();
        // Transferring to reduce phase.
        mapLockFile.delete();
        assert GridTestUtils.waitForCondition(new GridAbsPredicate() {

            @Override
            public boolean apply() {
                try {
                    return F.eq(1.0f, job.getStatus().getMapProgress());
                } catch (Exception e) {
                    throw new RuntimeException("Unexpected exception.", e);
                }
            }
        }, 5000L);
        if (!noReducers) {
            // Reduce phase.
            jobStatus = job.getStatus();
            checkJobStatus(jobStatus, jobId, JOB_NAME, JobStatus.State.RUNNING, 0.0f);
            assert jobStatus.getSetupProgress() == 1.0f;
            assert jobStatus.getMapProgress() == 1.0f;
            assert jobStatus.getReduceProgress() >= 0.0f && jobStatus.getReduceProgress() < 1.0f;
            // Ensure that reduces progress increases.
            U.sleep(2100);
            recentJobStatus = job.getStatus();
            assert recentJobStatus.getReduceProgress() > jobStatus.getReduceProgress() : "Old=" + jobStatus.getReduceProgress() + ", new=" + recentJobStatus.getReduceProgress();
            reduceLockFile.delete();
        }
        job.waitForCompletion(false);
        jobStatus = job.getStatus();
        checkJobStatus(job.getStatus(), jobId, JOB_NAME, JobStatus.State.SUCCEEDED, 1.0f);
        assert jobStatus.getSetupProgress() == 1.0f;
        assert jobStatus.getMapProgress() == 1.0f;
        assert jobStatus.getReduceProgress() == 1.0f;
        dumpIgfs(igfs, new IgfsPath(PATH_OUTPUT));
    } finally {
        job.getCluster().close();
    }
}
Also used : IgfsPath(org.apache.ignite.igfs.IgfsPath) Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) GridAbsPredicate(org.apache.ignite.internal.util.lang.GridAbsPredicate) IgniteFileSystem(org.apache.ignite.IgniteFileSystem) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) IgfsPath(org.apache.ignite.igfs.IgfsPath) JobStatus(org.apache.hadoop.mapreduce.JobStatus) OutputStreamWriter(java.io.OutputStreamWriter) Job(org.apache.hadoop.mapreduce.Job) JobID(org.apache.hadoop.mapreduce.JobID)

Example 14 with JobStatus

use of org.apache.hadoop.mapreduce.JobStatus in project cdap by caskdata.

the class MapReduceMetricsWriter method reportMapredStats.

// job-level stats from counters built into MapReduce
private void reportMapredStats(Counters jobCounters) throws IOException, InterruptedException {
    JobStatus jobStatus = jobConf.getStatus();
    // map stats
    float mapProgress = jobStatus.getMapProgress();
    int runningMappers = 0;
    int runningReducers = 0;
    for (TaskReport tr : jobConf.getTaskReports(TaskType.MAP)) {
        reportMapTaskMetrics(tr);
        runningMappers += tr.getRunningTaskAttemptIds().size();
    }
    for (TaskReport tr : jobConf.getTaskReports(TaskType.REDUCE)) {
        reportReduceTaskMetrics(tr);
        runningReducers += tr.getRunningTaskAttemptIds().size();
    }
    int memoryPerMapper = jobConf.getConfiguration().getInt(Job.MAP_MEMORY_MB, Job.DEFAULT_MAP_MEMORY_MB);
    int memoryPerReducer = jobConf.getConfiguration().getInt(Job.REDUCE_MEMORY_MB, Job.DEFAULT_REDUCE_MEMORY_MB);
    long mapInputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_INPUT_RECORDS);
    long mapOutputRecords = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_RECORDS);
    long mapOutputBytes = getTaskCounter(jobCounters, TaskCounter.MAP_OUTPUT_BYTES);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (mapProgress * 100));
    mapperMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, mapInputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, mapOutputRecords);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_BYTES, mapOutputBytes);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningMappers);
    mapperMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningMappers * memoryPerMapper);
    LOG.trace("Reporting mapper stats: (completion, containers, memory) = ({}, {}, {})", (int) (mapProgress * 100), runningMappers, runningMappers * memoryPerMapper);
    // reduce stats
    float reduceProgress = jobStatus.getReduceProgress();
    long reduceInputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_INPUT_RECORDS);
    long reduceOutputRecords = getTaskCounter(jobCounters, TaskCounter.REDUCE_OUTPUT_RECORDS);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_COMPLETION, (long) (reduceProgress * 100));
    reducerMetrics.gauge(MapReduceMetrics.METRIC_INPUT_RECORDS, reduceInputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_OUTPUT_RECORDS, reduceOutputRecords);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_CONTAINERS, runningReducers);
    reducerMetrics.gauge(MapReduceMetrics.METRIC_USED_MEMORY, runningReducers * memoryPerReducer);
    LOG.trace("Reporting reducer stats: (completion, containers, memory) = ({}, {}, {})", (int) (reduceProgress * 100), runningReducers, runningReducers * memoryPerReducer);
}
Also used : JobStatus(org.apache.hadoop.mapreduce.JobStatus) TaskReport(org.apache.hadoop.mapreduce.TaskReport)
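
The getTaskCounter helper is not shown in this snippet; a plausible shape (an assumption, not necessarily CDAP's exact implementation) simply looks up a built-in TaskCounter in the job's Counters and returns its value:

// Assumed helper shape: resolve a built-in TaskCounter and return its current value.
private long getTaskCounter(Counters jobCounters, TaskCounter taskCounter) {
    return jobCounters.findCounter(taskCounter).getValue();
}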

Example 15 with JobStatus

use of org.apache.hadoop.mapreduce.JobStatus in project pinot by linkedin.

the class ThirdEyeJob method run.

@SuppressWarnings("unchecked")
public void run() throws Exception {
    LOGGER.info("Input config:{}", inputConfig);
    PhaseSpec phaseSpec;
    try {
        phaseSpec = PhaseSpec.valueOf(phaseName.toUpperCase());
    } catch (Exception e) {
        usage();
        throw e;
    }
    if (PhaseSpec.TRANSFORM.equals(phaseSpec)) {
        TransformPhaseJob job = new TransformPhaseJob("Transform Job", inputConfig);
        job.run();
        return;
    } else if (PhaseSpec.JOIN.equals(phaseSpec)) {
        JoinPhaseJob job = new JoinPhaseJob("Join Job", inputConfig);
        job.run();
        return;
    } else if (PhaseSpec.WAIT.equals(phaseSpec)) {
        WaitPhaseJob job = new WaitPhaseJob("Wait for inputs", inputConfig);
        job.run();
        return;
    }
    // Get root, collection, input paths
    String root = getAndCheck(ThirdEyeJobProperties.THIRDEYE_ROOT.getName(), inputConfig);
    String collection = getAndCheck(ThirdEyeJobProperties.THIRDEYE_COLLECTION.getName(), inputConfig);
    String inputPaths = getAndCheck(ThirdEyeJobProperties.INPUT_PATHS.getName(), inputConfig);
    // Get min / max time
    DateTime minTime;
    DateTime maxTime;
    String minTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MIN.getName());
    String maxTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MAX.getName());
    minTime = ISODateTimeFormat.dateTimeParser().parseDateTime(minTimeProp);
    maxTime = ISODateTimeFormat.dateTimeParser().parseDateTime(maxTimeProp);
    Properties jobProperties = phaseSpec.getJobProperties(inputConfig, root, collection, minTime, maxTime, inputPaths);
    for (Object key : inputConfig.keySet()) {
        jobProperties.setProperty(key.toString(), inputConfig.getProperty(key.toString()));
    }
    // Instantiate the job
    Constructor<Configured> constructor = (Constructor<Configured>) phaseSpec.getKlazz().getConstructor(String.class, Properties.class);
    Configured instance = constructor.newInstance(phaseSpec.getName(), jobProperties);
    setMapreduceConfig(instance.getConf());
    // Run the job
    Method runMethod = instance.getClass().getMethod("run");
    Job job = (Job) runMethod.invoke(instance);
    if (job != null) {
        JobStatus status = job.getStatus();
        if (status.getState() != JobStatus.State.SUCCEEDED) {
            throw new RuntimeException("Job " + job.getJobName() + " failed to execute: Ran with config:" + jobProperties);
        }
    }
}
Also used : Constructor(java.lang.reflect.Constructor) Method(java.lang.reflect.Method) Properties(java.util.Properties) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) Configured(org.apache.hadoop.conf.Configured) WaitPhaseJob(com.linkedin.thirdeye.hadoop.wait.WaitPhaseJob) JobStatus(org.apache.hadoop.mapreduce.JobStatus) JoinPhaseJob(com.linkedin.thirdeye.hadoop.join.JoinPhaseJob) TransformPhaseJob(com.linkedin.thirdeye.hadoop.transform.TransformPhaseJob) DerivedColumnTransformationPhaseJob(com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob) SegmentCreationPhaseJob(com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob) AggregationPhaseJob(com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob) TransformPhaseJob(com.linkedin.thirdeye.hadoop.transform.TransformPhaseJob) JoinPhaseJob(com.linkedin.thirdeye.hadoop.join.JoinPhaseJob) TopKPhaseJob(com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob) WaitPhaseJob(com.linkedin.thirdeye.hadoop.wait.WaitPhaseJob) Job(org.apache.hadoop.mapreduce.Job) BackfillPhaseJob(com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob)

Aggregations

JobStatus (org.apache.hadoop.mapreduce.JobStatus)22 Test (org.junit.Test)10 IOException (java.io.IOException)7 MRClientProtocol (org.apache.hadoop.mapreduce.v2.api.MRClientProtocol)7 Job (org.apache.hadoop.mapreduce.Job)5 GetJobReportRequest (org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest)5 JobID (org.apache.hadoop.mapreduce.JobID)4 InetSocketAddress (java.net.InetSocketAddress)3 Configuration (org.apache.hadoop.conf.Configuration)3 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)3 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)3 OutputStreamWriter (java.io.OutputStreamWriter)2 PrintWriter (java.io.PrintWriter)2 ArrayList (java.util.ArrayList)2 Path (org.apache.hadoop.fs.Path)2 GetJobReportResponse (org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportResponse)2 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)2 ApplicationReport (org.apache.hadoop.yarn.api.records.ApplicationReport)2 AggregationPhaseJob (com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob)1 BackfillPhaseJob (com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob)1