Search in sources :

Example 6 with TezApplicationData

use of com.linkedin.drelephant.tez.data.TezApplicationData in project dr-elephant by linkedin.

the class ReducerGCHeuristicTest method analyzeJob.

/**
 * Runs the heuristic under test against a synthetic Tez application and reports its severity.
 *
 * <p>The application has NUMTASKS reduce tasks that all share one counter object (holding the
 * given CPU and GC values) and the given runtime, followed by one extra task that is given
 * neither time nor counter data.
 *
 * @param runtimeMs value stored in slot 0 of every populated task's time array
 * @param cpuMs value recorded under the CPU_MILLISECONDS counter
 * @param gcMs value recorded under the GC_TIME_MILLIS counter
 * @return the severity carried by the heuristic's result
 * @throws IOException declared by the signature; nothing in this method throws it directly
 */
private Severity analyzeJob(long runtimeMs, long cpuMs, long gcMs) throws IOException {
    TezCounterData appLevelCounters = new TezCounterData();
    TezTaskData[] reduceTasks = new TezTaskData[NUMTASKS + 1];

    // A single counter instance is shared by every populated task.
    TezCounterData sharedTaskCounters = new TezCounterData();
    sharedTaskCounters.set(TezCounterData.CounterName.CPU_MILLISECONDS, cpuMs);
    sharedTaskCounters.set(TezCounterData.CounterName.GC_TIME_MILLIS, gcMs);

    int next = 0;
    while (next < NUMTASKS) {
        TezTaskData populated = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        reduceTasks[next] = populated;
        long[] times = new long[5];
        times[0] = runtimeMs;
        populated.setTimeAndCounter(times, sharedTaskCounters);
        next++;
    }

    // The last slot holds a non-sampled task, which does not contain time and counter data.
    reduceTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData()
        .setCounters(appLevelCounters)
        .setReduceTaskData(reduceTasks);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 7 with TezApplicationData

use of com.linkedin.drelephant.tez.data.TezApplicationData in project dr-elephant by linkedin.

the class ReducerMemoryHeuristicTest method analyzeJob.

/**
 * Runs the heuristic under test against a synthetic Tez application and reports its severity.
 *
 * <p>NUMTASKS reduce tasks share one counter object whose PHYSICAL_MEMORY_BYTES value is
 * {@code taskAvgMemMB * FileUtils.ONE_MB}; one additional task carries no time or counter data.
 * The container memory is passed to the heuristic through the application configuration.
 *
 * @param taskAvgMemMB per-task physical memory, in MB, recorded in the shared counter
 * @param containerMemMB container memory, in MB, written to the configuration as a string
 * @return the severity carried by the heuristic's result
 * @throws IOException declared by the signature; nothing in this method throws it directly
 */
private Severity analyzeJob(long taskAvgMemMB, long containerMemMB) throws IOException {
    TezCounterData appLevelCounters = new TezCounterData();
    TezTaskData[] reduceTasks = new TezTaskData[NUMTASKS + 1];

    TezCounterData sharedTaskCounters = new TezCounterData();
    sharedTaskCounters.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, taskAvgMemMB * FileUtils.ONE_MB);

    Properties conf = new Properties();
    conf.setProperty(
        com.linkedin.drelephant.mapreduce.heuristics.ReducerMemoryHeuristic.REDUCER_MEMORY_CONF,
        Long.toString(containerMemMB));

    int next = 0;
    while (next < NUMTASKS) {
        TezTaskData populated = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        reduceTasks[next] = populated;
        // All-zero time array; only the memory counter matters here.
        populated.setTimeAndCounter(new long[5], sharedTaskCounters);
        next++;
    }

    // The last slot holds a non-sampled task, which does not contain time and counter data.
    reduceTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData()
        .setCounters(appLevelCounters)
        .setReduceTaskData(reduceTasks);
    application.setConf(conf);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) Properties(java.util.Properties) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 8 with TezApplicationData

use of com.linkedin.drelephant.tez.data.TezApplicationData in project dr-elephant by linkedin.

the class ReducerTimeHeuristicTest method analyzeJob.

/**
 * Runs the heuristic under test against a synthetic Tez application and reports its severity.
 *
 * <p>{@code numTasks} reduce tasks are given the supplied runtime and a shared empty counter
 * object; one additional task carries no time or counter data. The same counter object is also
 * used as the application-level counters.
 *
 * @param runtimeMs value stored in slot 0 of every populated task's time array
 * @param numTasks number of populated reduce tasks to create
 * @return the severity carried by the heuristic's result
 * @throws IOException declared by the signature; nothing in this method throws it directly
 */
private Severity analyzeJob(long runtimeMs, int numTasks) throws IOException {
    TezCounterData emptyCounters = new TezCounterData();
    TezTaskData[] reduceTasks = new TezTaskData[numTasks + 1];

    int next = 0;
    while (next < numTasks) {
        TezTaskData populated = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        reduceTasks[next] = populated;
        long[] times = new long[5];
        times[0] = runtimeMs;
        populated.setTime(times);
        populated.setCounter(emptyCounters);
        next++;
    }

    // The last slot holds a non-sampled task, which does not contain time and counter data.
    reduceTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData()
        .setCounters(emptyCounters)
        .setReduceTaskData(reduceTasks);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 9 with TezApplicationData

use of com.linkedin.drelephant.tez.data.TezApplicationData in project dr-elephant by linkedin.

the class TezMetricsAggregator method aggregate.

/**
 * Aggregates resource usage, wasted resources and total delay over the map and reduce tasks of
 * a Tez application and stores the sums in the aggregated-data holder.
 *
 * <p>The reducer slow-start percentage is currently fixed at 100. The configured value is
 * deliberately not read: it was never used, and parsing it threw when the property was absent
 * or malformed, aborting aggregation for no benefit.
 *
 * @param hadoopData the application data; must be a {@link TezApplicationData}
 * @throws ClassCastException if {@code hadoopData} is not a {@link TezApplicationData}
 */
@Override
public void aggregate(HadoopApplicationData hadoopData) {
    TezApplicationData data = (TezApplicationData) hadoopData;
    long mapTaskContainerSize = getMapContainerSize(data);
    long reduceTaskContainerSize = getReducerContainerSize(data);

    // TODO: make use of the slow start percent (REDUCER_SLOW_START_CONFIG); until then it is
    // fixed at 100% so that a missing or unparsable property cannot fail the aggregation.
    final int reduceTaskSlowStartPercentage = 100;

    _mapTasks = new TezTaskLevelAggregatedMetrics(data.getMapTaskData(), mapTaskContainerSize, data.getStartTime());

    long reduceIdealStartTime = _mapTasks.getNthPercentileFinishTime(reduceTaskSlowStartPercentage);
    // Mappers list is empty
    if (reduceIdealStartTime == -1) {
        // ideal start time for reducer is infinite since it cannot start
        reduceIdealStartTime = Long.MAX_VALUE;
    }

    _reduceTasks = new TezTaskLevelAggregatedMetrics(data.getReduceTaskData(), reduceTaskContainerSize, reduceIdealStartTime);

    _hadoopAggregatedData.setResourceUsed(_mapTasks.getResourceUsed() + _reduceTasks.getResourceUsed());
    _hadoopAggregatedData.setTotalDelay(_mapTasks.getDelay() + _reduceTasks.getDelay());
    _hadoopAggregatedData.setResourceWasted(_mapTasks.getResourceWasted() + _reduceTasks.getResourceWasted());
}
Also used : TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData)

Example 10 with TezApplicationData

use of com.linkedin.drelephant.tez.data.TezApplicationData in project dr-elephant by linkedin.

the class InfoExtractor method loadInfo.

/**
 * Populates {@code result} with scheduler information for the given application.
 *
 * <p>Properties are retrieved according to the concrete application type (MapReduce, Spark or
 * Tez); any other type yields an empty property set. If no scheduler can be resolved from those
 * properties, or the resolved scheduler lacks any of the job/flow definition or execution ids,
 * the result is loaded as having no scheduler.
 *
 * @param result The application result to populate
 * @param data The Hadoop application data
 */
public static void loadInfo(AppResult result, HadoopApplicationData data) {
    final Properties appProperties;
    if (data instanceof MapReduceApplicationData) {
        appProperties = retrieveMapreduceProperties((MapReduceApplicationData) data);
    } else if (data instanceof SparkApplicationData) {
        appProperties = retrieveSparkProperties((SparkApplicationData) data);
    } else if (data instanceof TezApplicationData) {
        appProperties = retrieveTezProperties((TezApplicationData) data);
    } else {
        appProperties = new Properties();
    }

    final Scheduler scheduler = getSchedulerInstance(data.getAppId(), appProperties);
    if (scheduler == null) {
        logger.info("No Scheduler found for appid: " + data.getAppId());
        loadNoSchedulerInfo(result);
        return;
    }

    // Evaluated left to right with short-circuiting, in the same order as the id getters below.
    final boolean missingAnyId = StringUtils.isEmpty(scheduler.getJobDefId())
        || StringUtils.isEmpty(scheduler.getJobExecId())
        || StringUtils.isEmpty(scheduler.getFlowDefId())
        || StringUtils.isEmpty(scheduler.getFlowExecId());
    if (!missingAnyId) {
        loadSchedulerInfo(result, data, scheduler);
        return;
    }

    logger.warn("This job doesn't have the correct " + scheduler.getSchedulerName() + " integration support. I" + " will treat this as an adhoc job");
    logger.info("No Flow/job info found for appid: " + data.getAppId());
    loadNoSchedulerInfo(result);
}
Also used : MapReduceApplicationData(com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData) Scheduler(com.linkedin.drelephant.schedulers.Scheduler) SparkApplicationData(com.linkedin.drelephant.spark.data.SparkApplicationData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) Properties(java.util.Properties)

Aggregations

TezApplicationData (com.linkedin.drelephant.tez.data.TezApplicationData)15 TezCounterData (com.linkedin.drelephant.tez.data.TezCounterData)13 TezTaskData (com.linkedin.drelephant.tez.data.TezTaskData)13 HeuristicResult (com.linkedin.drelephant.analysis.HeuristicResult)8 Properties (java.util.Properties)3 MapReduceApplicationData (com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData)1 Scheduler (com.linkedin.drelephant.schedulers.Scheduler)1 SparkApplicationData (com.linkedin.drelephant.spark.data.SparkApplicationData)1 URL (java.net.URL)1