Search in sources :

Example 11 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class ReducerMemoryHeuristicTest method analyzeJob.

/**
 * Builds a Tez application with {@code NUMTASKS} reducers that carry time and counter data,
 * plus one extra reducer without any, and returns the severity reported by {@code _heuristic}.
 *
 * @param taskAvgMemMB physical memory, in MB, written into the counter shared by the reducers
 * @param containerMemMB value stored under the reducer memory configuration key
 * @return the severity of the heuristic result for the generated application
 * @throws IOException declared by the original signature
 */
private Severity analyzeJob(long taskAvgMemMB, long containerMemMB) throws IOException {
    TezCounterData applicationCounter = new TezCounterData();
    TezTaskData[] reducerTasks = new TezTaskData[NUMTASKS + 1];

    // A single counter instance is shared by every reducer that gets time and counter data.
    TezCounterData sharedTaskCounter = new TezCounterData();
    long physicalMemoryBytes = taskAvgMemMB * FileUtils.ONE_MB;
    sharedTaskCounter.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, physicalMemoryBytes);

    Properties conf = new Properties();
    conf.setProperty(com.linkedin.drelephant.mapreduce.heuristics.ReducerMemoryHeuristic.REDUCER_MEMORY_CONF, Long.toString(containerMemMB));

    int next = 0;
    while (next < NUMTASKS) {
        TezTaskData reducer = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        reducer.setTimeAndCounter(new long[5], sharedTaskCounter);
        reducerTasks[next] = reducer;
        next++;
    }
    // The final slot holds a non-sampled task: it is given neither time nor counter data.
    reducerTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData().setCounters(applicationCounter).setReduceTaskData(reducerTasks);
    application.setConf(conf);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) Properties(java.util.Properties) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 12 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class ReducerTimeHeuristicTest method analyzeJob.

/**
 * Builds a Tez application with {@code numTasks} reducers that carry time and counter data,
 * plus one extra reducer without any, and returns the severity reported by {@code _heuristic}.
 *
 * @param runtimeMs value placed in the first slot of each reducer's time array
 * @param numTasks number of reducers that receive time and counter data
 * @return the severity of the heuristic result for the generated application
 * @throws IOException declared by the original signature
 */
private Severity analyzeJob(long runtimeMs, int numTasks) throws IOException {
    // One empty counter object is reused for every task and for the application itself.
    TezCounterData sharedCounter = new TezCounterData();
    TezTaskData[] reducerTasks = new TezTaskData[numTasks + 1];

    int next = 0;
    while (next < numTasks) {
        TezTaskData reducer = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        reducer.setTime(new long[] { runtimeMs, 0, 0, 0, 0 });
        reducer.setCounter(sharedCounter);
        reducerTasks[next] = reducer;
        next++;
    }
    // The final slot holds a non-sampled task: it is given neither time nor counter data.
    reducerTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData().setCounters(sharedCounter).setReduceTaskData(reducerTasks);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 13 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class TezTaskLevelAggregatedMetrics method compute.

/**
 * Aggregates metrics over the sampled tasks: accumulates {@code _resourceUsed} and
 * {@code _resourceWasted}, sets {@code _delay}, and records each task's duration and finish time
 * in {@code durations} and {@code finishTimes}.
 *
 * @param taskDatas tasks to aggregate; tasks that are not sampled are skipped, and a
 *     {@code null} or empty array leaves all state untouched
 * @param containerSize container size charged to each task for its whole duration
 *     (presumably in MB, given the MB-seconds cost unit; confirm against callers)
 * @param idealStartTime start time used as the baseline when computing the delay
 */
private void compute(TezTaskData[] taskDatas, long containerSize, long idealStartTime) {
    // Without tasks there is nothing to aggregate.
    if (taskDatas == null || taskDatas.length == 0) {
        return;
    }

    long peakMemoryMb = 0;
    long latestFinishTime = 0;
    long longestDurationMs = 0;

    for (int idx = 0; idx < taskDatas.length; idx++) {
        TezTaskData task = taskDatas[idx];
        if (task.isSampled()) {
            // Memory counters are reported in bytes; convert both to MB.
            long physicalMb = task.getCounters().get(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES) / FileUtils.ONE_MB;
            long virtualMb = task.getCounters().get(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES) / FileUtils.ONE_MB;

            long finishTime = task.getFinishTime();
            // Milliseconds
            long durationMs = finishTime - task.getStartTime();
            // MB seconds; the duration is truncated to whole seconds before multiplying.
            long costMbSeconds = containerSize * (durationMs / Statistics.SECOND_IN_MS);

            durations.add(durationMs);
            finishTimes.add(finishTime);

            // A task needs the larger of its physical memory and its scaled-down virtual memory.
            long virtualDerivedMb = (long) (virtualMb / CLUSTER_MEMORY_FACTOR);
            long taskNeedMb = Math.max(virtualDerivedMb, physicalMb);
            peakMemoryMb = Math.max(peakMemoryMb, taskNeedMb);

            latestFinishTime = Math.max(latestFinishTime, finishTime);
            longestDurationMs = Math.max(longestDurationMs, durationMs);

            _resourceUsed += costMbSeconds;
        }
    }

    // Delay: how much later the last task finished than idealStartTime plus the longest duration.
    long rawDelay = latestFinishTime - (idealStartTime + longestDurationMs);
    // A negative delay is invalid and is clamped to zero.
    _delay = rawDelay < 0 ? 0 : rawDelay;

    // Wasted memory is whatever the container holds beyond the buffered peak requirement.
    long wastedMemory = containerSize - (long) (peakMemoryMb * MEMORY_BUFFER);
    if (wastedMemory <= 0) {
        return;
    }
    for (long durationMs : durations) {
        // MB seconds
        _resourceWasted += wastedMemory * (durationMs / Statistics.SECOND_IN_MS);
    }
}
Also used : TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData)

Example 14 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class GenericDataSkewHeuristic method apply.

/**
 * Evaluates time skew and data skew across the sampled tasks returned by {@code getTasks}.
 *
 * <p>For both runtimes and summed input counters, the sampled values are split into two groups
 * with {@code Statistics.findTwoGroups}, and the gap between the group averages is rated with
 * {@code getDeviationSeverity}. The reported severity is the larger of the two ratings.
 *
 * @param data the Tez application to evaluate
 * @return {@code null} if the application did not succeed; otherwise the heuristic result with
 *     the data-skew and time-skew group details attached
 */
public HeuristicResult apply(TezApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }
    TezTaskData[] tasks = getTasks(data);

    // ---- Time skew: collect the runtime of every sampled task ----
    List<Long> sampledRuntimesMs = new ArrayList<Long>();
    for (TezTaskData task : tasks) {
        if (task.isSampled()) {
            sampledRuntimesMs.add(task.getTotalRunTimeMs());
        }
    }
    long[][] groupsTime = Statistics.findTwoGroups(Longs.toArray(sampledRuntimesMs));
    long timeAvg1 = Statistics.average(groupsTime[0]);
    long timeAvg2 = Statistics.average(groupsTime[1]);
    // Deviation is judged in seconds, which give a better picture than milliseconds.
    long timeAvgSec1 = TimeUnit.MILLISECONDS.toSeconds(timeAvg1);
    long timeAvgSec2 = TimeUnit.MILLISECONDS.toSeconds(timeAvg2);
    long smallerTimeSec = Math.min(timeAvgSec1, timeAvgSec2);
    long timeGapSec = Math.abs(timeAvgSec1 - timeAvgSec2);
    // Time skew uses the same deviation limits as data skew. This can be changed in the future.
    Severity severityTime = getDeviationSeverity(smallerTimeSec, timeGapSec);
    // Lower the severity when the number of tasks in the first group is insignificant.
    Severity timeTaskCountSeverity = Severity.getSeverityAscending(groupsTime[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]);
    severityTime = Severity.min(severityTime, timeTaskCountSeverity);

    // ---- Data skew: sum the configured input counters of every sampled task ----
    List<Long> sampledInputBytes = new ArrayList<Long>();
    for (TezTaskData task : tasks) {
        if (!task.isSampled()) {
            continue;
        }
        long taskInputBytes = 0;
        for (TezCounterData.CounterName counterName : _counterNames) {
            taskInputBytes += task.getCounters().get(counterName);
        }
        sampledInputBytes.add(taskInputBytes);
    }
    long[][] groups = Statistics.findTwoGroups(Longs.toArray(sampledInputBytes));
    long avg1 = Statistics.average(groups[0]);
    long avg2 = Statistics.average(groups[1]);
    long smallerAvg = Math.min(avg1, avg2);
    long avgGap = Math.abs(avg2 - avg1);
    Severity severityData = getDeviationSeverity(smallerAvg, avgGap);
    // Lower the severity when the largest file sizes are insignificant.
    severityData = Severity.min(severityData, getFilesSeverity(avg2));
    // Lower the severity when the number of tasks in the first group is insignificant.
    Severity dataTaskCountSeverity = Severity.getSeverityAscending(groups[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]);
    severityData = Severity.min(severityData, dataTaskCountSeverity);

    Severity severity = Severity.max(severityData, severityTime);
    // NOTE(review): the score is derived from severityData only, while the reported severity is
    // the max of data and time severity. Confirm this is intentional.
    int taskCount = tasks.length;
    HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severityData, taskCount));

    String taskCountText = Integer.toString(taskCount);
    result.addResultDetail("Data skew (Number of tasks)", taskCountText);
    result.addResultDetail("Data skew (Group A)", groups[0].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg1) + " avg");
    result.addResultDetail("Data skew (Group B)", groups[1].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg2) + " avg");
    result.addResultDetail("Time skew (Number of tasks)", taskCountText);
    result.addResultDetail("Time skew (Group A)", groupsTime[0].length + " tasks @ " + convertTimeMs(timeAvg1) + " avg");
    result.addResultDetail("Time skew (Group B)", groupsTime[1].length + " tasks @ " + convertTimeMs(timeAvg2) + " avg");
    return result;
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) ArrayList(java.util.ArrayList) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 15 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class ReducerTimeHeuristic method apply.

/**
 * Rates the reduce tasks of a Tez application by the average runtime of the sampled tasks.
 *
 * <p>The severity is the larger of {@code shortTaskSeverity} and {@code longTaskSeverity}, both
 * evaluated with the total number of reduce tasks and the average sampled runtime.
 *
 * @param data the Tez application to evaluate
 * @return {@code null} if the application did not succeed; otherwise the heuristic result with
 *     the task count and the average, maximum and minimum task runtimes attached
 */
public HeuristicResult apply(TezApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }
    TezTaskData[] reduceTasks = data.getReduceTaskData();
    int taskCount = reduceTasks.length;

    // Collect runtimes of sampled tasks while tracking the extremes.
    List<Long> sampledRuntimesMs = new ArrayList<Long>();
    long shortestMs = Long.MAX_VALUE;
    long longestMs = 0;
    for (int idx = 0; idx < taskCount; idx++) {
        TezTaskData current = reduceTasks[idx];
        if (!current.isSampled()) {
            continue;
        }
        long elapsedMs = current.getTotalRunTimeMs();
        sampledRuntimesMs.add(elapsedMs);
        if (elapsedMs < shortestMs) {
            shortestMs = elapsedMs;
        }
        if (elapsedMs > longestMs) {
            longestMs = elapsedMs;
        }
    }
    // The sentinel is still in place when no sampled task lowered it; report zero instead.
    if (shortestMs == Long.MAX_VALUE) {
        shortestMs = 0;
    }

    long averageTimeMs = Statistics.average(sampledRuntimesMs);
    Severity severity = Severity.max(shortTaskSeverity(taskCount, averageTimeMs), longTaskSeverity(taskCount, averageTimeMs));
    HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, taskCount));

    // With no tasks every runtime detail is the literal "0"; otherwise an empty readable
    // timespan is shown as "0 sec".
    String averageText = "0";
    String maxText = "0";
    String minText = "0";
    if (taskCount != 0) {
        String averageSpan = Statistics.readableTimespan(averageTimeMs);
        averageText = averageSpan.equals("") ? "0 sec" : averageSpan;
        String maxSpan = Statistics.readableTimespan(longestMs);
        maxText = maxSpan.equals("") ? "0 sec" : maxSpan;
        String minSpan = Statistics.readableTimespan(shortestMs);
        minText = minSpan.equals("") ? "0 sec" : minSpan;
    }

    result.addResultDetail("Number of tasks", Integer.toString(taskCount));
    result.addResultDetail("Average task runtime", averageText);
    result.addResultDetail("Max task runtime", maxText);
    result.addResultDetail("Min task runtime", minText);
    return result;
}
Also used : TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Aggregations

TezTaskData (com.linkedin.drelephant.tez.data.TezTaskData) 22, TezCounterData (com.linkedin.drelephant.tez.data.TezCounterData) 17, HeuristicResult (com.linkedin.drelephant.analysis.HeuristicResult) 15, TezApplicationData (com.linkedin.drelephant.tez.data.TezApplicationData) 13, Severity (com.linkedin.drelephant.analysis.Severity) 7, ArrayList (java.util.ArrayList) 3, Properties (java.util.Properties) 2, URL (java.net.URL) 1, Test (org.junit.Test) 1