Search in sources :

Example 41 with HeuristicResult

use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.

the class MapperTimeHeuristicTest method analyzeJob.

/**
 * Builds a Tez application with {@code numTasks} mapper tasks carrying time and counter data,
 * plus one trailing task created with ids only, and returns the severity the heuristic under
 * test assigns to that application.
 *
 * @param numTasks number of mapper tasks that carry time and counter data
 * @param runtime value placed in the first slot of each such task's time array
 *                (presumably the total runtime in ms; confirm against TezTaskData)
 * @return the severity of the result produced by {@code _heuristic.apply}
 * @throws IOException declared in the signature; no visible call in this method throws it
 */
private Severity analyzeJob(int numTasks, long runtime) throws IOException {
    TezCounterData jobCounter = new TezCounterData();
    // One extra slot for the non-sampled task appended after the loop.
    TezTaskData[] mappers = new TezTaskData[numTasks + 1];
    TezCounterData taskCounter = new TezCounterData();
    taskCounter.set(TezCounterData.CounterName.S3A_BYTES_READ, DUMMY_INPUT_SIZE / 4);
    int i = 0;
    for (; i < numTasks; i++) {
        // Use the per-task counter prepared above. Previously the empty job-level counter was
        // passed here, which left taskCounter (and its S3A_BYTES_READ value) unused.
        mappers[i] = new TezTaskData(taskCounter, new long[] { runtime, 0, 0, 0, 0 });
    }
    // Non-sampled task, which does not contain time and counter data
    mappers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);
    TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
    HeuristicResult result = _heuristic.apply(data);
    return result.getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 42 with HeuristicResult

use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.

the class ExceptionHeuristic method apply.

/**
 * Produces a result describing why an unsuccessful application failed.
 *
 * @param data the application to inspect
 * @return {@code null} when {@code data.getSucceeded()} is true; otherwise a
 *         {@link Severity#MODERATE} result with score 0 whose "Error" detail holds either the
 *         diagnostic info reported by {@code data} or a fallback hint when none is available
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
    if (data.getSucceeded()) {
        return null;
    }
    HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), Severity.MODERATE, 0);
    String diagnosticInfo = data.getDiagnosticInfo();
    if (diagnosticInfo != null) {
        result.addResultDetail("Error", "Stacktrace", diagnosticInfo);
    } else {
        // The two sentences were previously concatenated without a separating space
        // ("...link above.Exception can happen...").
        String msg = "Unable to find stacktrace info. Please find the real problem in the Jobhistory link above. "
            + "Exception can happen either in task log or Application Master log.";
        result.addResultDetail("Error", msg);
    }
    return result;
}
Also used : HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 43 with HeuristicResult

use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.

the class GenericSkewHeuristic method apply.

/**
 * Evaluates both time skew and data skew across the tasks returned by {@code getTasks(data)}.
 *
 * <p>For each dimension the sampled values are split into two groups with
 * {@code Statistics.findTwoGroups}, the group averages are compared via
 * {@code getDeviationSeverity}, and the resulting severity is capped by the severity derived
 * from the size of the first group. The data-skew severity is additionally capped by
 * {@code getFilesSeverity} of the second group's average. The reported severity is the larger
 * of the two.
 *
 * @param data the application to analyze
 * @return {@code null} when {@code data.getSucceeded()} is false; otherwise a result carrying
 *         the combined severity and per-group details for both data and time skew
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }
    MapReduceTaskData[] tasks = getTasks(data);

    // Gathering data for checking time skew
    List<Long> timeTaken = new ArrayList<Long>();
    for (MapReduceTaskData task : tasks) {
        if (task.isTimeDataPresent()) {
            timeTaken.add(task.getTotalRunTimeMs());
        }
    }
    long[][] groupsTime = Statistics.findTwoGroups(Longs.toArray(timeTaken));
    long timeAvg1 = Statistics.average(groupsTime[0]);
    long timeAvg2 = Statistics.average(groupsTime[1]);
    // Seconds are used for calculating deviation as they give a better idea than milliseconds.
    long timeAvgSec1 = TimeUnit.MILLISECONDS.toSeconds(timeAvg1);
    long timeAvgSec2 = TimeUnit.MILLISECONDS.toSeconds(timeAvg2);
    long minTime = Math.min(timeAvgSec1, timeAvgSec2);
    long diffTime = Math.abs(timeAvgSec1 - timeAvgSec2);
    // Using the same deviation limits for time skew as for data skew. It can be changed in the future.
    Severity severityTime = getDeviationSeverity(minTime, diffTime);
    // This reduces severity if number of tasks is insignificant
    severityTime = Severity.min(severityTime, Severity.getSeverityAscending(groupsTime[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]));

    // Gathering data for checking data skew: per-task sum of the configured input counters
    List<Long> inputBytes = new ArrayList<Long>();
    for (MapReduceTaskData task : tasks) {
        if (task.isCounterDataPresent()) {
            long inputByte = 0;
            for (MapReduceCounterData.CounterName counterName : _counterNames) {
                inputByte += task.getCounters().get(counterName);
            }
            inputBytes.add(inputByte);
        }
    }
    // Analyze data. TODO: This is a temp fix. findTwoGroups should support list as input
    long[][] groups = Statistics.findTwoGroups(Longs.toArray(inputBytes));
    long avg1 = Statistics.average(groups[0]);
    long avg2 = Statistics.average(groups[1]);
    long min = Math.min(avg1, avg2);
    long diff = Math.abs(avg2 - avg1);
    Severity severityData = getDeviationSeverity(min, diff);
    // This reduces severity if the largest file sizes are insignificant
    severityData = Severity.min(severityData, getFilesSeverity(avg2));
    // This reduces severity if number of tasks is insignificant
    severityData = Severity.min(severityData, Severity.getSeverityAscending(groups[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]));

    Severity severity = Severity.max(severityData, severityTime);
    // NOTE(review): the score is derived from severityData only, while the reported severity is
    // the max of data and time skew. Confirm whether the score should use `severity` instead.
    HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severityData, tasks.length));
    result.addResultDetail("Data skew (Number of tasks)", Integer.toString(tasks.length));
    result.addResultDetail("Data skew (Group A)", groups[0].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg1) + " avg");
    result.addResultDetail("Data skew (Group B)", groups[1].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg2) + " avg");
    result.addResultDetail("Time skew (Number of tasks)", Integer.toString(tasks.length));
    result.addResultDetail("Time skew (Group A)", groupsTime[0].length + " tasks @ " + convertTimeMs(timeAvg1) + " avg");
    result.addResultDetail("Time skew (Group B)", groupsTime[1].length + " tasks @ " + convertTimeMs(timeAvg2) + " avg");
    return result;
}
Also used : MapReduceCounterData(com.linkedin.drelephant.mapreduce.data.MapReduceCounterData) ArrayList(java.util.ArrayList) Severity(com.linkedin.drelephant.analysis.Severity) MapReduceTaskData(com.linkedin.drelephant.mapreduce.data.MapReduceTaskData) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 44 with HeuristicResult

use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.

the class MapperSpeedHeuristic method apply.

/**
 * Rates mapper throughput from the median input speed of the sampled mapper tasks.
 *
 * <p>Only tasks for which {@code isTimeAndCounterDataPresent()} is true contribute. For each
 * such task the input size is the sum of the configured counters and the speed is computed in
 * bytes per second. The severity from {@code getDiskSpeedSeverity} is capped by
 * {@code getRuntimeSeverity} of the median runtime.
 *
 * @param data the application to analyze
 * @return {@code null} when {@code data.getSucceeded()} is false; otherwise a result with the
 *         severity, the task count, the median size/runtime/speed and the total input size
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }
    long totalInputByteSize = 0;
    MapReduceTaskData[] tasks = data.getMapperData();
    List<Long> inputByteSizes = new ArrayList<Long>();
    List<Long> speeds = new ArrayList<Long>();
    List<Long> runtimesMs = new ArrayList<Long>();
    for (MapReduceTaskData task : tasks) {
        if (task.isTimeAndCounterDataPresent()) {
            long inputBytes = 0;
            for (MapReduceCounterData.CounterName counterName : _counterNames) {
                inputBytes += task.getCounters().get(counterName);
            }
            long runtimeMs = task.getTotalRunTimeMs();
            inputByteSizes.add(inputBytes);
            totalInputByteSize += inputBytes;
            runtimesMs.add(runtimeMs);
            // Speed is bytes per second. A task reporting a 0 ms runtime would otherwise cause
            // an integer division by zero, so it is treated as having run for 1 ms.
            long speedDivisorMs = (runtimeMs == 0) ? 1 : runtimeMs;
            speeds.add((1000 * inputBytes) / speedDivisorMs);
        }
    }
    long medianSpeed;
    long medianSize;
    long medianRuntimeMs;
    // The three lists are filled together and hold sampled tasks only, so they can be empty
    // even when tasks is not. Guard on the lists themselves rather than on tasks.length so that
    // Statistics.median is never called with an empty list.
    if (!speeds.isEmpty()) {
        medianSpeed = Statistics.median(speeds);
        medianSize = Statistics.median(inputByteSizes);
        medianRuntimeMs = Statistics.median(runtimesMs);
    } else {
        medianSpeed = 0;
        medianSize = 0;
        medianRuntimeMs = 0;
    }
    Severity severity = getDiskSpeedSeverity(medianSpeed);
    // This reduces severity if task runtime is insignificant
    severity = Severity.min(severity, getRuntimeSeverity(medianRuntimeMs));
    HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));
    result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
    result.addResultDetail("Median task input size", FileUtils.byteCountToDisplaySize(medianSize));
    result.addResultDetail("Median task runtime", Statistics.readableTimespan(medianRuntimeMs));
    result.addResultDetail("Median task speed", FileUtils.byteCountToDisplaySize(medianSpeed) + "/s");
    result.addResultDetail(CommonConstantsHeuristic.TOTAL_INPUT_SIZE_IN_MB, totalInputByteSize * 1.0 / (FileUtils.ONE_MB) + "");
    return result;
}
Also used : MapReduceCounterData(com.linkedin.drelephant.mapreduce.data.MapReduceCounterData) MapReduceTaskData(com.linkedin.drelephant.mapreduce.data.MapReduceTaskData) ArrayList(java.util.ArrayList) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 45 with HeuristicResult

use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.

the class MapperSpillHeuristic method apply.

/**
 * Rates how heavily the mapper tasks spill, using the ratio of spilled records to map output
 * records summed over every mapper task that has counter data.
 *
 * @param data the application to analyze
 * @return {@code null} when {@code data.getSucceeded()} is false; otherwise a result holding
 *         the severity (capped by the task-count severity), the task count, the per-task
 *         averages and the spill ratio
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }

    final MapReduceTaskData[] mappers = data.getMapperData();
    final int mapperCount = mappers.length;

    // Accumulate the two counters over the mappers that actually carry counter data.
    long spilledRecords = 0;
    long outputRecords = 0;
    for (int idx = 0; idx < mapperCount; idx++) {
        final MapReduceTaskData mapper = mappers[idx];
        if (!mapper.isCounterDataPresent()) {
            continue;
        }
        spilledRecords += mapper.getCounters().get(MapReduceCounterData.CounterName.SPILLED_RECORDS);
        outputRecords += mapper.getCounters().get(MapReduceCounterData.CounterName.MAP_OUTPUT_RECORDS);
    }

    // No spills means a ratio of zero, which also covers the case where both totals are zero.
    // NOTE(review): spills > 0 with zero output records yields an infinite ratio; confirm that
    // getSpillSeverity is expected to handle that.
    final double spillRatio = (spilledRecords == 0)
        ? 0
        : (double) spilledRecords / (double) outputRecords;

    // The spill severity is capped by the task-count severity so small jobs are not over-reported.
    final Severity cappedSeverity = Severity.min(getSpillSeverity(spillRatio), getNumTasksSeverity(mapperCount));

    // NOTE(review): the averages divide by the total task count, not by the number of tasks
    // that contributed counter data; verify this is intended.
    final String avgSpilled;
    final String avgOutput;
    if (mapperCount == 0) {
        avgSpilled = "0";
        avgOutput = "0";
    } else {
        avgSpilled = Long.toString(spilledRecords / mapperCount);
        avgOutput = Long.toString(outputRecords / mapperCount);
    }

    final HeuristicResult report = new HeuristicResult(
        _heuristicConfData.getClassName(),
        _heuristicConfData.getHeuristicName(),
        cappedSeverity,
        Utils.getHeuristicScore(cappedSeverity, mapperCount));
    report.addResultDetail("Number of tasks", Integer.toString(mapperCount));
    report.addResultDetail("Avg spilled records per task", avgSpilled);
    report.addResultDetail("Avg output records per task", avgOutput);
    report.addResultDetail("Ratio of spilled records to output records", Double.toString(spillRatio));
    return report;
}
Also used : MapReduceTaskData(com.linkedin.drelephant.mapreduce.data.MapReduceTaskData) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Aggregations

HeuristicResult (com.linkedin.drelephant.analysis.HeuristicResult)48 MapReduceTaskData (com.linkedin.drelephant.mapreduce.data.MapReduceTaskData)23 MapReduceApplicationData (com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData)22 Severity (com.linkedin.drelephant.analysis.Severity)16 MapReduceCounterData (com.linkedin.drelephant.mapreduce.data.MapReduceCounterData)15 TezTaskData (com.linkedin.drelephant.tez.data.TezTaskData)15 TezCounterData (com.linkedin.drelephant.tez.data.TezCounterData)11 ArrayList (java.util.ArrayList)11 TezApplicationData (com.linkedin.drelephant.tez.data.TezApplicationData)8 Test (org.junit.Test)8 Properties (java.util.Properties)7 HashMap (java.util.HashMap)1