Use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by LinkedIn: the analyzeJob method of the class MapperTimeHeuristicTest.
private Severity analyzeJob(int numTasks, long runtime) throws IOException {
  TezCounterData jobCounter = new TezCounterData();
  TezTaskData[] mappers = new TezTaskData[numTasks + 1];
  TezCounterData taskCounter = new TezCounterData();
  taskCounter.set(TezCounterData.CounterName.S3A_BYTES_READ, DUMMY_INPUT_SIZE / 4);
  int i = 0;
  for (; i < numTasks; i++) {
    mappers[i] = new TezTaskData(jobCounter, new long[] { runtime, 0, 0, 0, 0 });
  }
  // Non-sampled task, which does not contain time and counter data
  mappers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);
  TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
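A minimal sketch of how this helper is typically exercised from a test method in the same class. The test method name, the task counts, and the runtimes are illustrative only and make no claim about MapperTimeHeuristic's actual severity thresholds.

// Hypothetical test method in the same test class; analyzeJob is the helper above.
@Test
public void testMapperTimeSeverities() throws IOException {
  // Many very short mappers vs. a few long-running mappers; the severities
  // returned depend on the heuristic's configured limits.
  Severity manyShortTasks = analyzeJob(1000, 60 * 1000L);      // 1000 mappers, ~1 minute each
  Severity fewLongTasks = analyzeJob(10, 2 * 60 * 60 * 1000L); // 10 mappers, ~2 hours each
  System.out.println("many short tasks -> " + manyShortTasks);
  System.out.println("few long tasks -> " + fewLongTasks);
}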
Use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by LinkedIn: the apply method of the class ExceptionHeuristic.
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (data.getSucceeded()) {
    return null;
  }
  HeuristicResult result =
      new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), Severity.MODERATE, 0);
  String diagnosticInfo = data.getDiagnosticInfo();
  if (diagnosticInfo != null) {
    result.addResultDetail("Error", "Stacktrace", diagnosticInfo);
  } else {
    String msg = "Unable to find stacktrace info. Please find the real problem in the Jobhistory link above. "
        + "Exception can happen either in task log or Application Master log.";
    result.addResultDetail("Error", msg);
  }
  return result;
}
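For failed applications the heuristic always reports MODERATE severity and attaches either the stacktrace or a pointer to the job history logs. A small sketch of how a caller might print those detail rows; the variables exceptionHeuristic and failedAppData, and the accessor names getHeuristicName(), getSeverity(), getHeuristicResultDetails(), getName(), and getValue(), are assumed here rather than taken from this snippet.

// Hypothetical consumer of the result; accessor names are assumptions.
HeuristicResult result = exceptionHeuristic.apply(failedAppData);
if (result != null) {
  System.out.println(result.getHeuristicName() + " -> " + result.getSeverity());
  for (HeuristicResultDetails detail : result.getHeuristicResultDetails()) {
    System.out.println("  " + detail.getName() + ": " + detail.getValue());
  }
}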
Use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by LinkedIn: the apply method of the class GenericSkewHeuristic.
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  MapReduceTaskData[] tasks = getTasks(data);
  // Gathering data for checking time skew
  List<Long> timeTaken = new ArrayList<Long>();
  for (int i = 0; i < tasks.length; i++) {
    if (tasks[i].isTimeDataPresent()) {
      timeTaken.add(tasks[i].getTotalRunTimeMs());
    }
  }
  long[][] groupsTime = Statistics.findTwoGroups(Longs.toArray(timeTaken));
  long timeAvg1 = Statistics.average(groupsTime[0]);
  long timeAvg2 = Statistics.average(groupsTime[1]);
  // Seconds are used for calculating the deviation because they give a better picture than milliseconds.
  long timeAvgSec1 = TimeUnit.MILLISECONDS.toSeconds(timeAvg1);
  long timeAvgSec2 = TimeUnit.MILLISECONDS.toSeconds(timeAvg2);
  long minTime = Math.min(timeAvgSec1, timeAvgSec2);
  long diffTime = Math.abs(timeAvgSec1 - timeAvgSec2);
  // The same deviation limits are used for time skew as for data skew. This may change in the future.
  Severity severityTime = getDeviationSeverity(minTime, diffTime);
  // This reduces severity if the number of tasks is insignificant
  severityTime = Severity.min(severityTime,
      Severity.getSeverityAscending(groupsTime[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]));
  // Gathering data for checking data skew
  List<Long> inputBytes = new ArrayList<Long>();
  for (int i = 0; i < tasks.length; i++) {
    if (tasks[i].isCounterDataPresent()) {
      long inputByte = 0;
      for (MapReduceCounterData.CounterName counterName : _counterNames) {
        inputByte += tasks[i].getCounters().get(counterName);
      }
      inputBytes.add(inputByte);
    }
  }
  // Ratio of total tasks to sampled tasks
  double scale = ((double) tasks.length) / inputBytes.size();
  // Analyze data. TODO: This is a temp fix. findTwoGroups should support a list as input
  long[][] groups = Statistics.findTwoGroups(Longs.toArray(inputBytes));
  long avg1 = Statistics.average(groups[0]);
  long avg2 = Statistics.average(groups[1]);
  long min = Math.min(avg1, avg2);
  long diff = Math.abs(avg2 - avg1);
  Severity severityData = getDeviationSeverity(min, diff);
  // This reduces severity if the largest file sizes are insignificant
  severityData = Severity.min(severityData, getFilesSeverity(avg2));
  // This reduces severity if the number of tasks is insignificant
  severityData = Severity.min(severityData,
      Severity.getSeverityAscending(groups[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]));
  Severity severity = Severity.max(severityData, severityTime);
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(),
      severity, Utils.getHeuristicScore(severityData, tasks.length));
  result.addResultDetail("Data skew (Number of tasks)", Integer.toString(tasks.length));
  result.addResultDetail("Data skew (Group A)", groups[0].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg1) + " avg");
  result.addResultDetail("Data skew (Group B)", groups[1].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg2) + " avg");
  result.addResultDetail("Time skew (Number of tasks)", Integer.toString(tasks.length));
  result.addResultDetail("Time skew (Group A)", groupsTime[0].length + " tasks @ " + convertTimeMs(timeAvg1) + " avg");
  result.addResultDetail("Time skew (Group B)", groupsTime[1].length + " tasks @ " + convertTimeMs(timeAvg2) + " avg");
  return result;
}
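Both the time-skew and the data-skew checks feed the smaller group average and the absolute difference between the group averages into getDeviationSeverity, which is not shown in this snippet. A minimal sketch of what such a helper could look like, assuming deviationLimits is a configured array of ratio thresholds; the actual implementation in the project may differ.

// Sketch only: deviationLimits would normally come from the heuristic's configuration.
private Severity getDeviationSeverity(long averageMin, long averageDiff) {
  if (averageMin <= 0) {
    averageMin = 1; // guard against division by zero when one group averages zero
  }
  long value = averageDiff / averageMin;
  return Severity.getSeverityAscending(value,
      deviationLimits[0], deviationLimits[1], deviationLimits[2], deviationLimits[3]);
}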
Use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by LinkedIn: the apply method of the class MapperSpeedHeuristic.
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  long totalInputByteSize = 0;
  MapReduceTaskData[] tasks = data.getMapperData();
  List<Long> inputByteSizes = new ArrayList<Long>();
  List<Long> speeds = new ArrayList<Long>();
  List<Long> runtimesMs = new ArrayList<Long>();
  for (MapReduceTaskData task : tasks) {
    if (task.isTimeAndCounterDataPresent()) {
      long inputBytes = 0;
      for (MapReduceCounterData.CounterName counterName : _counterNames) {
        inputBytes += task.getCounters().get(counterName);
      }
      long runtimeMs = task.getTotalRunTimeMs();
      inputByteSizes.add(inputBytes);
      totalInputByteSize += inputBytes;
      runtimesMs.add(runtimeMs);
      // Speed is bytes per second
      speeds.add((1000 * inputBytes) / (runtimeMs));
    }
  }
  long medianSpeed;
  long medianSize;
  long medianRuntimeMs;
  if (tasks.length != 0) {
    medianSpeed = Statistics.median(speeds);
    medianSize = Statistics.median(inputByteSizes);
    medianRuntimeMs = Statistics.median(runtimesMs);
  } else {
    medianSpeed = 0;
    medianSize = 0;
    medianRuntimeMs = 0;
  }
  Severity severity = getDiskSpeedSeverity(medianSpeed);
  // This reduces severity if task runtime is insignificant
  severity = Severity.min(severity, getRuntimeSeverity(medianRuntimeMs));
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(),
      severity, Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Median task input size", FileUtils.byteCountToDisplaySize(medianSize));
  result.addResultDetail("Median task runtime", Statistics.readableTimespan(medianRuntimeMs));
  result.addResultDetail("Median task speed", FileUtils.byteCountToDisplaySize(medianSpeed) + "/s");
  result.addResultDetail(CommonConstantsHeuristic.TOTAL_INPUT_SIZE_IN_MB, totalInputByteSize * 1.0 / (FileUtils.ONE_MB) + "");
  return result;
}
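The per-task speed above is computed as 1000 * inputBytes / runtimeMs, i.e. bytes per second. A standalone snippet checking that arithmetic with made-up numbers; the values are arbitrary and only illustrate the formula, not any real job.

import org.apache.commons.io.FileUtils;

public class SpeedFormulaExample {
  public static void main(String[] args) {
    long inputBytes = 512L * 1024 * 1024; // one mapper reading 512 MB
    long runtimeMs = 100_000L;            // in 100 seconds
    long bytesPerSecond = (1000 * inputBytes) / runtimeMs; // roughly 5 MB/s
    System.out.println(FileUtils.byteCountToDisplaySize(bytesPerSecond) + "/s");
  }
}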
Use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by LinkedIn: the apply method of the class MapperSpillHeuristic.
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  MapReduceTaskData[] tasks = data.getMapperData();
  long totalSpills = 0;
  long totalOutputRecords = 0;
  double ratioSpills = 0.0;
  for (MapReduceTaskData task : tasks) {
    if (task.isCounterDataPresent()) {
      totalSpills += task.getCounters().get(MapReduceCounterData.CounterName.SPILLED_RECORDS);
      totalOutputRecords += task.getCounters().get(MapReduceCounterData.CounterName.MAP_OUTPUT_RECORDS);
    }
  }
  // If no records were spilled (this also covers the case where both counters are zero),
  // set ratioSpills to zero instead of dividing by totalOutputRecords.
  if (totalSpills == 0) {
    ratioSpills = 0;
  } else {
    ratioSpills = (double) totalSpills / (double) totalOutputRecords;
  }
  Severity severity = getSpillSeverity(ratioSpills);
  // Severity is reduced if the number of tasks is small
  Severity taskSeverity = getNumTasksSeverity(tasks.length);
  severity = Severity.min(severity, taskSeverity);
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(),
      severity, Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Avg spilled records per task", tasks.length == 0 ? "0" : Long.toString(totalSpills / tasks.length));
  result.addResultDetail("Avg output records per task", tasks.length == 0 ? "0" : Long.toString(totalOutputRecords / tasks.length));
  result.addResultDetail("Ratio of spilled records to output records", Double.toString(ratioSpills));
  return result;
}
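A spill ratio above 1 means map output records were written to disk more than once, which usually points at an undersized sort buffer. A sketch of how getSpillSeverity could map that ratio to a severity; the normalization factor and the limit array below are placeholders, not the project's configured thresholds.

// Sketch only: THRESHOLD_SPILL_FACTOR and numSpillLimits are placeholder values.
private static final long THRESHOLD_SPILL_FACTOR = 10000;
private long[] numSpillLimits = {
    (long) (1.01d * THRESHOLD_SPILL_FACTOR), (long) (1.2d * THRESHOLD_SPILL_FACTOR),
    (long) (1.5d * THRESHOLD_SPILL_FACTOR), (long) (3.0d * THRESHOLD_SPILL_FACTOR)};

private Severity getSpillSeverity(double ratioSpills) {
  // Normalize the ratio to an integer scale before comparing against the limits.
  long normalizedSpillRatio = (long) (ratioSpills * THRESHOLD_SPILL_FACTOR);
  return Severity.getSeverityAscending(normalizedSpillRatio,
      numSpillLimits[0], numSpillLimits[1], numSpillLimits[2], numSpillLimits[3]);
}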