Usage example of com.linkedin.drelephant.analysis.Severity in the dr-elephant project by LinkedIn: the apply method of the MapperSpillHeuristic class.
/**
 * Rates a succeeded Tez application by how many records its map tasks spilled
 * to disk relative to the records they produced.
 *
 * @param data application data; only succeeded applications are analyzed
 * @return a {@link HeuristicResult} with task counts, per-task averages and the
 *         spill ratio, or {@code null} if the application did not succeed
 */
@Override
public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  TezTaskData[] tasks = data.getMapTaskData();
  long totalSpills = 0;
  long totalOutputRecords = 0;
  double ratioSpills = 0.0;
  // Aggregate spilled/output record counters over the sampled map tasks only.
  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      totalSpills += task.getCounters().get(TezCounterData.CounterName.SPILLED_RECORDS);
      totalOutputRecords += task.getCounters().get(TezCounterData.CounterName.OUTPUT_RECORDS);
    }
  }
  // Guard the division: with zero output records the ratio is undefined. The
  // previous check only tested totalSpills, so spills with zero output records
  // produced Double.POSITIVE_INFINITY; treat either zero as a ratio of 0.
  if (totalSpills == 0 || totalOutputRecords == 0) {
    ratioSpills = 0;
  } else {
    ratioSpills = (double) totalSpills / (double) totalOutputRecords;
  }
  Severity severity = getSpillSeverity(ratioSpills);
  // Severity is reduced if the number of tasks is small.
  Severity taskSeverity = getNumTasksSeverity(tasks.length);
  severity = Severity.min(severity, taskSeverity);
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity,
      Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Avg spilled records per task",
      tasks.length == 0 ? "0" : Long.toString(totalSpills / tasks.length));
  result.addResultDetail("Avg output records per task",
      tasks.length == 0 ? "0" : Long.toString(totalOutputRecords / tasks.length));
  result.addResultDetail("Ratio of spilled records to output records", Double.toString(ratioSpills));
  return result;
}
Usage example of com.linkedin.drelephant.analysis.Severity in the dr-elephant project by LinkedIn: the apply method of the MapperTimeHeuristic class.
/**
 * Rates a succeeded Tez application by its map task runtimes, flagging jobs
 * whose tasks are on average too short or too long.
 *
 * Added {@code @Override} for consistency with the other {@code apply}
 * implementations in this project, which carry the annotation.
 *
 * @param data application data; only succeeded applications are analyzed
 * @return a {@link HeuristicResult} with task count, average input size and
 *         min/avg/max runtimes, or {@code null} if the application failed
 */
@Override
public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  TezTaskData[] tasks = data.getMapTaskData();
  List<Long> inputSizes = new ArrayList<Long>();
  List<Long> runtimesMs = new ArrayList<Long>();
  long taskMinMs = Long.MAX_VALUE;
  long taskMaxMs = 0;
  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      // Total input bytes for this task is the sum over the configured counters.
      long inputByte = 0;
      for (TezCounterData.CounterName counterName : _counterNames) {
        inputByte += task.getCounters().get(counterName);
      }
      inputSizes.add(inputByte);
      long taskTime = task.getTotalRunTimeMs();
      runtimesMs.add(taskTime);
      taskMinMs = Math.min(taskMinMs, taskTime);
      taskMaxMs = Math.max(taskMaxMs, taskTime);
    }
  }
  // No sampled tasks: report a minimum of 0 rather than Long.MAX_VALUE.
  if (taskMinMs == Long.MAX_VALUE) {
    taskMinMs = 0;
  }
  long averageSize = Statistics.average(inputSizes);
  long averageTimeMs = Statistics.average(runtimesMs);
  // Rate both failure modes (tasks too short, tasks too long) and keep the worse.
  Severity shortTaskSeverity = shortTaskSeverity(tasks.length, averageTimeMs);
  Severity longTaskSeverity = longTaskSeverity(tasks.length, averageTimeMs);
  Severity severity = Severity.max(shortTaskSeverity, longTaskSeverity);
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity,
      Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Average task input size", FileUtils.byteCountToDisplaySize(averageSize));
  result.addResultDetail("Average task runtime", Statistics.readableTimespan(averageTimeMs));
  result.addResultDetail("Max task runtime", Statistics.readableTimespan(taskMaxMs));
  result.addResultDetail("Min task runtime", Statistics.readableTimespan(taskMinMs));
  return result;
}
Usage example of com.linkedin.drelephant.analysis.Severity in the dr-elephant project by LinkedIn: the shortTaskSeverity method of the MapperTimeHeuristic class.
/**
 * Rates how problematic a short average task runtime is, capped by a second
 * severity derived from the task count so that small jobs are not over-flagged.
 */
private Severity shortTaskSeverity(long numTasks, long averageTimeMs) {
  Severity runtimeSeverity = getShortRuntimeSeverity(averageTimeMs);
  Severity countSeverity = getNumTasksSeverity(numTasks);
  // Keep the milder of the two ratings.
  return Severity.min(runtimeSeverity, countSeverity);
}
Usage example of com.linkedin.drelephant.analysis.Severity in the dr-elephant project by LinkedIn: the getGcRatioSeverity method of the GenericGCHeuristic class.
/**
 * Rates the fraction of CPU time spent in garbage collection against the
 * configured {@code gcRatioLimits} thresholds, capped by a runtime-based
 * severity so that short-lived tasks are not over-flagged.
 *
 * @param runtimeMs average task runtime in milliseconds
 * @param cpuMs     average task CPU time in milliseconds (may be 0)
 * @param gcMs      average task GC time in milliseconds
 * @return the combined (minimum) severity
 */
private Severity getGcRatioSeverity(long runtimeMs, long cpuMs, long gcMs) {
  // Guard against division by zero: cpuMs can be 0 (caller only guards the
  // zero case for the displayed ratio, not for this call), in which case
  // gcMs/cpuMs is NaN or Infinity. Treat the ratio as 0 instead.
  double gcRatio = cpuMs == 0 ? 0.0 : ((double) gcMs) / cpuMs;
  Severity ratioSeverity = Severity.getSeverityAscending(
      gcRatio, gcRatioLimits[0], gcRatioLimits[1], gcRatioLimits[2], gcRatioLimits[3]);
  // Severity is reduced if task runtime is insignificant.
  Severity runtimeSeverity = getRuntimeSeverity(runtimeMs);
  return Severity.min(ratioSeverity, runtimeSeverity);
}
Usage example of com.linkedin.drelephant.analysis.Severity in the dr-elephant project by LinkedIn: the apply method of the GenericGCHeuristic class.
/**
 * Rates a succeeded MapReduce application by the average fraction of CPU time
 * its tasks spend in garbage collection.
 *
 * @param data application data; only succeeded applications are analyzed
 * @return a {@link HeuristicResult} with task count and average runtime/CPU/GC
 *         figures, or {@code null} if the application did not succeed
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  MapReduceTaskData[] tasks = getTasks(data);
  List<Long> gcSamples = new ArrayList<Long>();
  List<Long> cpuSamples = new ArrayList<Long>();
  List<Long> runtimeSamples = new ArrayList<Long>();
  // Collect runtime/GC/CPU figures from tasks that actually reported them.
  for (MapReduceTaskData task : tasks) {
    if (!task.isTimeAndCounterDataPresent()) {
      continue;
    }
    runtimeSamples.add(task.getTotalRunTimeMs());
    gcSamples.add(task.getCounters().get(MapReduceCounterData.CounterName.GC_MILLISECONDS));
    cpuSamples.add(task.getCounters().get(MapReduceCounterData.CounterName.CPU_MILLISECONDS));
  }
  long avgRuntimeMs = Statistics.average(runtimeSamples);
  long avgCpuMs = Statistics.average(cpuSamples);
  long avgGcMs = Statistics.average(gcSamples);
  // Displayed GC/CPU ratio; guarded against division by zero.
  double gcToCpuRatio = avgCpuMs == 0 ? 0 : avgGcMs * (1.0) / avgCpuMs;
  // With no tasks at all there is nothing to rate.
  Severity severity = tasks.length == 0
      ? Severity.NONE
      : getGcRatioSeverity(avgRuntimeMs, avgCpuMs, avgGcMs);
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity,
      Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Avg task runtime (ms)", Long.toString(avgRuntimeMs));
  result.addResultDetail("Avg task CPU time (ms)", Long.toString(avgCpuMs));
  result.addResultDetail("Avg task GC time (ms)", Long.toString(avgGcMs));
  result.addResultDetail("Task GC/CPU ratio", Double.toString(gcToCpuRatio));
  return result;
}
Aggregations