use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.
the class GenericDataSkewHeuristic method apply.
/**
 * Evaluates runtime skew and input-size skew over the sampled tasks of a Tez application.
 *
 * <p>For each dimension the sampled values are split into two groups with
 * {@code Statistics.findTwoGroups}, the group averages are compared through
 * {@code getDeviationSeverity}, and the outcome is capped by the size of the first group.
 * Data skew is additionally capped by {@code getFilesSeverity} of the second group's average.
 * The reported severity is the larger of the two.
 *
 * @param data the Tez application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  TezTaskData[] tasks = getTasks(data);

  // ---- Time skew -------------------------------------------------------------------------
  // Collect the total runtime of every sampled task.
  List<Long> sampledRunTimes = new ArrayList<Long>();
  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      sampledRunTimes.add(task.getTotalRunTimeMs());
    }
  }

  long[][] timeGroups = Statistics.findTwoGroups(Longs.toArray(sampledRunTimes));
  long groupATimeMs = Statistics.average(timeGroups[0]);
  long groupBTimeMs = Statistics.average(timeGroups[1]);

  // The deviation is computed on whole seconds rather than milliseconds.
  long groupATimeSec = TimeUnit.MILLISECONDS.toSeconds(groupATimeMs);
  long groupBTimeSec = TimeUnit.MILLISECONDS.toSeconds(groupBTimeMs);
  long smallerTimeSec = Math.min(groupATimeSec, groupBTimeSec);
  long timeGapSec = Math.abs(groupATimeSec - groupBTimeSec);

  // Time skew uses the same deviation limits as data skew; this may change in the future.
  Severity timeSkew = getDeviationSeverity(smallerTimeSec, timeGapSec);

  // Lower the severity when the number of tasks is insignificant.
  Severity timeTaskCountCap = Severity.getSeverityAscending(timeGroups[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]);
  timeSkew = Severity.min(timeSkew, timeTaskCountCap);

  // ---- Data skew -------------------------------------------------------------------------
  // Collect, per sampled task, the sum of all configured input counters.
  List<Long> sampledInputBytes = new ArrayList<Long>();
  for (TezTaskData task : tasks) {
    if (!task.isSampled()) {
      continue;
    }
    long taskBytes = 0;
    for (TezCounterData.CounterName counter : _counterNames) {
      taskBytes += task.getCounters().get(counter);
    }
    sampledInputBytes.add(taskBytes);
  }

  long[][] sizeGroups = Statistics.findTwoGroups(Longs.toArray(sampledInputBytes));
  long groupABytes = Statistics.average(sizeGroups[0]);
  long groupBBytes = Statistics.average(sizeGroups[1]);
  long smallerBytes = Math.min(groupABytes, groupBBytes);
  long bytesGap = Math.abs(groupBBytes - groupABytes);

  Severity dataSkew = getDeviationSeverity(smallerBytes, bytesGap);

  // Lower the severity when the largest file sizes are insignificant.
  dataSkew = Severity.min(dataSkew, getFilesSeverity(groupBBytes));

  // Lower the severity when the number of tasks is insignificant.
  Severity dataTaskCountCap = Severity.getSeverityAscending(sizeGroups[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]);
  dataSkew = Severity.min(dataSkew, dataTaskCountCap);

  // ---- Result ----------------------------------------------------------------------------
  Severity overall = Severity.max(dataSkew, timeSkew);

  // NOTE(review): the score is derived from the data-skew severity only, while the reported
  // severity is the max of data and time skew -- confirm whether this is intentional.
  int taskCount = tasks.length;
  HeuristicResult result = new HeuristicResult(
      _heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(),
      overall,
      Utils.getHeuristicScore(dataSkew, taskCount));

  result.addResultDetail("Data skew (Number of tasks)", Integer.toString(taskCount));
  result.addResultDetail("Data skew (Group A)", sizeGroups[0].length + " tasks @ " + FileUtils.byteCountToDisplaySize(groupABytes) + " avg");
  result.addResultDetail("Data skew (Group B)", sizeGroups[1].length + " tasks @ " + FileUtils.byteCountToDisplaySize(groupBBytes) + " avg");
  result.addResultDetail("Time skew (Number of tasks)", Integer.toString(taskCount));
  result.addResultDetail("Time skew (Group A)", timeGroups[0].length + " tasks @ " + convertTimeMs(groupATimeMs) + " avg");
  result.addResultDetail("Time skew (Group B)", timeGroups[1].length + " tasks @ " + convertTimeMs(groupBTimeMs) + " avg");
  return result;
}
use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.
the class GenericMemoryHeuristic method apply.
/**
 * Rates how the average physical memory used by the tasks of a MapReduce application compares
 * with the container memory the job requested.
 *
 * <p>The requested size is read from the job property named by {@code _containerMemConf}. When
 * that value is not a number but starts with {@code $} (for example {@code "${VAR}"}), the text
 * between the braces is taken as the name of another job property, which is parsed instead.
 * If no usable number is found, {@code getContainerMemDefaultMBytes()} supplies the value.
 * The value is then multiplied by {@code FileUtils.ONE_MB}.
 *
 * <p>Only tasks for which {@code isTimeAndCounterDataPresent()} is true contribute to the
 * averages, minimum and maximum.
 *
 * @param data the MapReduce application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  String containerSizeStr = data.getConf().getProperty(_containerMemConf);
  long containerMem = -1L;
  if (containerSizeStr != null) {
    try {
      containerMem = Long.parseLong(containerSizeStr);
    } catch (NumberFormatException e0) {
      // Some job has a string var like "${VAR}" for this config.
      String realContainerConf = null;
      if (containerSizeStr.startsWith("$")) {
        // Only extract the name when the closing brace really follows the opening one;
        // otherwise substring() would throw for malformed values such as "$VAR" or "${VAR".
        int nameStart = containerSizeStr.indexOf('{') + 1;
        int nameEnd = containerSizeStr.indexOf('}', nameStart);
        if (nameEnd >= nameStart) {
          realContainerConf = containerSizeStr.substring(nameStart, nameEnd);
        }
      }

      if (realContainerConf != null) {
        String realContainerSizeStr = data.getConf().getProperty(realContainerConf);
        try {
          // A missing property yields null here, which parseLong rejects with
          // NumberFormatException, so it is reported by the same warning.
          containerMem = Long.parseLong(realContainerSizeStr);
        } catch (NumberFormatException e1) {
          logger.warn(realContainerConf + ": expected number [" + realContainerSizeStr + "]");
        }
      } else {
        logger.warn(_containerMemConf + ": expected number [" + containerSizeStr + "]");
      }
    }
  }

  // Still negative means nothing could be parsed: fall back to the default.
  if (containerMem < 0) {
    containerMem = getContainerMemDefaultMBytes();
  }
  containerMem *= FileUtils.ONE_MB;

  MapReduceTaskData[] tasks = getTasks(data);
  List<Long> taskPMems = new ArrayList<Long>();
  List<Long> taskVMems = new ArrayList<Long>();
  List<Long> runtimesMs = new ArrayList<Long>();
  long taskPMin = Long.MAX_VALUE;
  long taskPMax = 0;
  for (MapReduceTaskData task : tasks) {
    if (task.isTimeAndCounterDataPresent()) {
      runtimesMs.add(task.getTotalRunTimeMs());
      long taskPMem = task.getCounters().get(MapReduceCounterData.CounterName.PHYSICAL_MEMORY_BYTES);
      long taskVMem = task.getCounters().get(MapReduceCounterData.CounterName.VIRTUAL_MEMORY_BYTES);
      taskPMems.add(taskPMem);
      taskPMin = Math.min(taskPMin, taskPMem);
      taskPMax = Math.max(taskPMax, taskPMem);
      taskVMems.add(taskVMem);
    }
  }

  // The sentinel was never replaced, i.e. no task contributed a value: report 0 instead.
  if (taskPMin == Long.MAX_VALUE) {
    taskPMin = 0;
  }

  long taskPMemAvg = Statistics.average(taskPMems);
  long taskVMemAvg = Statistics.average(taskVMems);
  long averageTimeMs = Statistics.average(runtimesMs);

  // Without any task there is nothing to rate.
  Severity severity;
  if (tasks.length == 0) {
    severity = Severity.NONE;
  } else {
    severity = getTaskMemoryUtilSeverity(taskPMemAvg, containerMem);
  }

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Avg task runtime", Statistics.readableTimespan(averageTimeMs));
  result.addResultDetail("Avg Physical Memory (MB)", Long.toString(taskPMemAvg / FileUtils.ONE_MB));
  result.addResultDetail("Max Physical Memory (MB)", Long.toString(taskPMax / FileUtils.ONE_MB));
  result.addResultDetail("Min Physical Memory (MB)", Long.toString(taskPMin / FileUtils.ONE_MB));
  result.addResultDetail("Avg Virtual Memory (MB)", Long.toString(taskVMemAvg / FileUtils.ONE_MB));
  result.addResultDetail("Requested Container Memory", FileUtils.byteCountToDisplaySize(containerMem));
  return result;
}
use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.
the class JobQueueLimitHeuristic method apply.
/**
 * Flags map and reduce tasks of jobs on the {@code default} queue whose runtime approaches the
 * 15 minute limit passed to {@code getTasksSeverity}.
 *
 * <p>For any other queue the result only carries a "Not Applicable" detail and severity
 * {@code NONE}.
 *
 * @param data the MapReduce application to analyse
 * @return the heuristic result
 * @throws IllegalStateException if the job configuration has no {@code mapred.job.queue.name}
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  HeuristicResult result = new HeuristicResult(
      _heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(),
      Severity.NONE,
      0);
  Properties conf = data.getConf();
  long timeoutLimitMs = TimeUnit.MINUTES.toMillis(15);

  // Queue the job was submitted to.
  String queue = conf.getProperty("mapred.job.queue.name");
  if (queue == null) {
    throw new IllegalStateException("Queue Name not found.");
  }

  MapReduceTaskData[] mappers = data.getMapperData();
  MapReduceTaskData[] reducers = data.getReducerData();
  int mapperCount = mappers.length;
  int reducerCount = reducers.length;

  // Only jobs on the default queue are rated; everything else stays at NONE.
  if (!"default".equals(queue)) {
    result.addResultDetail("Not Applicable", "This Heuristic is not applicable to " + queue + " queue");
    result.setSeverity(Severity.NONE);
    return result;
  }

  result.addResultDetail("Queue: ", queue, null);
  result.addResultDetail("Number of Map tasks", Integer.toString(mapperCount));
  result.addResultDetail("Number of Reduce tasks", Integer.toString(reducerCount));

  // Per-task severity of the mappers.
  Severity[] mapperSeverities = getTasksSeverity(mappers, timeoutLimitMs);
  result.addResultDetail("Number of Map tasks that are in severe state (14 to 14.5 min)",
      Long.toString(getSeverityFrequency(Severity.SEVERE, mapperSeverities)));
  result.addResultDetail("Number of Map tasks that are in critical state (over 14.5 min)",
      Long.toString(getSeverityFrequency(Severity.CRITICAL, mapperSeverities)));

  // Per-task severity of the reducers.
  Severity[] reducerSeverities = getTasksSeverity(reducers, timeoutLimitMs);
  result.addResultDetail("Number of Reduce tasks that are in severe state (14 to 14.5 min)",
      Long.toString(getSeverityFrequency(Severity.SEVERE, reducerSeverities)));
  result.addResultDetail("Number of Reduce tasks that are in critical state (over 14.5 min)",
      Long.toString(getSeverityFrequency(Severity.CRITICAL, reducerSeverities)));

  // The job is as severe as its worst task.
  Severity worstMapper = Severity.max(mapperSeverities);
  Severity worstReducer = Severity.max(reducerSeverities);
  result.setSeverity(Severity.max(worstMapper, worstReducer));
  return result;
}
use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.
the class ReducerTimeHeuristic method apply.
/**
 * Rates the average reducer runtime of a MapReduce application, reporting the worse of the
 * {@code shortTimeSeverity} and {@code longTimeSeverity} ratings.
 *
 * <p>Only reducers for which {@code isTimeDataPresent()} is true contribute to the average,
 * minimum and maximum; the task count passed on and reported is the total number of reducers.
 *
 * @param data the MapReduce application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  MapReduceTaskData[] reducers = data.getReducerData();

  List<Long> reducerTimesMs = new ArrayList<Long>();
  long shortestMs = Long.MAX_VALUE;
  long longestMs = 0;
  for (int i = 0; i < reducers.length; i++) {
    MapReduceTaskData reducer = reducers[i];
    if (!reducer.isTimeDataPresent()) {
      continue;
    }
    long elapsedMs = reducer.getTotalRunTimeMs();
    reducerTimesMs.add(elapsedMs);
    if (elapsedMs < shortestMs) {
      shortestMs = elapsedMs;
    }
    if (elapsedMs > longestMs) {
      longestMs = elapsedMs;
    }
  }

  // The sentinel is still in place when no reducer contributed a runtime: report 0 instead.
  shortestMs = (shortestMs == Long.MAX_VALUE) ? 0 : shortestMs;

  // Analyze data
  long meanRuntimeMs = Statistics.average(reducerTimesMs);
  int reducerCount = reducers.length;
  Severity tooShort = shortTimeSeverity(meanRuntimeMs, reducerCount);
  Severity tooLong = longTimeSeverity(meanRuntimeMs, reducerCount);
  Severity overall = Severity.max(tooShort, tooLong);

  HeuristicResult result = new HeuristicResult(
      _heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(),
      overall,
      Utils.getHeuristicScore(overall, reducerCount));

  result.addResultDetail("Number of tasks", Integer.toString(reducerCount));
  result.addResultDetail("Average task runtime", Statistics.readableTimespan(meanRuntimeMs));
  result.addResultDetail("Max task runtime", Statistics.readableTimespan(longestMs));
  result.addResultDetail("Min task runtime", Statistics.readableTimespan(shortestMs));
  return result;
}
use of com.linkedin.drelephant.analysis.HeuristicResult in project dr-elephant by linkedin.
the class ShuffleSortHeuristic method apply.
/**
 * Rates the average shuffle time and the average sort time of the reducers against their
 * average code execution time, reporting the worse of the two ratings.
 *
 * <p>Only reducers for which {@code isTimeDataPresent()} is true contribute to the averages.
 *
 * @param data the MapReduce application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  MapReduceTaskData[] reducers = data.getReducerData();

  List<Long> codeTimes = new ArrayList<Long>();
  List<Long> shuffleTimes = new ArrayList<Long>();
  List<Long> sortTimes = new ArrayList<Long>();
  for (int i = 0; i < reducers.length; i++) {
    MapReduceTaskData reducer = reducers[i];
    if (!reducer.isTimeDataPresent()) {
      continue;
    }
    codeTimes.add(reducer.getCodeExecutionTimeMs());
    shuffleTimes.add(reducer.getShuffleTimeMs());
    sortTimes.add(reducer.getSortTimeMs());
  }

  // Analyze data
  long meanCodeMs = Statistics.average(codeTimes);
  long meanShuffleMs = Statistics.average(shuffleTimes);
  long meanSortMs = Statistics.average(sortTimes);

  Severity shuffleRating = getShuffleSortSeverity(meanShuffleMs, meanCodeMs);
  Severity sortRating = getShuffleSortSeverity(meanSortMs, meanCodeMs);
  Severity overall = Severity.max(shuffleRating, sortRating);

  HeuristicResult result = new HeuristicResult(
      _heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(),
      overall,
      Utils.getHeuristicScore(overall, reducers.length));

  // NOTE(review): the reducer array is fetched from data a second time here although the
  // local array is available -- presumably equivalent; confirm and reuse the local.
  result.addResultDetail("Number of tasks", Integer.toString(data.getReducerData().length));
  result.addResultDetail("Average code runtime", Statistics.readableTimespan(meanCodeMs));

  // Shuffle and sort times are shown together with their factor relative to the code runtime.
  String shuffleRatio = Statistics.describeFactor(meanShuffleMs, meanCodeMs, "x");
  result.addResultDetail("Average shuffle time", Statistics.readableTimespan(meanShuffleMs) + " " + shuffleRatio);

  String sortRatio = Statistics.describeFactor(meanSortMs, meanCodeMs, "x");
  result.addResultDetail("Average sort time", Statistics.readableTimespan(meanSortMs) + " " + sortRatio);
  return result;
}
Aggregations