Use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.
The class ReducerMemoryHeuristicTest, method analyzeJob.
private Severity analyzeJob(long taskAvgMemMB, long containerMemMB) throws IOException {
  TezCounterData jobCounter = new TezCounterData();
  TezTaskData[] reducers = new TezTaskData[NUMTASKS + 1];

  TezCounterData counter = new TezCounterData();
  counter.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, taskAvgMemMB * FileUtils.ONE_MB);

  Properties p = new Properties();
  p.setProperty(com.linkedin.drelephant.mapreduce.heuristics.ReducerMemoryHeuristic.REDUCER_MEMORY_CONF,
      Long.toString(containerMemMB));

  int i = 0;
  for (; i < NUMTASKS; i++) {
    reducers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);
    reducers[i].setTimeAndCounter(new long[5], counter);
  }
  // Non-sampled task, which does not contain time and counter data
  reducers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);

  TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setReduceTaskData(reducers);
  data.setConf(p);

  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
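A minimal sketch of how a test might exercise this helper, assuming JUnit 4 with org.junit.Assert.assertEquals statically imported; the memory values and expected severities are illustrative, not the heuristic's actual configured thresholds:

@Test
public void testReducerMemorySeverity() throws IOException {
  // Hypothetical values: a container sized close to actual usage should
  // not be flagged, while heavy over-allocation should raise severity.
  assertEquals(Severity.NONE, analyzeJob(6000, 8192));
  assertEquals(Severity.CRITICAL, analyzeJob(1024, 8192));
}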
Use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.
The class ReducerTimeHeuristicTest, method analyzeJob.
private Severity analyzeJob(long runtimeMs, int numTasks) throws IOException {
  TezCounterData dummyCounter = new TezCounterData();
  TezTaskData[] reducers = new TezTaskData[numTasks + 1];

  int i = 0;
  for (; i < numTasks; i++) {
    reducers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);
    reducers[i].setTime(new long[] { runtimeMs, 0, 0, 0, 0 });
    reducers[i].setCounter(dummyCounter);
  }
  // Non-sampled task, which does not contain time and counter data
  reducers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);

  TezApplicationData data = new TezApplicationData().setCounters(dummyCounter).setReduceTaskData(reducers);

  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
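As above, a hedged sketch of a caller, assuming JUnit 4 and dr-elephant's Statistics time constants; the runtimes, task counts, and expected severities are illustrative assumptions, not the configured limits:

@Test
public void testReducerTimeSeverity() throws IOException {
  // Hypothetical expectations: a thousand one-minute reducers suggest
  // over-parallelization; fifty twelve-minute reducers look healthy.
  assertEquals(Severity.CRITICAL, analyzeJob(Statistics.MINUTE_IN_MS, 1000));
  assertEquals(Severity.NONE, analyzeJob(12 * Statistics.MINUTE_IN_MS, 50));
}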
Use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.
The class TezTaskLevelAggregatedMetrics, method compute.
/**
 * Computes the aggregated metrics: peak memory, delay, total task duration, wasted resources and memory usage.
 * @param taskDatas the per-task data to aggregate
 * @param containerSize the configured container size, in MB
 * @param idealStartTime the ideal task start time, in milliseconds
 */
private void compute(TezTaskData[] taskDatas, long containerSize, long idealStartTime) {
  long peakMemoryNeed = 0;
  long taskFinishTimeMax = 0;
  long taskDurationMax = 0;

  // If there are zero tasks, then there is nothing to compute.
  if (taskDatas == null || taskDatas.length == 0) {
    return;
  }

  for (TezTaskData taskData : taskDatas) {
    if (!taskData.isSampled()) {
      continue;
    }
    long taskMemory = taskData.getCounters().get(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES) / FileUtils.ONE_MB; // MB
    long taskVM = taskData.getCounters().get(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES) / FileUtils.ONE_MB; // MB
    long taskDuration = taskData.getFinishTime() - taskData.getStartTime(); // Milliseconds
    long taskCost = containerSize * (taskDuration / Statistics.SECOND_IN_MS); // MB-seconds

    durations.add(taskDuration);
    finishTimes.add(taskData.getFinishTime());

    // Peak memory usage
    long memoryRequiredForVM = (long) (taskVM / CLUSTER_MEMORY_FACTOR);
    long biggerMemoryRequirement = Math.max(memoryRequiredForVM, taskMemory);
    peakMemoryNeed = Math.max(biggerMemoryRequirement, peakMemoryNeed);

    taskFinishTimeMax = Math.max(taskFinishTimeMax, taskData.getFinishTime());
    taskDurationMax = Math.max(taskDurationMax, taskDuration);
    _resourceUsed += taskCost;
  }

  // Compute the delay in starting the task.
  _delay = taskFinishTimeMax - (idealStartTime + taskDurationMax);
  if (_delay < 0) { // invalid delay
    _delay = 0;
  }

  // Wasted resources
  long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER);
  if (wastedMemory > 0) {
    for (long duration : durations) {
      _resourceWasted += wastedMemory * (duration / Statistics.SECOND_IN_MS); // MB-seconds
    }
  }
}
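To make the wasted-resource arithmetic concrete, a worked example with hypothetical numbers, assuming MEMORY_BUFFER is 1.5 (the value dr-elephant uses in its analogous MapReduce aggregator):

// Suppose a 4096 MB container whose sampled tasks peaked at 2000 MB.
long containerSize = 4096;                                         // MB
long peakMemoryNeed = 2000;                                        // MB
long wastedMemory = containerSize - (long) (peakMemoryNeed * 1.5); // 4096 - 3000 = 1096 MB
// Each task that ran for 120 seconds then contributes:
long wastedForOneTask = wastedMemory * 120;                        // 131,520 MB-seconds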
Use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.
The class GenericDataSkewHeuristic, method apply.
public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  TezTaskData[] tasks = getTasks(data);

  // Gathering data for checking time skew
  List<Long> timeTaken = new ArrayList<Long>();
  for (int i = 0; i < tasks.length; i++) {
    if (tasks[i].isSampled()) {
      timeTaken.add(tasks[i].getTotalRunTimeMs());
    }
  }

  long[][] groupsTime = Statistics.findTwoGroups(Longs.toArray(timeTaken));

  long timeAvg1 = Statistics.average(groupsTime[0]);
  long timeAvg2 = Statistics.average(groupsTime[1]);

  // Seconds are used for calculating deviation, as they give a better idea than milliseconds.
  long timeAvgSec1 = TimeUnit.MILLISECONDS.toSeconds(timeAvg1);
  long timeAvgSec2 = TimeUnit.MILLISECONDS.toSeconds(timeAvg2);

  long minTime = Math.min(timeAvgSec1, timeAvgSec2);
  long diffTime = Math.abs(timeAvgSec1 - timeAvgSec2);

  // Using the same deviation limits for time skew as for data skew. This can be changed in the future.
  Severity severityTime = getDeviationSeverity(minTime, diffTime);

  // This reduces severity if the number of tasks is insignificant
  severityTime = Severity.min(severityTime,
      Severity.getSeverityAscending(groupsTime[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]));

  // Gathering data for checking data skew
  List<Long> inputSizes = new ArrayList<Long>();
  for (int i = 0; i < tasks.length; i++) {
    if (tasks[i].isSampled()) {
      long inputByte = 0;
      for (TezCounterData.CounterName counterName : _counterNames) {
        inputByte += tasks[i].getCounters().get(counterName);
      }
      inputSizes.add(inputByte);
    }
  }

  long[][] groups = Statistics.findTwoGroups(Longs.toArray(inputSizes));

  long avg1 = Statistics.average(groups[0]);
  long avg2 = Statistics.average(groups[1]);

  long min = Math.min(avg1, avg2);
  long diff = Math.abs(avg2 - avg1);

  Severity severityData = getDeviationSeverity(min, diff);

  // This reduces severity if the largest file sizes are insignificant
  severityData = Severity.min(severityData, getFilesSeverity(avg2));

  // This reduces severity if the number of tasks is insignificant
  severityData = Severity.min(severityData,
      Severity.getSeverityAscending(groups[0].length, numTasksLimits[0], numTasksLimits[1], numTasksLimits[2], numTasksLimits[3]));

  Severity severity = Severity.max(severityData, severityTime);

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(),
      severity, Utils.getHeuristicScore(severityData, tasks.length));

  result.addResultDetail("Data skew (Number of tasks)", Integer.toString(tasks.length));
  result.addResultDetail("Data skew (Group A)", groups[0].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg1) + " avg");
  result.addResultDetail("Data skew (Group B)", groups[1].length + " tasks @ " + FileUtils.byteCountToDisplaySize(avg2) + " avg");
  result.addResultDetail("Time skew (Number of tasks)", Integer.toString(tasks.length));
  result.addResultDetail("Time skew (Group A)", groupsTime[0].length + " tasks @ " + convertTimeMs(timeAvg1) + " avg");
  result.addResultDetail("Time skew (Group B)", groupsTime[1].length + " tasks @ " + convertTimeMs(timeAvg2) + " avg");

  return result;
}
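Both checks hinge on getDeviationSeverity, which compares the gap between the two group averages against the smaller average. A minimal sketch of such a method, with illustrative deviation limits standing in for the configured ones:

private Severity getDeviationSeverity(long averageMin, long averageDiff) {
  if (averageMin <= 0) {
    averageMin = 1; // avoid division by zero
  }
  // Ratio of the gap to the smaller group's average: the more one group
  // outweighs the other, the higher the skew severity.
  long value = averageDiff / averageMin;
  return Severity.getSeverityAscending(value, 2, 4, 8, 16);
}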
Use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.
The class ReducerTimeHeuristic, method apply.
public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  TezTaskData[] tasks = data.getReduceTaskData();

  List<Long> runtimesMs = new ArrayList<Long>();
  long taskMinMs = Long.MAX_VALUE;
  long taskMaxMs = 0;

  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      long taskTime = task.getTotalRunTimeMs();
      runtimesMs.add(taskTime);
      taskMinMs = Math.min(taskMinMs, taskTime);
      taskMaxMs = Math.max(taskMaxMs, taskTime);
    }
  }

  if (taskMinMs == Long.MAX_VALUE) {
    taskMinMs = 0;
  }

  long averageTimeMs = Statistics.average(runtimesMs);

  Severity shortTaskSeverity = shortTaskSeverity(tasks.length, averageTimeMs);
  Severity longTaskSeverity = longTaskSeverity(tasks.length, averageTimeMs);
  Severity severity = Severity.max(shortTaskSeverity, longTaskSeverity);

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(),
      severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Average task runtime",
      tasks.length == 0 ? "0" : (Statistics.readableTimespan(averageTimeMs).equals("") ? "0 sec" : Statistics.readableTimespan(averageTimeMs)));
  result.addResultDetail("Max task runtime",
      tasks.length == 0 ? "0" : (Statistics.readableTimespan(taskMaxMs).equals("") ? "0 sec" : Statistics.readableTimespan(taskMaxMs)));
  result.addResultDetail("Min task runtime",
      tasks.length == 0 ? "0" : (Statistics.readableTimespan(taskMinMs).equals("") ? "0 sec" : Statistics.readableTimespan(taskMinMs)));

  return result;
}
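The overall severity combines two opposing checks. A sketch of the short-task side, with illustrative limits in place of the heuristic's configured ones; the Severity.min means a job is flagged only when its tasks are both short and numerous:

private Severity shortTaskSeverity(long numTasks, long averageTimeMs) {
  // Short average runtime alone is harmless, as is a large task count;
  // together they suggest the job is over-parallelized.
  Severity severityShortTasks = Severity.getSeverityDescending(averageTimeMs,
      10 * Statistics.MINUTE_IN_MS, 4 * Statistics.MINUTE_IN_MS,
      2 * Statistics.MINUTE_IN_MS, Statistics.MINUTE_IN_MS);
  Severity severityManyTasks = Severity.getSeverityAscending(numTasks, 50, 101, 500, 1000);
  return Severity.min(severityShortTasks, severityManyTasks);
}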