use of com.linkedin.drelephant.analysis.Severity in project dr-elephant by linkedin.
the class ReducerTimeHeuristic method shortTimeSeverity.
private Severity shortTimeSeverity(long runtimeMs, long numTasks) {
  Severity timeSeverity = getShortRuntimeSeverity(runtimeMs);
  // Severity is adjusted based on number of tasks
  Severity taskSeverity = getNumTasksSeverity(numTasks);
  return Severity.min(timeSeverity, taskSeverity);
}
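getNumTasksSeverity is what turns the raw task count into the capping severity. Its cut-offs are loaded from the heuristic's configuration, so the values below are only illustrative assumptions; this hypothetical sketch just shows the usual ascending-threshold shape:

private Severity getNumTasksSeverityExample(long numTasks) {
  // Hypothetical thresholds: a short average runtime matters more when many tasks are affected.
  if (numTasks >= 1000) return Severity.CRITICAL; // assumed cut-off
  if (numTasks >= 500) return Severity.SEVERE;    // assumed cut-off
  if (numTasks >= 100) return Severity.MODERATE;  // assumed cut-off
  if (numTasks >= 50) return Severity.LOW;        // assumed cut-off
  return Severity.NONE;
}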
use of com.linkedin.drelephant.analysis.Severity in project dr-elephant by linkedin.
the class ShuffleSortHeuristic method apply.
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  MapReduceTaskData[] tasks = data.getReducerData();

  List<Long> execTimeMs = new ArrayList<Long>();
  List<Long> shuffleTimeMs = new ArrayList<Long>();
  List<Long> sortTimeMs = new ArrayList<Long>();

  for (MapReduceTaskData task : tasks) {
    if (task.isTimeDataPresent()) {
      execTimeMs.add(task.getCodeExecutionTimeMs());
      shuffleTimeMs.add(task.getShuffleTimeMs());
      sortTimeMs.add(task.getSortTimeMs());
    }
  }

  // Analyze data
  long avgExecTimeMs = Statistics.average(execTimeMs);
  long avgShuffleTimeMs = Statistics.average(shuffleTimeMs);
  long avgSortTimeMs = Statistics.average(sortTimeMs);

  Severity shuffleSeverity = getShuffleSortSeverity(avgShuffleTimeMs, avgExecTimeMs);
  Severity sortSeverity = getShuffleSortSeverity(avgSortTimeMs, avgExecTimeMs);
  Severity severity = Severity.max(shuffleSeverity, sortSeverity);

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(data.getReducerData().length));
  result.addResultDetail("Average code runtime", Statistics.readableTimespan(avgExecTimeMs));

  String shuffleFactor = Statistics.describeFactor(avgShuffleTimeMs, avgExecTimeMs, "x");
  result.addResultDetail("Average shuffle time", Statistics.readableTimespan(avgShuffleTimeMs) + " " + shuffleFactor);

  String sortFactor = Statistics.describeFactor(avgSortTimeMs, avgExecTimeMs, "x");
  result.addResultDetail("Average sort time", Statistics.readableTimespan(avgSortTimeMs) + " " + sortFactor);

  return result;
}
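getShuffleSortSeverity grades one phase's average time against the average code execution time, and Severity.max then reports whichever of the shuffle or sort phase looks worse. The project loads its thresholds from heuristic configuration, so the following is only a hypothetical sketch of that comparison, with assumed multipliers and a deliberately different method name:

private Severity gradePhaseAgainstCodeTime(long phaseTimeMs, long codeTimeMs) {
  // Hypothetical: the more a phase dominates the actual code execution time, the worse the grade.
  if (codeTimeMs <= 0) {
    return Severity.NONE; // nothing meaningful to compare against
  }
  double ratio = (double) phaseTimeMs / codeTimeMs;
  if (ratio >= 8) return Severity.CRITICAL; // assumed multiplier
  if (ratio >= 4) return Severity.SEVERE;   // assumed multiplier
  if (ratio >= 2) return Severity.MODERATE; // assumed multiplier
  if (ratio >= 1) return Severity.LOW;      // assumed multiplier
  return Severity.NONE;
}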
use of com.linkedin.drelephant.analysis.Severity in project dr-elephant by linkedin.
the class ReducerTimeHeuristic method shortTaskSeverity.
private Severity shortTaskSeverity(long numTasks, long averageTimeMs) {
  // We want to identify jobs with short task runtime
  Severity severity = getShortRuntimeSeverity(averageTimeMs);
  // Severity is reduced if number of tasks is small
  Severity numTaskSeverity = getNumTasksSeverity(numTasks);
  return Severity.min(severity, numTaskSeverity);
}
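The effect of Severity.min is that the weaker of the two signals wins. A minimal, self-contained illustration of that capping (assuming the standard dr-elephant severity constants; not part of the heuristic itself):

import com.linkedin.drelephant.analysis.Severity;

public class SeverityCapDemo {
  public static void main(String[] args) {
    Severity runtimeSeverity = Severity.SEVERE; // very short average task runtime
    Severity numTaskSeverity = Severity.LOW;    // but only a handful of tasks
    // min keeps the less severe of the two, so the job is reported as Low overall
    System.out.println(Severity.min(runtimeSeverity, numTaskSeverity).getText());
  }
}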
use of com.linkedin.drelephant.analysis.Severity in project dr-elephant by linkedin.
the class ReducerTimeHeuristic method apply.
public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  TezTaskData[] tasks = data.getReduceTaskData();

  List<Long> runtimesMs = new ArrayList<Long>();
  long taskMinMs = Long.MAX_VALUE;
  long taskMaxMs = 0;

  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      long taskTime = task.getTotalRunTimeMs();
      runtimesMs.add(taskTime);
      taskMinMs = Math.min(taskMinMs, taskTime);
      taskMaxMs = Math.max(taskMaxMs, taskTime);
    }
  }

  if (taskMinMs == Long.MAX_VALUE) {
    taskMinMs = 0;
  }

  long averageTimeMs = Statistics.average(runtimesMs);

  Severity shortTaskSeverity = shortTaskSeverity(tasks.length, averageTimeMs);
  Severity longTaskSeverity = longTaskSeverity(tasks.length, averageTimeMs);
  Severity severity = Severity.max(shortTaskSeverity, longTaskSeverity);

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Average task runtime",
      tasks.length == 0 ? "0" : (Statistics.readableTimespan(averageTimeMs).equals("") ? "0 sec" : Statistics.readableTimespan(averageTimeMs)));
  result.addResultDetail("Max task runtime",
      tasks.length == 0 ? "0" : (Statistics.readableTimespan(taskMaxMs).equals("") ? "0 sec" : Statistics.readableTimespan(taskMaxMs)));
  result.addResultDetail("Min task runtime",
      tasks.length == 0 ? "0" : (Statistics.readableTimespan(taskMinMs).equals("") ? "0 sec" : Statistics.readableTimespan(taskMinMs)));

  return result;
}
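The three runtime rows repeat the same defensive formatting: with no reduce tasks the value collapses to "0", and a duration whose readable form comes back empty is reported as "0 sec". A small refactoring sketch capturing that pattern (not code from the project):

private static String readableRuntimeOrZero(long durationMs, int taskCount) {
  if (taskCount == 0) {
    return "0"; // no reduce tasks at all, nothing to report
  }
  String readable = Statistics.readableTimespan(durationMs);
  return readable.equals("") ? "0 sec" : readable; // guard against an empty readable timespan
}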
use of com.linkedin.drelephant.analysis.Severity in project dr-elephant by linkedin.
the class Web method restJobSummariesForUser.
/**
 * Returns the job-summaries JSON object for the given user.
 *
 * @param username The username for which the job-summaries JSON object should be returned
 * @return The job-summaries JSON object for the given username
 *
 * Response object:
 * <pre>
 * {
 *   "job-summaries": [
 *     {
 *       "id": "job-exec-id",
 *       "jobname": "jobname",
 *       "jobtype": "Pig",
 *       "username": "username",
 *       "starttime": 1471910835628,
 *       "finishtime": 1471911099238,
 *       "runtime": 263610,
 *       "waittime": 46234,
 *       "resourceused": 101382144,
 *       "resourcewasted": 15993417,
 *       "severity": "Moderate",
 *       "scheduler": "azkaban",
 *       "tasksseverity": [
 *         {
 *           "severity": "Moderate",
 *           "count": 1
 *         }
 *       ]
 *     }
 *   ]
 * }
 * </pre>
 */
public static Result restJobSummariesForUser(String username) {
  JsonArray jobSummaryArray = new JsonArray();

  List<AppResult> results = null;
  if (username == null || username.isEmpty()) {
    results = getSchedulerApplications(MAX_APPLICATIONS_IN_WORKFLOW);
  } else {
    results = getSchedulerApplications(username, MAX_APPLICATIONS_IN_WORKFLOW);
  }

  Map<IdUrlPair, List<AppResult>> jobExecIdToJobsMap = ControllerUtil.limitHistoryResults(
      ControllerUtil.groupJobs(results, ControllerUtil.GroupBy.JOB_EXECUTION_ID), results.size(), MAX_JOB_LIMIT);

  for (IdUrlPair jobDefPair : jobExecIdToJobsMap.keySet()) {
    long totalJobMemoryUsed = 0L;
    long totalJobMemoryWasted = 0L;
    long totalJobDelay = 0L;
    long totalJobRuntime = 0L;
    long jobStartTime = Long.MAX_VALUE;
    long jobEndTime = 0;
    Severity jobSeverity = Severity.NONE;
    String jobType = null;
    String jobId = jobDefPair.getId();
    String jobName = "";
    String user = null;
    String queueName = "";
    String scheduler = "";
    String jobDefId = "";
    String jobExecId = "";

    Map<Severity, Long> applicationSeverityCount = new HashMap<Severity, Long>();

    for (AppResult application : jobExecIdToJobsMap.get(jobDefPair)) {
      totalJobMemoryUsed += application.resourceUsed;
      totalJobMemoryWasted += application.resourceWasted;
      jobType = application.jobType;
      jobName = application.jobName;
      jobDefId = application.jobDefId;
      jobExecId = application.jobExecId;
      queueName = application.queueName;
      scheduler = application.scheduler;

      if (application.startTime < jobStartTime) {
        jobStartTime = application.startTime;
      }
      if (application.finishTime > jobEndTime) {
        jobEndTime = application.finishTime;
      }
      if (application.severity.getValue() > jobSeverity.getValue()) {
        jobSeverity = application.severity;
      }

      if (applicationSeverityCount.containsKey(application.severity)) {
        applicationSeverityCount.put(application.severity, applicationSeverityCount.get(application.severity) + 1L);
      } else {
        applicationSeverityCount.put(application.severity, 1L);
      }

      user = application.username;
    }

    JsonArray applicationSeverity = new JsonArray();
    List<Severity> keys = getSortedSeverityKeys(applicationSeverityCount.keySet());
    for (Severity key : keys) {
      JsonObject severityObject = new JsonObject();
      severityObject.addProperty(JsonKeys.SEVERITY, key.getText());
      severityObject.addProperty(JsonKeys.COUNT, applicationSeverityCount.get(key));
      applicationSeverity.add(severityObject);
    }

    totalJobDelay = Utils.getTotalWaittime(jobExecIdToJobsMap.get(jobDefPair));
    totalJobRuntime = Utils.getTotalRuntime(jobExecIdToJobsMap.get(jobDefPair));

    JsonObject jobObject = new JsonObject();
    jobObject.addProperty(JsonKeys.ID, jobId);
    jobObject.addProperty(JsonKeys.JOB_NAME, jobName);
    jobObject.addProperty(JsonKeys.JOB_TYPE, jobType);
    jobObject.addProperty(JsonKeys.USERNAME, user);
    jobObject.addProperty(JsonKeys.START_TIME, jobStartTime);
    jobObject.addProperty(JsonKeys.FINISH_TIME, jobEndTime);
    jobObject.addProperty(JsonKeys.RUNTIME, totalJobRuntime);
    jobObject.addProperty(JsonKeys.WAITTIME, totalJobDelay);
    jobObject.addProperty(JsonKeys.RESOURCE_USED, totalJobMemoryUsed);
    jobObject.addProperty(JsonKeys.RESOURCE_WASTED, totalJobMemoryWasted);
    jobObject.addProperty(JsonKeys.QUEUE, queueName);
    jobObject.addProperty(JsonKeys.SCHEDULER, scheduler);
    jobObject.addProperty(JsonKeys.SEVERITY, jobSeverity.getText());
    jobObject.addProperty(JsonKeys.JOB_DEF_ID, jobDefId);
    jobObject.addProperty(JsonKeys.JOB_EXEC_ID, jobExecId);
    jobObject.add(JsonKeys.TASKS_SEVERITY, applicationSeverity);

    jobSummaryArray.add(jobObject);
  }

  JsonArray sortedJobSummaryArray = getSortedJsonArrayByFinishTime(jobSummaryArray);
  JsonObject parent = new JsonObject();
  parent.add(JsonKeys.JOB_SUMMARIES, sortedJobSummaryArray);

  return ok(new Gson().toJson(parent));
}
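The per-severity count inside the inner loop uses an explicit containsKey check before put. On Java 8 and later the same aggregation can be expressed with Map.merge; an equivalent sketch (same behaviour, not the project's code):

Map<Severity, Long> applicationSeverityCount = new HashMap<Severity, Long>();
for (AppResult application : jobExecIdToJobsMap.get(jobDefPair)) {
  // Start at 1 for a severity seen for the first time, otherwise add 1 to the existing count.
  applicationSeverityCount.merge(application.severity, 1L, Long::sum);
}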