Use of models.JobExecution in the dr-elephant project by LinkedIn.
Class AzkabanJobCompleteDetector, method getCompletedExecutions.
/**
 * Returns the started executions that have reached a terminal state in Azkaban.
 *
 * <p>For each execution, the job statuses of its flow execution are fetched via
 * {@code _azkabanJobStatusUtil.getJobsFromFlow(...)}. The entry whose key equals the
 * execution's job name is mapped as follows:
 * <ul>
 *   <li>FAILED -> execution state FAILED</li>
 *   <li>CANCELLED or KILLED -> execution state CANCELLED</li>
 *   <li>SUCCEEDED -> execution state SUCCEEDED</li>
 * </ul>
 * In each of these cases the param set state is set to EXECUTED. Any matched execution
 * whose param set state is EXECUTED is added to the result. The states are only changed
 * on the in-memory objects; this method does not persist them.
 *
 * <p>A failure while checking one execution is logged and does not stop the others.
 *
 * @param jobExecutions Started Execution list
 * @return List of completed executions (never null; empty when nothing completed or on error)
 * @throws MalformedURLException declared in the signature; exceptions raised inside the body
 *         are caught and logged here rather than propagated
 * @throws URISyntaxException declared in the signature; exceptions raised inside the body
 *         are caught and logged here rather than propagated
 */
protected List<TuningJobExecution> getCompletedExecutions(List<TuningJobExecution> jobExecutions)
    throws MalformedURLException, URISyntaxException {
  logger.info("Fetching the list of executions completed since last iteration");
  List<TuningJobExecution> completedExecutions = new ArrayList<TuningJobExecution>();
  try {
    for (TuningJobExecution tuningJobExecution : jobExecutions) {
      JobExecution jobExecution = tuningJobExecution.jobExecution;
      logger.info("Checking current status of started execution: " + jobExecution.jobExecId);

      // Created lazily so that an empty input list never constructs the utility.
      if (_azkabanJobStatusUtil == null) {
        logger.info("Initializing AzkabanJobStatusUtil");
        _azkabanJobStatusUtil = new AzkabanJobStatusUtil();
      }

      try {
        Map<String, String> jobStatus =
            _azkabanJobStatusUtil.getJobsFromFlow(jobExecution.flowExecution.flowExecId);
        if (jobStatus == null) {
          logger.info("No jobs found for flow execution: " + jobExecution.flowExecution.flowExecId);
          continue;
        }

        for (Map.Entry<String, String> job : jobStatus.entrySet()) {
          logger.info("Job Found:" + job.getKey() + ". Status: " + job.getValue());
          if (!job.getKey().equals(jobExecution.job.jobName)) {
            continue;
          }

          // Compare constant-first so a null status value cannot raise a NullPointerException.
          String status = job.getValue();
          if (AzkabanJobStatus.FAILED.toString().equals(status)) {
            tuningJobExecution.paramSetState = ParamSetStatus.EXECUTED;
            jobExecution.executionState = ExecutionState.FAILED;
          } else if (AzkabanJobStatus.CANCELLED.toString().equals(status)
              || AzkabanJobStatus.KILLED.toString().equals(status)) {
            tuningJobExecution.paramSetState = ParamSetStatus.EXECUTED;
            jobExecution.executionState = ExecutionState.CANCELLED;
          } else if (AzkabanJobStatus.SUCCEEDED.toString().equals(status)) {
            tuningJobExecution.paramSetState = ParamSetStatus.EXECUTED;
            jobExecution.executionState = ExecutionState.SUCCEEDED;
          }

          // Null-safe: an unset param set state is treated as "not yet executed".
          if (ParamSetStatus.EXECUTED.equals(tuningJobExecution.paramSetState)) {
            completedExecutions.add(tuningJobExecution);
            logger.info("Execution " + jobExecution.jobExecId + " is completed");
          } else {
            logger.info("Execution " + jobExecution.jobExecId + " is still in running state");
          }
        }
      } catch (Exception e) {
        // Best effort: one failing status lookup must not prevent checking the remaining executions.
        logger.error("Error in checking status of execution: " + jobExecution.jobExecId, e);
      }
    }
  } catch (Exception e) {
    // The logger already records the stack trace; no separate printStackTrace() is needed.
    logger.error("Error in fetching list of completed executions", e);
  }
  logger.info("Number of executions completed since last iteration: " + completedExecutions.size());
  return completedExecutions;
}
Use of models.JobExecution in the dr-elephant project by LinkedIn.
Class FitnessComputeUtil, method updateExecutionMetrics.
/**
 * Updates the execution metrics and the fitness of each completed execution.
 *
 * <p>For every execution the tuning-enabled {@code TuningJobDefinition} of its job and the
 * {@code AppResult} rows matching its flow execution id and job execution id are loaded.
 * <ul>
 *   <li>When results exist, resource usage and input bytes are summed over them and the
 *       execution time is taken as total runtime minus total wait time. A non-zero
 *       execution time updates the metrics on the job execution and, if the definition has
 *       no average resource usage yet, seeds the definition's averages. Fitness is then
 *       computed and both records are updated.</li>
 *   <li>When no results exist, only FAILED/CANCELLED executions are handled: they receive
 *       the penalty fitness and zeroed metrics. Other executions are left untouched.</li>
 * </ul>
 * Any exception raised for one execution is logged and processing continues with the next.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
  for (TuningJobExecution tuningJobExecution : completedExecutions) {
    logger.info("Updating execution metrics and fitness for execution: " + tuningJobExecution.jobExecution.jobExecId);
    try {
      JobExecution jobExecution = tuningJobExecution.jobExecution;
      JobDefinition job = jobExecution.job;

      // job id match and tuning enabled
      TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*")
          .fetch(TuningJobDefinition.TABLE.job, "*")
          .where()
          .eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id)
          .eq(TuningJobDefinition.TABLE.tuningEnabled, 1)
          .findUnique();
      if (tuningJobDefinition == null) {
        // Fail explicitly instead of with a NullPointerException further down.
        logger.error("No tuning enabled job definition found for execution: " + jobExecution.jobExecId
            + ". Skipping fitness computation");
        continue;
      }

      List<AppResult> results = AppResult.find.select("*")
          .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS, "*")
          .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS + "." + AppHeuristicResult.TABLE.APP_HEURISTIC_RESULT_DETAILS, "*")
          .where()
          .eq(AppResult.TABLE.FLOW_EXEC_ID, jobExecution.flowExecution.flowExecId)
          .eq(AppResult.TABLE.JOB_EXEC_ID, jobExecution.jobExecId)
          .findList();

      if (results != null && !results.isEmpty()) {
        // Primitive accumulators avoid boxing on every iteration.
        double totalResourceUsed = 0D;
        double totalInputBytesInBytes = 0D;
        for (AppResult appResult : results) {
          totalResourceUsed += appResult.resourceUsed;
          totalInputBytesInBytes += getTotalInputBytes(appResult);
        }
        long totalRunTime = Utils.getTotalRuntime(results);
        long totalDelay = Utils.getTotalWaittime(results);
        long totalExecutionTime = totalRunTime - totalDelay;
        boolean hasExecutionTime = totalExecutionTime != 0;

        if (hasExecutionTime) {
          // NOTE(review): divisors suggest ms -> minutes and MB-seconds -> GB-hours; confirm units.
          jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
          jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
          jobExecution.inputSizeInBytes = totalInputBytesInBytes;
          logger.info("Metric Values for execution " + jobExecution.jobExecId + ": Execution time = " + totalExecutionTime + ", Resource usage = " + totalResourceUsed + " and total input size = " + totalInputBytesInBytes);
        }

        // The first measured execution seeds the averages stored on the definition.
        if (tuningJobDefinition.averageResourceUsage == null && hasExecutionTime) {
          tuningJobDefinition.averageResourceUsage = jobExecution.resourceUsage;
          tuningJobDefinition.averageExecutionTime = jobExecution.executionTime;
          tuningJobDefinition.averageInputSizeInBytes = jobExecution.inputSizeInBytes.longValue();
          tuningJobDefinition.update();
        }

        // Compute fitness
        // NOTE(review): a zero total input size yields an infinite/NaN fitness below; confirm intended.
        if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
            || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
          logger.info("Execution " + jobExecution.jobExecId + " failed/cancelled. Applying penalty");
          // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
          tuningJobExecution.fitness = computeFailurePenalty(tuningJobDefinition);
        } else if (jobExecution.resourceUsage
            > (tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
          // Todo: Check execution time constraint as well
          logger.info("Execution " + jobExecution.jobExecId + " violates constraint on resource usage");
          tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage
              * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB
              / (100.0 * totalInputBytesInBytes);
        } else {
          tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
        }
        tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
        jobExecution.update();
        tuningJobExecution.update();
      } else if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
          || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
        // No application results: only failed/cancelled executions can be finalised here.
        // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
        tuningJobExecution.fitness = computeFailurePenalty(tuningJobDefinition);
        jobExecution.executionTime = 0D;
        jobExecution.resourceUsage = 0D;
        jobExecution.inputSizeInBytes = 0D;
        tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
        jobExecution.update();
        tuningJobExecution.update();
      }
    } catch (Exception e) {
      logger.error("Error updating fitness of execution: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
    }
  }
  logger.info("Execution metrics updated");
}

/**
 * Computes the penalty fitness for a failed or cancelled execution: three times the allowed
 * maximum resource usage (average usage scaled by the allowed percentage), multiplied by
 * {@code FileUtils.ONE_GB} and divided by the definition's average input size in bytes.
 *
 * @param tuningJobDefinition definition supplying the averages and the allowed percentage
 * @return the penalty fitness value
 */
private double computeFailurePenalty(TuningJobDefinition tuningJobDefinition) {
  return 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
      * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
}
Aggregations