Use of models.TuningJobDefinition in project dr-elephant by LinkedIn.
From the class BaselineComputeUtil, method updateMetrics.
/**
 * Publishes the auto tuning monitoring metrics related to baseline computation.
 *
 * <p>Every definition that already carries an average resource usage triggers a
 * "baseline computed" mark; all remaining definitions are counted and the total is
 * reported as the number of jobs waiting for baseline computation.
 *
 * @param tuningJobDefinitions tuning job definitions to inspect
 */
private void updateMetrics(List<TuningJobDefinition> tuningJobDefinitions) {
  int pendingBaselineJobs = 0;
  for (TuningJobDefinition definition : tuningJobDefinitions) {
    if (definition.averageResourceUsage != null) {
      // Baseline already available for this definition.
      AutoTuningMetricsController.markBaselineComputed();
      continue;
    }
    pendingBaselineJobs += 1;
  }
  AutoTuningMetricsController.setBaselineComputeWaitJobs(pendingBaselineJobs);
}
Use of models.TuningJobDefinition in project dr-elephant by LinkedIn.
From the class FitnessComputeUtil, method updateExecutionMetrics.
/**
 * Updates the execution metrics and the fitness of every completed execution.
 *
 * <p>For each execution the tuning-enabled {@link TuningJobDefinition} of its job and the
 * {@link AppResult}s recorded for the execution are loaded. When results exist, the
 * execution time, resource usage and input size are aggregated and stored on the job
 * execution; if the definition has no baseline yet (average resource usage is null) and
 * the execution time is non-zero, this execution's metrics become the baseline. The fitness
 * is then computed: a penalty value for failed/cancelled executions and for executions that
 * exceed the allowed resource usage, otherwise resource usage per GB of input. When no
 * results exist, only failed/cancelled executions are handled (penalty fitness, zeroed
 * metrics).
 *
 * <p>Executions for which no tuning-enabled definition is found, or for which no baseline is
 * available when the fitness needs it, are logged and skipped without being persisted.
 * Any exception raised while handling one execution is logged and does not stop the
 * processing of the remaining executions.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
  for (TuningJobExecution tuningJobExecution : completedExecutions) {
    logger.info("Updating execution metrics and fitness for execution: " + tuningJobExecution.jobExecution.jobExecId);
    try {
      JobExecution jobExecution = tuningJobExecution.jobExecution;
      JobDefinition job = jobExecution.job;

      // job id match and tuning enabled
      TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*")
          .fetch(TuningJobDefinition.TABLE.job, "*")
          .where()
          .eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id)
          .eq(TuningJobDefinition.TABLE.tuningEnabled, 1)
          .findUnique();
      if (tuningJobDefinition == null) {
        // findUnique() yields null when no row matches; fail with a clear message instead of
        // relying on a NullPointerException further down.
        logger.error("No tuning enabled job definition found for execution " + jobExecution.jobExecId
            + ". Skipping fitness computation");
        continue;
      }

      List<AppResult> results = AppResult.find.select("*")
          .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS, "*")
          .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS + "." + AppHeuristicResult.TABLE.APP_HEURISTIC_RESULT_DETAILS, "*")
          .where()
          .eq(AppResult.TABLE.FLOW_EXEC_ID, jobExecution.flowExecution.flowExecId)
          .eq(AppResult.TABLE.JOB_EXEC_ID, jobExecution.jobExecId)
          .findList();

      if (results != null && results.size() > 0) {
        // Primitive accumulators: avoids boxing a new Double on every iteration.
        double totalResourceUsed = 0D;
        double totalInputBytesInBytes = 0D;
        for (AppResult appResult : results) {
          totalResourceUsed += appResult.resourceUsed;
          totalInputBytesInBytes += getTotalInputBytes(appResult);
        }
        long totalExecutionTime = Utils.getTotalRuntime(results) - Utils.getTotalWaittime(results);

        if (totalExecutionTime != 0) {
          jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
          jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
          jobExecution.inputSizeInBytes = totalInputBytesInBytes;
          logger.info("Metric Values for execution " + jobExecution.jobExecId + ": Execution time = " + totalExecutionTime + ", Resource usage = " + totalResourceUsed + " and total input size = " + totalInputBytesInBytes);
        }

        // First execution with usable metrics becomes the baseline of the definition.
        if (tuningJobDefinition.averageResourceUsage == null && totalExecutionTime != 0) {
          tuningJobDefinition.averageResourceUsage = jobExecution.resourceUsage;
          tuningJobDefinition.averageExecutionTime = jobExecution.executionTime;
          tuningJobDefinition.averageInputSizeInBytes = jobExecution.inputSizeInBytes.longValue();
          tuningJobDefinition.update();
        }

        if (tuningJobDefinition.averageResourceUsage == null) {
          // Every fitness formula below needs the baseline; without it nothing can be computed.
          logger.error("Baseline resource usage is not available for execution " + jobExecution.jobExecId
              + ". Skipping fitness computation");
          continue;
        }

        // Compute fitness
        if (isFailedOrCancelled(jobExecution)) {
          logger.info("Execution " + jobExecution.jobExecId + " failed/cancelled. Applying penalty");
          // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
          tuningJobExecution.fitness =
              computePenaltyFitness(tuningJobDefinition, tuningJobDefinition.averageInputSizeInBytes);
        } else if (jobExecution.resourceUsage > (// Todo: Check execution time constraint as well
            tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
          logger.info("Execution " + jobExecution.jobExecId + " violates constraint on resource usage");
          tuningJobExecution.fitness = computePenaltyFitness(tuningJobDefinition, totalInputBytesInBytes);
        } else {
          tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
        }
        tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
        jobExecution.update();
        tuningJobExecution.update();
      } else if (isFailedOrCancelled(jobExecution)) {
        if (tuningJobDefinition.averageResourceUsage == null) {
          // The penalty is derived from the baseline; without it nothing can be computed.
          logger.error("Baseline resource usage is not available for execution " + jobExecution.jobExecId
              + ". Skipping fitness computation");
          continue;
        }
        // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
        tuningJobExecution.fitness =
            computePenaltyFitness(tuningJobDefinition, tuningJobDefinition.averageInputSizeInBytes);
        jobExecution.executionTime = 0D;
        jobExecution.resourceUsage = 0D;
        jobExecution.inputSizeInBytes = 0D;
        tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
        jobExecution.update();
        tuningJobExecution.update();
      }
    } catch (Exception e) {
      logger.error("Error updating fitness of execution: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
    }
  }
  logger.info("Execution metrics updated");
}

/**
 * Tells whether the execution state of the given job execution is FAILED or CANCELLED.
 */
private boolean isFailedOrCancelled(JobExecution jobExecution) {
  return jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
      || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED);
}

/**
 * Computes the penalty fitness: three times the allowed maximum resource usage of the
 * definition (baseline usage scaled by the allowed percentage) per GB of the given input size.
 */
private double computePenaltyFitness(TuningJobDefinition tuningJobDefinition, double inputSizeInBytes) {
  return 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
      * FileUtils.ONE_GB / (100.0 * inputSizeInBytes);
}
Aggregations