use of models.JobDefinition in project dr-elephant by linkedin.
the class APIFitnessComputeUtil method updateExecutionMetrics.
/**
 * Updates the execution metrics and fitness of the given completed executions using the
 * Dr. Elephant REST endpoint {@code /rest/jobexec?id=<jobExecId>}.
 *
 * <p>For each execution the applications returned by the endpoint are aggregated into execution
 * time, resource usage and input size, a fitness value is derived, and both the job execution and
 * the tuning job execution are persisted with state {@code FITNESS_COMPUTED}. When the endpoint
 * returns no applications, only failed/cancelled executions are updated (with zeroed metrics and
 * the penalty fitness). Any exception raised while processing one execution is logged and the
 * loop moves on to the next execution.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
  logger.debug("Updating execution metrics");
  updateAuthToken();
  for (TuningJobExecution tuningJobExecution : completedExecutions) {
    logger.debug("Completed executions before updating metric: " + Json.toJson(tuningJobExecution));
    try {
      JobExecution jobExecution = tuningJobExecution.jobExecution;
      JobDefinition job = jobExecution.job;

      // Encode explicitly as UTF-8: the single-argument URLEncoder.encode is deprecated because
      // its output depends on the platform default charset.
      URL jobExecURL = new URL(new URL(_drElephantURL),
          String.format("/rest/jobexec?id=%s", URLEncoder.encode(jobExecution.jobExecId, "UTF-8")));
      HttpURLConnection conn = (HttpURLConnection) jobExecURL.openConnection();
      JsonNode allApps;
      try {
        java.io.InputStream responseStream = conn.getInputStream();
        try {
          allApps = _objectMapper.readTree(responseStream);
        } finally {
          // Release the stream even when parsing fails.
          responseStream.close();
        }
      } finally {
        // Release the connection on every path, including a failed getInputStream().
        conn.disconnect();
      }

      // job id match and tuning enabled
      TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*")
          .fetch(TuningJobDefinition.TABLE.job, "*")
          .where()
          .eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id)
          .eq(TuningJobDefinition.TABLE.tuningEnabled, 1)
          .findUnique();
      if (tuningJobDefinition == null) {
        // findUnique() yields null when no row matches; without the definition no fitness can be
        // computed, so report it explicitly instead of failing later with a NullPointerException.
        logger.error("No tuning enabled job definition found for job_exec_id: " + jobExecution.id
            + ". Skipping fitness computation");
        continue;
      }

      if (allApps != null && allApps.size() > 0) {
        // Primitive accumulators avoid re-boxing on every loop iteration.
        long totalExecutionTime = 0L;
        double totalResourceUsed = 0D;
        double totalInputBytesInBytes = 0D;
        for (JsonNode app : allApps) {
          logger.info("Job Execution Update: ApplicationID " + app.get("id").getTextValue());
          // Execution time of an application excludes the delay reported for it.
          long executionTime = app.get("finishTime").getLongValue() - app.get("startTime").getLongValue()
              - app.get("totalDelay").getLongValue();
          totalExecutionTime += executionTime;
          totalResourceUsed += app.get("resourceUsed").getDoubleValue();
          totalInputBytesInBytes += getTotalInputBytes(app.get("id").getTextValue());
        }
        if (totalExecutionTime != 0) {
          // NOTE(review): the divisors look like ms -> minutes and MB-seconds -> GB-hours
          // conversions; confirm against the units produced by the REST API.
          jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
          jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
          jobExecution.inputSizeInBytes = totalInputBytesInBytes;
          logger.info("Job Execution Update: UpdatedValue " + totalExecutionTime + ":" + totalResourceUsed + ":"
              + totalInputBytesInBytes);
        }
        logger.debug("Job execution " + jobExecution.resourceUsage);
        logger.debug("Job details: AvgResourceUsage " + tuningJobDefinition.averageResourceUsage
            + ", allowedMaxResourceUsagePercent: " + tuningJobDefinition.allowedMaxResourceUsagePercent);
        // NOTE(review): if jobExecution.resourceUsage or the definition's averages are unset
        // (null), the arithmetic below throws and is reported by the catch block; a zero
        // totalInputBytesInBytes yields a non-finite fitness. Confirm the intended handling.
        if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
            || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
          // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
          // Penalty: three times the allowed resource usage, normalised by the average input size.
          tuningJobExecution.fitness =
              3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
                  * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
        } else if (jobExecution.resourceUsage > (tuningJobDefinition.averageResourceUsage
            * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
          // Resource usage limit exceeded: same penalty, normalised by this execution's input size.
          tuningJobExecution.fitness =
              3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
                  * FileUtils.ONE_GB / (100.0 * totalInputBytesInBytes);
        } else {
          // Within limits: resource usage per GB of input.
          tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
        }
        tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
        jobExecution.update();
        tuningJobExecution.update();
        logger.debug("Completed executions after updating metrics: " + Json.toJson(tuningJobExecution));
      } else {
        // No applications reported: only failed/cancelled executions are finalised here.
        if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
            || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
          // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
          tuningJobExecution.fitness =
              3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
                  * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
          jobExecution.executionTime = 0D;
          jobExecution.resourceUsage = 0D;
          jobExecution.inputSizeInBytes = 0D;
          tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
          jobExecution.update();
          tuningJobExecution.update();
        }
      }
    } catch (Exception e) {
      // Best effort per execution: log and continue with the remaining executions.
      logger.error("Error updating fitness of job_exec_id: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
    }
  }
  logger.debug("Execution metrics updated");
}
use of models.JobDefinition in project dr-elephant by linkedin.
the class AutoTuningAPIHelper method addNewJobForTuning.
/**
 * Registers a job for tuning.
 *
 * <p>Looks up the job and flow definitions by the ids carried in the tuning input, saving new
 * ones when they are missing, then saves a new tuning job definition with tuning enabled and
 * records the default execution together with its default parameters.
 *
 * @param tuningInput Tuning input parameters
 * @return the tuning job definition that was saved for the job
 */
private TuningJobDefinition addNewJobForTuning(TuningInput tuningInput) {
  logger.info("Adding new job for tuning, job id: " + tuningInput.getJobDefId());

  JobDefinition job = JobDefinition.find.select("*")
      .where()
      .eq(JobDefinition.TABLE.jobDefId, tuningInput.getJobDefId())
      .findUnique();
  FlowDefinition flowDefinition = FlowDefinition.find
      .where()
      .eq(FlowDefinition.TABLE.flowDefId, tuningInput.getFlowDefId())
      .findUnique();

  // Save the flow first so that a newly created job can reference it.
  if (flowDefinition == null) {
    FlowDefinition createdFlow = new FlowDefinition();
    createdFlow.flowDefId = tuningInput.getFlowDefId();
    createdFlow.flowDefUrl = tuningInput.getFlowDefUrl();
    createdFlow.save();
    flowDefinition = createdFlow;
  }

  if (job == null) {
    JobDefinition createdJob = new JobDefinition();
    createdJob.jobDefId = tuningInput.getJobDefId();
    createdJob.scheduler = tuningInput.getScheduler();
    createdJob.username = tuningInput.getUserName();
    createdJob.jobName = tuningInput.getJobName();
    createdJob.jobDefUrl = tuningInput.getJobDefUrl();
    createdJob.flowDefinition = flowDefinition;
    createdJob.save();
    job = createdJob;
  }

  // Execution details of the run that triggered the registration.
  String flowExecutionId = tuningInput.getFlowExecId();
  String jobExecutionId = tuningInput.getJobExecId();
  String flowExecutionUrl = tuningInput.getFlowExecUrl();
  String jobExecutionUrl = tuningInput.getJobExecUrl();
  String clientName = tuningInput.getClient();
  String defaultParameters = tuningInput.getDefaultParams();

  TuningJobDefinition newTuningJob = new TuningJobDefinition();
  newTuningJob.job = job;
  newTuningJob.client = clientName;
  newTuningJob.tuningAlgorithm = tuningInput.getTuningAlgorithm();
  newTuningJob.tuningEnabled = 1;
  newTuningJob.allowedMaxExecutionTimePercent = tuningInput.getAllowedMaxExecutionTimePercent();
  newTuningJob.allowedMaxResourceUsagePercent = tuningInput.getAllowedMaxResourceUsagePercent();
  newTuningJob.save();

  TuningJobExecution defaultExecution = insertDefaultJobExecution(job, flowExecutionId, jobExecutionId,
      flowExecutionUrl, jobExecutionUrl, flowDefinition, tuningInput.getTuningAlgorithm());
  insertDefaultParameters(defaultExecution.jobExecution, defaultParameters);

  logger.info("Added job: " + tuningInput.getJobDefId() + " for tuning");
  return newTuningJob;
}
use of models.JobDefinition in project dr-elephant by linkedin.
the class FitnessComputeUtil method updateExecutionMetrics.
/**
 * Updates the execution metrics and fitness of the given completed executions from the
 * {@code AppResult} rows stored for each execution.
 *
 * <p>For each execution the matching application results are aggregated into execution time,
 * resource usage and input size. If the tuning job definition has no average resource usage yet,
 * the first non-zero measurement is stored as its averages. A fitness value is then derived and
 * both the job execution and the tuning job execution are persisted with state
 * {@code FITNESS_COMPUTED}. When no application results exist, only failed/cancelled executions
 * are updated (with zeroed metrics and the penalty fitness). Any exception raised while processing
 * one execution is logged and the loop moves on to the next execution.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
  for (TuningJobExecution tuningJobExecution : completedExecutions) {
    logger.info("Updating execution metrics and fitness for execution: " + tuningJobExecution.jobExecution.jobExecId);
    try {
      JobExecution jobExecution = tuningJobExecution.jobExecution;
      JobDefinition job = jobExecution.job;

      // job id match and tuning enabled
      TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*")
          .fetch(TuningJobDefinition.TABLE.job, "*")
          .where()
          .eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id)
          .eq(TuningJobDefinition.TABLE.tuningEnabled, 1)
          .findUnique();
      if (tuningJobDefinition == null) {
        // findUnique() yields null when no row matches; without the definition no fitness can be
        // computed, so report it explicitly instead of failing later with a NullPointerException.
        logger.error("No tuning enabled job definition found for execution: " + jobExecution.jobExecId
            + ". Skipping fitness computation");
        continue;
      }

      List<AppResult> results = AppResult.find.select("*")
          .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS, "*")
          .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS + "." + AppHeuristicResult.TABLE.APP_HEURISTIC_RESULT_DETAILS, "*")
          .where()
          .eq(AppResult.TABLE.FLOW_EXEC_ID, jobExecution.flowExecution.flowExecId)
          .eq(AppResult.TABLE.JOB_EXEC_ID, jobExecution.jobExecId)
          .findList();

      if (results != null && results.size() > 0) {
        // Primitive accumulators avoid re-boxing on every loop iteration.
        double totalResourceUsed = 0D;
        double totalInputBytesInBytes = 0D;
        for (AppResult appResult : results) {
          totalResourceUsed += appResult.resourceUsed;
          totalInputBytesInBytes += getTotalInputBytes(appResult);
        }
        // Execution time is the total runtime of the applications minus their total wait time.
        long totalRunTime = Utils.getTotalRuntime(results);
        long totalDelay = Utils.getTotalWaittime(results);
        long totalExecutionTime = totalRunTime - totalDelay;

        if (totalExecutionTime != 0) {
          // NOTE(review): the divisors look like ms -> minutes and MB-seconds -> GB-hours
          // conversions; confirm against the units stored in AppResult.
          jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
          jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
          jobExecution.inputSizeInBytes = totalInputBytesInBytes;
          logger.info("Metric Values for execution " + jobExecution.jobExecId + ": Execution time = "
              + totalExecutionTime + ", Resource usage = " + totalResourceUsed + " and total input size = "
              + totalInputBytesInBytes);
        }

        // Seed the definition's averages from the first execution that produced measurements.
        if (tuningJobDefinition.averageResourceUsage == null && totalExecutionTime != 0) {
          tuningJobDefinition.averageResourceUsage = jobExecution.resourceUsage;
          tuningJobDefinition.averageExecutionTime = jobExecution.executionTime;
          tuningJobDefinition.averageInputSizeInBytes = jobExecution.inputSizeInBytes.longValue();
          tuningJobDefinition.update();
        }

        // Compute fitness
        // NOTE(review): if jobExecution.resourceUsage or the definition's averages are still unset
        // (null), the arithmetic below throws and is reported by the catch block; a zero
        // totalInputBytesInBytes yields a non-finite fitness. Confirm the intended handling.
        if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
            || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
          logger.info("Execution " + jobExecution.jobExecId + " failed/cancelled. Applying penalty");
          // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
          // Penalty: three times the allowed resource usage, normalised by the average input size.
          tuningJobExecution.fitness =
              3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
                  * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
        } else if (jobExecution.resourceUsage > (
            // Todo: Check execution time constraint as well
            tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
          logger.info("Execution " + jobExecution.jobExecId + " violates constraint on resource usage");
          // Same penalty, normalised by this execution's input size.
          tuningJobExecution.fitness =
              3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
                  * FileUtils.ONE_GB / (100.0 * totalInputBytesInBytes);
        } else {
          // Within limits: resource usage per GB of input.
          tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
        }
        tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
        jobExecution.update();
        tuningJobExecution.update();
      } else {
        // No application results: only failed/cancelled executions are finalised here.
        if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
            || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
          // Todo: Check if the reason of failure is auto tuning and handle cancelled cases
          tuningJobExecution.fitness =
              3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent
                  * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
          jobExecution.executionTime = 0D;
          jobExecution.resourceUsage = 0D;
          jobExecution.inputSizeInBytes = 0D;
          tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
          jobExecution.update();
          tuningJobExecution.update();
        }
      }
    } catch (Exception e) {
      // Best effort per execution: log and continue with the remaining executions.
      logger.error("Error updating fitness of execution: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
    }
  }
  logger.info("Execution metrics updated");
}
Aggregations