Search in sources :

Example 6 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class BaselineComputeUtil method updateMetrics.

/**
 * Refreshes the auto tuning monitoring metrics related to baseline computation.
 *
 * <p>A job definition whose {@code averageResourceUsage} is still {@code null} is counted as
 * waiting for its baseline; for every other definition
 * {@code AutoTuningMetricsController.markBaselineComputed()} is invoked once. The number of
 * waiting jobs is then published through
 * {@code AutoTuningMetricsController.setBaselineComputeWaitJobs(int)}.
 *
 * @param tuningJobDefinitions the tuning job definitions to inspect
 */
private void updateMetrics(List<TuningJobDefinition> tuningJobDefinitions) {
    int pendingBaselineCount = 0;
    for (TuningJobDefinition definition : tuningJobDefinitions) {
        boolean baselineAvailable = definition.averageResourceUsage != null;
        if (baselineAvailable) {
            AutoTuningMetricsController.markBaselineComputed();
            continue;
        }
        // No average resource usage recorded yet: the baseline is still pending.
        pendingBaselineCount += 1;
    }
    AutoTuningMetricsController.setBaselineComputeWaitJobs(pendingBaselineCount);
}
Also used : TuningJobDefinition(models.TuningJobDefinition)

Example 7 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class FitnessComputeUtil method updateExecutionMetrics.

/**
 * Updates the execution metrics and computes the fitness of every completed execution.
 *
 * <p>For each execution the tuning-enabled {@code TuningJobDefinition} of its job and the
 * {@code AppResult}s of the execution are loaded. When results exist, execution time, resource
 * usage and input size are derived from them and stored on the {@code JobExecution}; if the
 * definition has no baseline yet ({@code averageResourceUsage == null}) these values also become
 * its baseline. The fitness is then set to
 * <ul>
 *   <li>a penalty value when the execution failed or was cancelled,</li>
 *   <li>a penalty value when the resource usage exceeds the allowed percentage of the baseline,</li>
 *   <li>otherwise the resource usage per {@code FileUtils.ONE_GB} of input.</li>
 * </ul>
 * When no results exist, only failed/cancelled executions are updated (penalty fitness, zeroed
 * metrics). Executions without a matching tuning-enabled job definition are skipped. Any
 * exception raised while processing one execution is logged and does not stop the loop.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
    for (TuningJobExecution tuningJobExecution : completedExecutions) {
        logger.info("Updating execution metrics and fitness for execution: " + tuningJobExecution.jobExecution.jobExecId);
        try {
            JobExecution jobExecution = tuningJobExecution.jobExecution;
            JobDefinition job = jobExecution.job;
            // job id match and tuning enabled
            TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*")
                    .fetch(TuningJobDefinition.TABLE.job, "*")
                    .where()
                    .eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id)
                    .eq(TuningJobDefinition.TABLE.tuningEnabled, 1)
                    .findUnique();
            if (tuningJobDefinition == null) {
                // findUnique() yields null when no row matches; without this guard the case
                // only showed up as an opaque NullPointerException further down.
                logger.error("No tuning enabled job definition found for execution: " + jobExecution.jobExecId + ". Skipping fitness computation");
                continue;
            }
            List<AppResult> results = AppResult.find.select("*")
                    .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS, "*")
                    .fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS + "." + AppHeuristicResult.TABLE.APP_HEURISTIC_RESULT_DETAILS, "*")
                    .where()
                    .eq(AppResult.TABLE.FLOW_EXEC_ID, jobExecution.flowExecution.flowExecId)
                    .eq(AppResult.TABLE.JOB_EXEC_ID, jobExecution.jobExecId)
                    .findList();
            if (results != null && results.size() > 0) {
                // Primitive accumulators avoid re-boxing on every iteration.
                double totalResourceUsed = 0D;
                double totalInputBytesInBytes = 0D;
                for (AppResult appResult : results) {
                    totalResourceUsed += appResult.resourceUsed;
                    totalInputBytesInBytes += getTotalInputBytes(appResult);
                }
                long totalRunTime = Utils.getTotalRuntime(results);
                long totalDelay = Utils.getTotalWaittime(results);
                long totalExecutionTime = totalRunTime - totalDelay;
                if (totalExecutionTime != 0) {
                    // NOTE(review): divisors suggest ms -> minutes and MB-seconds -> GB-hours; confirm units.
                    jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
                    jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
                    jobExecution.inputSizeInBytes = totalInputBytesInBytes;
                    logger.info("Metric Values for execution " + jobExecution.jobExecId + ": Execution time = " + totalExecutionTime + ", Resource usage = " + totalResourceUsed + " and total input size = " + totalInputBytesInBytes);
                }
                if (tuningJobDefinition.averageResourceUsage == null && totalExecutionTime != 0) {
                    // First execution with usable metrics becomes the baseline of the job.
                    tuningJobDefinition.averageResourceUsage = jobExecution.resourceUsage;
                    tuningJobDefinition.averageExecutionTime = jobExecution.executionTime;
                    tuningJobDefinition.averageInputSizeInBytes = jobExecution.inputSizeInBytes.longValue();
                    tuningJobDefinition.update();
                }
                // Compute fitness
                // NOTE(review): if totalExecutionTime is 0, the baseline and/or
                // jobExecution.resourceUsage may still be null here and unboxing below would
                // throw (caught and logged by the catch block) - confirm the intended handling.
                if (isFailedOrCancelled(jobExecution)) {
                    logger.info("Execution " + jobExecution.jobExecId + " failed/cancelled. Applying penalty");
                    // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                    tuningJobExecution.fitness = penaltyFitness(tuningJobDefinition, tuningJobDefinition.averageInputSizeInBytes);
                } else if (jobExecution.resourceUsage > (// Todo: Check execution time constraint as well
                tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
                    logger.info("Execution " + jobExecution.jobExecId + " violates constraint on resource usage");
                    tuningJobExecution.fitness = penaltyFitness(tuningJobDefinition, totalInputBytesInBytes);
                } else {
                    tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
                }
                tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                jobExecution.update();
                tuningJobExecution.update();
            } else {
                if (isFailedOrCancelled(jobExecution)) {
                    // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                    tuningJobExecution.fitness = penaltyFitness(tuningJobDefinition, tuningJobDefinition.averageInputSizeInBytes);
                    jobExecution.executionTime = 0D;
                    jobExecution.resourceUsage = 0D;
                    jobExecution.inputSizeInBytes = 0D;
                    tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                    jobExecution.update();
                    tuningJobExecution.update();
                }
            }
        } catch (Exception e) {
            logger.error("Error updating fitness of execution: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
        }
    }
    logger.info("Execution metrics updated");
}

/**
 * Tells whether the execution ended in the FAILED or CANCELLED state.
 */
private boolean isFailedOrCancelled(JobExecution jobExecution) {
    return jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED)
            || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED);
}

/**
 * Computes the penalty fitness: three times the allowed maximum resource usage (baseline usage
 * scaled by the allowed percentage) per {@code FileUtils.ONE_GB} of the given input size.
 */
private double penaltyFitness(TuningJobDefinition tuningJobDefinition, double inputSizeInBytes) {
    return 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * inputSizeInBytes);
}
Also used : TuningJobExecution(models.TuningJobExecution) JobExecution(models.JobExecution) TuningJobExecution(models.TuningJobExecution) TuningJobDefinition(models.TuningJobDefinition) JobDefinition(models.JobDefinition) TuningJobDefinition(models.TuningJobDefinition) AppResult(models.AppResult)

Aggregations

TuningJobDefinition (models.TuningJobDefinition)7 TuningJobExecution (models.TuningJobExecution)4 JobDefinition (models.JobDefinition)3 JobExecution (models.JobExecution)2 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 BaselineComputeUtil (com.linkedin.drelephant.tuning.BaselineComputeUtil)1 IOException (java.io.IOException)1 HttpURLConnection (java.net.HttpURLConnection)1 URL (java.net.URL)1 HashMap (java.util.HashMap)1 AppResult (models.AppResult)1 FlowDefinition (models.FlowDefinition)1 JobSuggestedParamValue (models.JobSuggestedParamValue)1 Configuration (org.apache.hadoop.conf.Configuration)1 AuthenticatedURL (org.apache.hadoop.security.authentication.client.AuthenticatedURL)1 AuthenticationException (org.apache.hadoop.security.authentication.client.AuthenticationException)1 JsonNode (org.codehaus.jackson.JsonNode)1 Test (org.junit.Test)1