Search in sources :

Example 1 with JobDefinition

use of models.JobDefinition in project dr-elephant by linkedin.

In class APIFitnessComputeUtil, method updateExecutionMetrics.

/**
 * Updates the execution metrics and the fitness of every completed execution.
 *
 * <p>For each execution the tuning-enabled {@code TuningJobDefinition} of its job is looked up, the
 * applications of the job execution are fetched from the REST endpoint
 * {@code /rest/jobexec?id=<jobExecId>} under {@code _drElephantURL}, and their execution time,
 * resource usage and input size are summed up. The fitness is then set as follows:
 * <ul>
 *   <li>failed or cancelled execution: a penalty based on the job's average resource usage, allowed
 *       maximum resource usage percent and average input size;</li>
 *   <li>resource usage above the allowed maximum: the same penalty, but relative to the input size
 *       of this execution;</li>
 *   <li>otherwise: resource usage per GB of input.</li>
 * </ul>
 * When no applications are returned, only failed or cancelled executions are updated (with the
 * penalty fitness and zeroed metrics); other executions are left untouched.
 *
 * <p>Any exception raised while processing one execution is logged and does not stop the
 * processing of the remaining executions.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
    logger.debug("Updating execution metrics");
    updateAuthToken();
    for (TuningJobExecution tuningJobExecution : completedExecutions) {
        logger.debug("Completed executions before updating metric: " + Json.toJson(tuningJobExecution));
        try {
            JobExecution jobExecution = tuningJobExecution.jobExecution;
            JobDefinition job = jobExecution.job;

            // job id match and tuning enabled
            // Looked up before the HTTP request so that no network call is made for a job whose
            // fitness cannot be computed anyway.
            TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*").fetch(TuningJobDefinition.TABLE.job, "*").where().eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id).eq(TuningJobDefinition.TABLE.tuningEnabled, 1).findUnique();
            if (tuningJobDefinition == null) {
                // findUnique() yields null when no row matches; fail with a clear message instead of
                // a NullPointerException further down.
                logger.error("No tuning enabled job definition found for job_exec_id: " + jobExecution.id + ". Skipping fitness update");
                continue;
            }

            // Explicit UTF-8: the single-argument URLEncoder.encode is deprecated and depends on the
            // platform default charset.
            URL jobExecURL = new URL(new URL(_drElephantURL), String.format("/rest/jobexec?id=%s", URLEncoder.encode(jobExecution.jobExecId, "UTF-8")));
            HttpURLConnection conn = (HttpURLConnection) jobExecURL.openConnection();
            JsonNode allApps;
            // Fully qualified so that no additional import is required.
            java.io.InputStream responseStream = conn.getInputStream();
            try {
                allApps = _objectMapper.readTree(responseStream);
            } finally {
                // Always release the response stream, even when parsing fails.
                responseStream.close();
            }

            boolean failedOrCancelled = jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED) || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED);

            if (allApps != null && allApps.size() > 0) {
                // Primitive accumulators avoid boxing on every loop iteration.
                long totalExecutionTime = 0L;
                double totalResourceUsed = 0D;
                double totalInputBytesInBytes = 0D;
                for (JsonNode app : allApps) {
                    logger.info("Job Execution Update: ApplicationID " + app.get("id").getTextValue());
                    long executionTime = app.get("finishTime").getLongValue() - app.get("startTime").getLongValue() - app.get("totalDelay").getLongValue();
                    totalExecutionTime += executionTime;
                    totalResourceUsed += app.get("resourceUsed").getDoubleValue();
                    totalInputBytesInBytes += getTotalInputBytes(app.get("id").getTextValue());
                }
                if (totalExecutionTime != 0) {
                    jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
                    jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
                    jobExecution.inputSizeInBytes = totalInputBytesInBytes;
                    logger.info("Job Execution Update: UpdatedValue " + totalExecutionTime + ":" + totalResourceUsed + ":" + totalInputBytesInBytes);
                }
                logger.debug("Job execution " + jobExecution.resourceUsage);
                logger.debug("Job details: AvgResourceUsage " + tuningJobDefinition.averageResourceUsage + ", allowedMaxResourceUsagePercent: " + tuningJobDefinition.allowedMaxResourceUsagePercent);
                // NOTE(review): totalInputBytesInBytes may be 0 here, in which case the two divisions
                // by it below produce Infinity/NaN - confirm whether that needs special handling.
                if (failedOrCancelled) {
                    // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
                } else if (jobExecution.resourceUsage > (tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * totalInputBytesInBytes);
                } else {
                    tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
                }
                tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                jobExecution.update();
                tuningJobExecution.update();
                logger.debug("Completed executions after updating metrics: " + Json.toJson(tuningJobExecution));
            } else if (failedOrCancelled) {
                // No application data available: penalise the failed/cancelled run and zero its metrics.
                // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
                jobExecution.executionTime = 0D;
                jobExecution.resourceUsage = 0D;
                jobExecution.inputSizeInBytes = 0D;
                tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                jobExecution.update();
                tuningJobExecution.update();
            }
        } catch (Exception e) {
            // Best effort per execution: log and carry on with the next one.
            logger.error("Error updating fitness of job_exec_id: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
        }
    }
    logger.debug("Execution metrics updated");
}
Also used : TuningJobExecution(models.TuningJobExecution) JobExecution(models.JobExecution) HttpURLConnection(java.net.HttpURLConnection) JsonNode(org.codehaus.jackson.JsonNode) TuningJobExecution(models.TuningJobExecution) TuningJobDefinition(models.TuningJobDefinition) JobDefinition(models.JobDefinition) TuningJobDefinition(models.TuningJobDefinition) URL(java.net.URL) AuthenticatedURL(org.apache.hadoop.security.authentication.client.AuthenticatedURL) AuthenticationException(org.apache.hadoop.security.authentication.client.AuthenticationException) IOException(java.io.IOException)

Example 2 with JobDefinition

use of models.JobDefinition in project dr-elephant by linkedin.

In class AutoTuningAPIHelper, method addNewJobForTuning.

/**
 * Registers a job for tuning.
 *
 * <p>The job definition and flow definition are looked up by the ids carried in the input; whichever
 * does not exist yet is created and saved. A new tuning-enabled {@code TuningJobDefinition} is then
 * saved for the job, and the default job execution together with its default parameters is inserted.
 *
 * @param tuningInput Tuning input parameters
 * @return the tuning job definition that was saved for the job
 */
private TuningJobDefinition addNewJobForTuning(TuningInput tuningInput) {
    logger.info("Adding new job for tuning, job id: " + tuningInput.getJobDefId());

    JobDefinition jobDefinition = JobDefinition.find
            .select("*")
            .where()
            .eq(JobDefinition.TABLE.jobDefId, tuningInput.getJobDefId())
            .findUnique();
    FlowDefinition flow = FlowDefinition.find
            .where()
            .eq(FlowDefinition.TABLE.flowDefId, tuningInput.getFlowDefId())
            .findUnique();

    // Create the flow first: a newly created job needs to reference it.
    if (flow == null) {
        flow = new FlowDefinition();
        flow.flowDefId = tuningInput.getFlowDefId();
        flow.flowDefUrl = tuningInput.getFlowDefUrl();
        flow.save();
    }

    if (jobDefinition == null) {
        jobDefinition = new JobDefinition();
        jobDefinition.jobDefId = tuningInput.getJobDefId();
        jobDefinition.scheduler = tuningInput.getScheduler();
        jobDefinition.username = tuningInput.getUserName();
        jobDefinition.jobName = tuningInput.getJobName();
        jobDefinition.jobDefUrl = tuningInput.getJobDefUrl();
        jobDefinition.flowDefinition = flow;
        jobDefinition.save();
    }

    // Tuning is switched on for the job as soon as it is registered.
    TuningJobDefinition tuningDefinition = new TuningJobDefinition();
    tuningDefinition.job = jobDefinition;
    tuningDefinition.client = tuningInput.getClient();
    tuningDefinition.tuningAlgorithm = tuningInput.getTuningAlgorithm();
    tuningDefinition.tuningEnabled = 1;
    tuningDefinition.allowedMaxExecutionTimePercent = tuningInput.getAllowedMaxExecutionTimePercent();
    tuningDefinition.allowedMaxResourceUsagePercent = tuningInput.getAllowedMaxResourceUsagePercent();
    tuningDefinition.save();

    TuningJobExecution defaultExecution = insertDefaultJobExecution(
            jobDefinition,
            tuningInput.getFlowExecId(),
            tuningInput.getJobExecId(),
            tuningInput.getFlowExecUrl(),
            tuningInput.getJobExecUrl(),
            flow,
            tuningInput.getTuningAlgorithm());
    insertDefaultParameters(defaultExecution.jobExecution, tuningInput.getDefaultParams());

    logger.info("Added job: " + tuningInput.getJobDefId() + " for tuning");
    return tuningDefinition;
}
Also used : TuningJobDefinition(models.TuningJobDefinition) TuningJobExecution(models.TuningJobExecution) FlowDefinition(models.FlowDefinition) JobDefinition(models.JobDefinition) TuningJobDefinition(models.TuningJobDefinition)

Example 3 with JobDefinition

use of models.JobDefinition in project dr-elephant by linkedin.

In class FitnessComputeUtil, method updateExecutionMetrics.

/**
 * Updates the execution metrics and the fitness of every completed execution.
 *
 * <p>For each execution the tuning-enabled {@code TuningJobDefinition} of its job and the
 * {@code AppResult} rows matching its flow execution id and job execution id are loaded. Resource
 * usage and input size are summed over the results, and the execution time is the total runtime
 * minus the total wait time. If the job has no average resource usage yet, the metrics of this
 * execution are stored as the job's averages. The fitness is then set as follows:
 * <ul>
 *   <li>failed or cancelled execution: a penalty based on the job's average resource usage, allowed
 *       maximum resource usage percent and average input size;</li>
 *   <li>resource usage above the allowed maximum: the same penalty, but relative to the input size
 *       of this execution;</li>
 *   <li>otherwise: resource usage per GB of input.</li>
 * </ul>
 * When there are no results, only failed or cancelled executions are updated (with the penalty
 * fitness and zeroed metrics); other executions are left untouched.
 *
 * <p>Any exception raised while processing one execution is logged and does not stop the
 * processing of the remaining executions.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
    for (TuningJobExecution tuningJobExecution : completedExecutions) {
        logger.info("Updating execution metrics and fitness for execution: " + tuningJobExecution.jobExecution.jobExecId);
        try {
            JobExecution jobExecution = tuningJobExecution.jobExecution;
            JobDefinition job = jobExecution.job;

            // job id match and tuning enabled
            TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*").fetch(TuningJobDefinition.TABLE.job, "*").where().eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id).eq(TuningJobDefinition.TABLE.tuningEnabled, 1).findUnique();
            if (tuningJobDefinition == null) {
                // findUnique() yields null when no row matches; fail with a clear message instead of
                // a NullPointerException further down.
                logger.error("No tuning enabled job definition found for execution: " + jobExecution.jobExecId + ". Skipping fitness update");
                continue;
            }

            List<AppResult> results = AppResult.find.select("*").fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS, "*").fetch(AppResult.TABLE.APP_HEURISTIC_RESULTS + "." + AppHeuristicResult.TABLE.APP_HEURISTIC_RESULT_DETAILS, "*").where().eq(AppResult.TABLE.FLOW_EXEC_ID, jobExecution.flowExecution.flowExecId).eq(AppResult.TABLE.JOB_EXEC_ID, jobExecution.jobExecId).findList();

            boolean failedOrCancelled = jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED) || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED);

            if (results != null && results.size() > 0) {
                // Primitive accumulators avoid boxing on every loop iteration.
                double totalResourceUsed = 0D;
                double totalInputBytesInBytes = 0D;
                for (AppResult appResult : results) {
                    totalResourceUsed += appResult.resourceUsed;
                    totalInputBytesInBytes += getTotalInputBytes(appResult);
                }
                // Execution time excludes the time spent waiting.
                long totalExecutionTime = Utils.getTotalRuntime(results) - Utils.getTotalWaittime(results);
                if (totalExecutionTime != 0) {
                    jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
                    jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
                    jobExecution.inputSizeInBytes = totalInputBytesInBytes;
                    logger.info("Metric Values for execution " + jobExecution.jobExecId + ": Execution time = " + totalExecutionTime + ", Resource usage = " + totalResourceUsed + " and total input size = " + totalInputBytesInBytes);
                }
                // First execution with usable metrics becomes the baseline for the job.
                if (tuningJobDefinition.averageResourceUsage == null && totalExecutionTime != 0) {
                    tuningJobDefinition.averageResourceUsage = jobExecution.resourceUsage;
                    tuningJobDefinition.averageExecutionTime = jobExecution.executionTime;
                    tuningJobDefinition.averageInputSizeInBytes = jobExecution.inputSizeInBytes.longValue();
                    tuningJobDefinition.update();
                }
                // Compute fitness
                // NOTE(review): totalInputBytesInBytes may be 0 here, in which case the two divisions
                // by it below produce Infinity/NaN - confirm whether that needs special handling.
                if (failedOrCancelled) {
                    logger.info("Execution " + jobExecution.jobExecId + " failed/cancelled. Applying penalty");
                    // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
                } else if (jobExecution.resourceUsage > (tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
                    // Todo: Check execution time constraint as well
                    logger.info("Execution " + jobExecution.jobExecId + " violates constraint on resource usage");
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * totalInputBytesInBytes);
                } else {
                    tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
                }
                tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                jobExecution.update();
                tuningJobExecution.update();
            } else if (failedOrCancelled) {
                // No application results available: penalise the failed/cancelled run and zero its metrics.
                // NOTE(review): if the job has no baseline yet (averageResourceUsage is null) this
                // throws and the execution is only logged as an error - confirm that is intended.
                // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
                jobExecution.executionTime = 0D;
                jobExecution.resourceUsage = 0D;
                jobExecution.inputSizeInBytes = 0D;
                tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                jobExecution.update();
                tuningJobExecution.update();
            }
        } catch (Exception e) {
            // Best effort per execution: log and carry on with the next one.
            logger.error("Error updating fitness of execution: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
        }
    }
    logger.info("Execution metrics updated");
}
Also used : TuningJobExecution(models.TuningJobExecution) JobExecution(models.JobExecution) TuningJobExecution(models.TuningJobExecution) TuningJobDefinition(models.TuningJobDefinition) JobDefinition(models.JobDefinition) TuningJobDefinition(models.TuningJobDefinition) AppResult(models.AppResult)

Aggregations

JobDefinition (models.JobDefinition): 3, TuningJobDefinition (models.TuningJobDefinition): 3, TuningJobExecution (models.TuningJobExecution): 3, JobExecution (models.JobExecution): 2, IOException (java.io.IOException): 1, HttpURLConnection (java.net.HttpURLConnection): 1, URL (java.net.URL): 1, AppResult (models.AppResult): 1, FlowDefinition (models.FlowDefinition): 1, AuthenticatedURL (org.apache.hadoop.security.authentication.client.AuthenticatedURL): 1, AuthenticationException (org.apache.hadoop.security.authentication.client.AuthenticationException): 1, JsonNode (org.codehaus.jackson.JsonNode): 1