Search in sources:

Example 1 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class BaselineComputeUtil method computeBaseline.

/**
 * Runs baseline computation for every job returned by
 * {@code getJobForBaselineComputation()}.
 *
 * <p>Jobs are processed one at a time. A failure while updating one job is logged
 * and does not stop the remaining jobs from being processed. Once every job has
 * been attempted, the whole list is passed to {@code updateMetrics}.
 *
 * @return the tuning job definitions that were considered in this run, including
 *         any whose baseline update threw an exception
 */
public List<TuningJobDefinition> computeBaseline() {
    logger.info("Starting baseline computation");

    final List<TuningJobDefinition> pendingJobs = getJobForBaselineComputation();
    for (TuningJobDefinition pendingJob : pendingJobs) {
        updateBaselineQuietly(pendingJob);
    }
    updateMetrics(pendingJobs);

    logger.info("Baseline computation complete");
    return pendingJobs;
}

/**
 * Updates the baseline of a single job, logging (instead of propagating) any exception.
 *
 * @param pendingJob the tuning job definition whose baseline should be updated
 */
private void updateBaselineQuietly(TuningJobDefinition pendingJob) {
    try {
        updateBaselineForJob(pendingJob);
    } catch (Exception e) {
        logger.error("Error in computing baseline for job: " + pendingJob.job.jobName, e);
    }
}
Also used : TuningJobDefinition(models.TuningJobDefinition)

Example 2 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class APIFitnessComputeUtil method updateExecutionMetrics.

/**
 * Updates the execution metrics and fitness of each completed execution.
 *
 * <p>For every execution the method:
 * <ol>
 *   <li>requests {@code /rest/jobexec?id=<jobExecId>} relative to {@code _drElephantURL}
 *       and parses the response as JSON;</li>
 *   <li>looks up the tuning-enabled {@code TuningJobDefinition} of the execution's job;</li>
 *   <li>if the response contains applications, sums their execution time
 *       ({@code finishTime - startTime - totalDelay}), {@code resourceUsed} and input bytes,
 *       stores the scaled totals on the job execution and computes the fitness;</li>
 *   <li>if the response is empty and the execution FAILED or was CANCELLED, zeroes the
 *       metrics and assigns the penalty fitness.</li>
 * </ol>
 * Any exception raised while handling one execution is logged and the loop continues with
 * the next execution.
 *
 * @param completedExecutions List of completed executions
 */
protected void updateExecutionMetrics(List<TuningJobExecution> completedExecutions) {
    logger.debug("Updating execution metrics");
    updateAuthToken();
    for (TuningJobExecution tuningJobExecution : completedExecutions) {
        logger.debug("Completed executions before updating metric: " + Json.toJson(tuningJobExecution));
        try {
            JobExecution jobExecution = tuningJobExecution.jobExecution;
            JobDefinition job = jobExecution.job;
            // Encode with an explicit charset: the one-argument URLEncoder.encode is deprecated
            // because it silently uses the platform default encoding.
            URL jobExecURL = new URL(new URL(_drElephantURL), String.format("/rest/jobexec?id=%s", URLEncoder.encode(jobExecution.jobExecId, "UTF-8")));
            HttpURLConnection conn = (HttpURLConnection) jobExecURL.openConnection();
            JsonNode allApps;
            // Close the response stream ourselves so cleanup does not depend on the parser,
            // including when parsing throws.
            java.io.InputStream responseStream = conn.getInputStream();
            try {
                allApps = _objectMapper.readTree(responseStream);
            } finally {
                responseStream.close();
            }
            // job id match and tuning enabled
            TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*").fetch(TuningJobDefinition.TABLE.job, "*").where().eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.id, job.id).eq(TuningJobDefinition.TABLE.tuningEnabled, 1).findUnique();
            if (allApps != null && allApps.size() > 0) {
                // Primitive accumulators avoid boxing on every loop iteration.
                long totalExecutionTime = 0L;
                double totalResourceUsed = 0D;
                double totalInputBytesInBytes = 0D;
                for (JsonNode app : allApps) {
                    logger.info("Job Execution Update: ApplicationID " + app.get("id").getTextValue());
                    long executionTime = app.get("finishTime").getLongValue() - app.get("startTime").getLongValue() - app.get("totalDelay").getLongValue();
                    totalExecutionTime += executionTime;
                    totalResourceUsed += app.get("resourceUsed").getDoubleValue();
                    totalInputBytesInBytes += getTotalInputBytes(app.get("id").getTextValue());
                }
                if (totalExecutionTime != 0) {
                    // NOTE(review): divisors presumably convert ms -> minutes and MB-seconds -> GB-hours; confirm units.
                    jobExecution.executionTime = totalExecutionTime * 1.0 / (1000 * 60);
                    jobExecution.resourceUsage = totalResourceUsed * 1.0 / (1024 * 3600);
                    jobExecution.inputSizeInBytes = totalInputBytesInBytes;
                    logger.info("Job Execution Update: UpdatedValue " + totalExecutionTime + ":" + totalResourceUsed + ":" + totalInputBytesInBytes);
                }
                logger.debug("Job execution " + jobExecution.resourceUsage);
                logger.debug("Job details: AvgResourceUsage " + tuningJobDefinition.averageResourceUsage + ", allowedMaxResourceUsagePercent: " + tuningJobDefinition.allowedMaxResourceUsagePercent);
                // NOTE(review): when totalInputBytesInBytes is 0 the divisions below yield Infinity/NaN;
                // behavior intentionally left unchanged here - confirm the desired policy.
                if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED) || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
                    // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
                } else if (jobExecution.resourceUsage > (tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent / 100.0)) {
                    // Resource usage exceeded the allowed limit: apply the penalty fitness.
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * totalInputBytesInBytes);
                } else {
                    // Fitness is the resource usage scaled by FileUtils.ONE_GB per byte of input.
                    tuningJobExecution.fitness = jobExecution.resourceUsage * FileUtils.ONE_GB / totalInputBytesInBytes;
                }
                tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                jobExecution.update();
                tuningJobExecution.update();
                logger.debug("Completed executions after updating metrics: " + Json.toJson(tuningJobExecution));
            } else {
                // No application data returned: only failed/cancelled executions are finalized here.
                if (jobExecution.executionState.equals(JobExecution.ExecutionState.FAILED) || jobExecution.executionState.equals(JobExecution.ExecutionState.CANCELLED)) {
                    // Todo: Check if the reason of failure is auto tuning and  handle cancelled cases
                    tuningJobExecution.fitness = 3 * tuningJobDefinition.averageResourceUsage * tuningJobDefinition.allowedMaxResourceUsagePercent * FileUtils.ONE_GB / (100.0 * tuningJobDefinition.averageInputSizeInBytes);
                    jobExecution.executionTime = 0D;
                    jobExecution.resourceUsage = 0D;
                    jobExecution.inputSizeInBytes = 0D;
                    tuningJobExecution.paramSetState = ParamSetStatus.FITNESS_COMPUTED;
                    jobExecution.update();
                    tuningJobExecution.update();
                }
            }
        } catch (Exception e) {
            logger.error("Error updating fitness of job_exec_id: " + tuningJobExecution.jobExecution.id + "\n Stacktrace: ", e);
        }
    }
    logger.debug("Execution metrics updated");
}
Also used : TuningJobExecution(models.TuningJobExecution) JobExecution(models.JobExecution) HttpURLConnection(java.net.HttpURLConnection) JsonNode(org.codehaus.jackson.JsonNode) TuningJobExecution(models.TuningJobExecution) TuningJobDefinition(models.TuningJobDefinition) JobDefinition(models.JobDefinition) TuningJobDefinition(models.TuningJobDefinition) URL(java.net.URL) AuthenticatedURL(org.apache.hadoop.security.authentication.client.AuthenticatedURL) AuthenticationException(org.apache.hadoop.security.authentication.client.AuthenticationException) IOException(java.io.IOException)

Example 3 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class AutoTuningAPIHelper method addNewJobForTuning.

/**
 * Registers a job for auto tuning.
 *
 * <p>Looks up the job and flow definitions referenced by the input and creates and saves
 * whichever of them does not exist yet. A new tuning job definition with tuning enabled is
 * then saved for the job, followed by a default job execution and its default parameters.
 *
 * @param tuningInput Tuning input parameters
 * @return the newly saved tuning job definition
 */
private TuningJobDefinition addNewJobForTuning(TuningInput tuningInput) {
    logger.info("Adding new job for tuning, job id: " + tuningInput.getJobDefId());

    JobDefinition jobDefinition = JobDefinition.find.select("*")
        .where()
        .eq(JobDefinition.TABLE.jobDefId, tuningInput.getJobDefId())
        .findUnique();
    FlowDefinition flow = FlowDefinition.find
        .where()
        .eq(FlowDefinition.TABLE.flowDefId, tuningInput.getFlowDefId())
        .findUnique();

    // Create the flow first so that a newly created job can reference it.
    if (flow == null) {
        flow = new FlowDefinition();
        flow.flowDefId = tuningInput.getFlowDefId();
        flow.flowDefUrl = tuningInput.getFlowDefUrl();
        flow.save();
    }

    if (jobDefinition == null) {
        jobDefinition = new JobDefinition();
        jobDefinition.jobDefId = tuningInput.getJobDefId();
        jobDefinition.scheduler = tuningInput.getScheduler();
        jobDefinition.username = tuningInput.getUserName();
        jobDefinition.jobName = tuningInput.getJobName();
        jobDefinition.jobDefUrl = tuningInput.getJobDefUrl();
        jobDefinition.flowDefinition = flow;
        jobDefinition.save();
    }

    // Snapshot the execution-related inputs before anything else is persisted.
    final String flowExecutionId = tuningInput.getFlowExecId();
    final String jobExecutionId = tuningInput.getJobExecId();
    final String flowExecutionUrl = tuningInput.getFlowExecUrl();
    final String jobExecutionUrl = tuningInput.getJobExecUrl();
    final String clientName = tuningInput.getClient();
    final String defaultParameters = tuningInput.getDefaultParams();

    final TuningJobDefinition newTuningJob = new TuningJobDefinition();
    newTuningJob.job = jobDefinition;
    newTuningJob.client = clientName;
    newTuningJob.tuningAlgorithm = tuningInput.getTuningAlgorithm();
    newTuningJob.tuningEnabled = 1;
    newTuningJob.allowedMaxExecutionTimePercent = tuningInput.getAllowedMaxExecutionTimePercent();
    newTuningJob.allowedMaxResourceUsagePercent = tuningInput.getAllowedMaxResourceUsagePercent();
    newTuningJob.save();

    final TuningJobExecution defaultExecution = insertDefaultJobExecution(
        jobDefinition,
        flowExecutionId,
        jobExecutionId,
        flowExecutionUrl,
        jobExecutionUrl,
        flow,
        tuningInput.getTuningAlgorithm());
    insertDefaultParameters(defaultExecution.jobExecution, defaultParameters);

    logger.info("Added job: " + tuningInput.getJobDefId() + " for tuning");
    return newTuningJob;
}
Also used : TuningJobDefinition(models.TuningJobDefinition) TuningJobExecution(models.TuningJobExecution) FlowDefinition(models.FlowDefinition) JobDefinition(models.JobDefinition) TuningJobDefinition(models.TuningJobDefinition)

Example 4 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class RestAPITest method testRestGetCurrentRunParametersNewJob.

/**
 * Posts a current-run-parameters request for a job that is new to tuning and verifies that:
 * the response carries the two expected memory parameters, a tuning job definition is
 * created for the job, and {@code BaselineComputeUtil.computeBaseline()} afterwards fills in
 * positive baseline averages. The test is skipped when auto tuning is not enabled.
 */
@Test
public void testRestGetCurrentRunParametersNewJob() {
    final Configuration autoTuningConf = ElephantContext.instance().getAutoTuningConf();
    org.junit.Assume.assumeTrue(autoTuningConf.getBoolean(DrElephant.AUTO_TUNING_ENABLED, false));

    running(testServer(TEST_SERVER_PORT, fakeApp), new Runnable() {

        @Override
        public void run() {
            // Job definition id of the new job carried by the test request payload.
            final String newJobDefId =
                "https://elephant.linkedin.com:8443/manager?project=AzkabanHelloPigTest&flow=countByCountryFlowSmallNew&job=countByCountryFlowSmallNew_countByCountry";
            final String jobDefIdColumn = TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.jobDefId;

            populateAutoTuningTestData1();
            final JsonNode requestBody = getTestGetCurrentRunParameterNewData();
            final WS.Response response = WS.url(BASE_URL + REST_GET_CURRENT_RUN_PARAMETERS)
                .post(requestBody)
                .get(RESPONSE_TIMEOUT, TimeUnit.MILLISECONDS);
            final JsonNode suggestedParams = response.asJson();

            assertTrue("Get current run param output did not match",
                suggestedParams.path("mapreduce.map.memory.mb").asDouble() == 2048D);
            assertTrue("Get current run param output did not match",
                suggestedParams.path("mapreduce.reduce.memory.mb").asDouble() == 2048D);
            assertTrue("Get current run param output size did not match", suggestedParams.size() == 2);

            // The request must have registered the job for tuning.
            TuningJobDefinition createdJob = TuningJobDefinition.find.select("*")
                .where()
                .eq(jobDefIdColumn, newJobDefId)
                .findUnique();
            assertTrue("New Job Not created  ",
                createdJob.job.jobName.equals("countByCountryFlowSmallNew_countByCountry"));

            new BaselineComputeUtil().computeBaseline();

            // Reload to observe the values written by the baseline computation.
            createdJob = TuningJobDefinition.find.select("*")
                .where()
                .eq(jobDefIdColumn, newJobDefId)
                .findUnique();
            assertTrue("Baseline not computed:averageResourceUsage  ", createdJob.averageResourceUsage > 0);
            assertTrue("Baseline not computed:averageInputSizeInBytes  ", createdJob.averageInputSizeInBytes > 0);
            assertTrue("Baseline not computed:averageExecutionTime  ", createdJob.averageExecutionTime > 0);
        }
    });
}
Also used : BaselineComputeUtil(com.linkedin.drelephant.tuning.BaselineComputeUtil) Configuration(org.apache.hadoop.conf.Configuration) JsonNode(com.fasterxml.jackson.databind.JsonNode) TuningJobDefinition(models.TuningJobDefinition) Test(org.junit.Test)

Example 5 with TuningJobDefinition

use of models.TuningJobDefinition in project dr-elephant by linkedin.

the class AutoTuningAPIHelper method getCurrentRunParameters.

/**
 * Handles the api request and returns param suggestions as response.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>applies default values to the input via {@code setDefaultValue};</li>
 *   <li>finds the tuning-enabled definition for the job, registering the job through
 *       {@code addNewJobForTuning} when none exists;</li>
 *   <li>picks the oldest execution of that job whose parameter set is in state
 *       {@code CREATED}, or creates a default execution when there is none;</li>
 *   <li>collects the suggested parameter values of that execution into a map;</li>
 *   <li>records the request details on the execution via {@code updateJobExecutionParameter}.</li>
 * </ol>
 *
 * @param tuningInput Rest api parameters
 * @return map from parameter name to suggested value; empty when no values are stored
 */
public Map<String, Double> getCurrentRunParameters(TuningInput tuningInput) {
    logger.info("Parameter suggestion request for execution: " + tuningInput.getJobExecId());
    setDefaultValue(tuningInput);
    String jobDefId = tuningInput.getJobDefId();
    TuningJobDefinition tuningJobDefinition = TuningJobDefinition.find.select("*").fetch(TuningJobDefinition.TABLE.job, "*").where().eq(TuningJobDefinition.TABLE.job + "." + JobDefinition.TABLE.jobDefId, jobDefId).eq(TuningJobDefinition.TABLE.tuningEnabled, 1).findUnique();
    // If new job for tuning, update db with new job configuration
    if (tuningJobDefinition == null) {
        logger.debug("New job encountered for tuning");
        AutoTuningMetricsController.markNewAutoTuningJob();
        tuningJobDefinition = addNewJobForTuning(tuningInput);
    }
    logger.debug("Finding parameter suggestion for job: " + tuningJobDefinition.job.jobName);
    // Oldest execution (ascending createdTs, max one row) still in CREATED state for this job.
    TuningJobExecution tuningJobExecution = TuningJobExecution.find.select("*").fetch(TuningJobExecution.TABLE.jobExecution, "*").fetch(TuningJobExecution.TABLE.jobExecution + "." + JobExecution.TABLE.job, "*").where().eq(TuningJobExecution.TABLE.jobExecution + "." + JobExecution.TABLE.job + "." + JobDefinition.TABLE.id, tuningJobDefinition.job.id).eq(TuningJobExecution.TABLE.paramSetState, ParamSetStatus.CREATED).order().asc(TuningJobExecution.TABLE.jobExecution + "." + JobExecution.TABLE.createdTs).setMaxRows(1).findUnique();
    // If no new parameter set for suggestion, create a new suggestion with default parameter
    if (tuningJobExecution == null) {
        logger.info("Returning default parameters as no parameter suggestion found for job: " + tuningJobDefinition.job.jobName);
        AutoTuningMetricsController.markParamSetNotFound();
        tuningJobExecution = createDefaultJobExecution(tuningJobDefinition);
    }
    logger.debug("Finding parameters corresponding to execution id: " + tuningJobExecution.jobExecution.id);
    List<JobSuggestedParamValue> jobSuggestedParamValues = JobSuggestedParamValue.find.where().eq(JobSuggestedParamValue.TABLE.jobExecution + "." + JobExecution.TABLE.id, tuningJobExecution.jobExecution.id).findList();
    // Guard before the first dereference: previously size() was logged ahead of the null
    // check, so that check could never protect against a null list.
    int suggestedParamCount = (jobSuggestedParamValues == null) ? 0 : jobSuggestedParamValues.size();
    logger.debug("Number of output parameters : " + suggestedParamCount);
    Map<String, Double> paramValues = new HashMap<String, Double>();
    if (jobSuggestedParamValues != null) {
        for (JobSuggestedParamValue jobSuggestedParamValue : jobSuggestedParamValues) {
            logger.debug("Param Name is " + jobSuggestedParamValue.tuningParameter.paramName + " And value is " + jobSuggestedParamValue.paramValue);
            paramValues.put(jobSuggestedParamValue.tuningParameter.paramName, jobSuggestedParamValue.paramValue);
        }
    }
    updateJobExecutionParameter(tuningJobExecution, tuningInput);
    logger.info("Finishing getCurrentRunParameters");
    return paramValues;
}
Also used : HashMap(java.util.HashMap) JobSuggestedParamValue(models.JobSuggestedParamValue) TuningJobDefinition(models.TuningJobDefinition) TuningJobExecution(models.TuningJobExecution)

Aggregations

TuningJobDefinition (models.TuningJobDefinition): 7; TuningJobExecution (models.TuningJobExecution): 4; JobDefinition (models.JobDefinition): 3; JobExecution (models.JobExecution): 2; JsonNode (com.fasterxml.jackson.databind.JsonNode): 1; BaselineComputeUtil (com.linkedin.drelephant.tuning.BaselineComputeUtil): 1; IOException (java.io.IOException): 1; HttpURLConnection (java.net.HttpURLConnection): 1; URL (java.net.URL): 1; HashMap (java.util.HashMap): 1; AppResult (models.AppResult): 1; FlowDefinition (models.FlowDefinition): 1; JobSuggestedParamValue (models.JobSuggestedParamValue): 1; Configuration (org.apache.hadoop.conf.Configuration): 1; AuthenticatedURL (org.apache.hadoop.security.authentication.client.AuthenticatedURL): 1; AuthenticationException (org.apache.hadoop.security.authentication.client.AuthenticationException): 1; JsonNode (org.codehaus.jackson.JsonNode): 1; Test (org.junit.Test): 1