Search in sources :

Example 1 with SparkApplicationData

Use of com.linkedin.drelephant.spark.data.SparkApplicationData in the project dr-elephant by LinkedIn.

The class InfoExtractorTest, method testLoadInfoSpark.

@Test
public void testLoadInfoSpark() {
    // Azkaban scheduler URLs that InfoExtractor.loadInfo is expected to recover
    // from the Spark driver's extra Java options.
    final String JOB_DEF_URL = "https://grid.example.com:9000/manager?project=project-name&flow=flow-name&job=job-name";
    final String JOB_EXEC_URL = "https://grid.example.com:9000/executor?execid=123456&job=job-name&attempt=0";
    final String FLOW_DEF_URL = "https://grid.example.com:9000/manager?project=project-name&flow=flow-name";
    final String FLOW_EXEC_URL = "https://grid.example.com:9000/executor?execid=123456";
    final String JAVA_EXTRA_OPTIONS = "spark.driver.extraJavaOptions";
    // Scala immutable map: $plus returns a new map with the entry added, so the
    // result must be reassigned. The -D flags mimic what Azkaban injects at runtime.
    Map<String, String> properties = new HashMap<String, String>();
    properties = properties.$plus(new Tuple2<String, String>(JAVA_EXTRA_OPTIONS, "-Dazkaban.link.workflow.url=" + FLOW_DEF_URL + " -Dazkaban.link.job.url=" + JOB_DEF_URL + " -Dazkaban.link.execution.url=" + FLOW_EXEC_URL + " -Dazkaban.link.attempt.url=" + JOB_EXEC_URL));
    AppResult result = new AppResult();
    // Minimal SparkApplicationData: empty application/job/stage/executor views;
    // only the configuration properties matter for this test.
    HadoopApplicationData data = new SparkApplicationData("application_5678", properties, new ApplicationInfoImpl("", "", new Vector<ApplicationAttemptInfoImpl>(0, 1, 0)), new Vector<JobData>(0, 1, 0), new Vector<StageData>(0, 1, 0), new Vector<ExecutorSummary>(0, 1, 0));
    InfoExtractor.loadInfo(result, data);
    // Constant-first equals is null-safe: if loadInfo left any of these fields
    // null, the assertion fails cleanly instead of throwing NullPointerException.
    assertTrue(JOB_DEF_URL.equals(result.jobDefId));
    assertTrue(JOB_EXEC_URL.equals(result.jobExecId));
    assertTrue(FLOW_DEF_URL.equals(result.flowDefId));
    assertTrue(FLOW_EXEC_URL.equals(result.flowExecId));
}
Also used : HashMap(scala.collection.immutable.HashMap) HadoopApplicationData(com.linkedin.drelephant.analysis.HadoopApplicationData) SparkApplicationData(com.linkedin.drelephant.spark.data.SparkApplicationData) ApplicationInfoImpl(com.linkedin.drelephant.spark.fetchers.statusapiv1.ApplicationInfoImpl) AppResult(models.AppResult) StageData(com.linkedin.drelephant.spark.fetchers.statusapiv1.StageData) ExecutorSummary(com.linkedin.drelephant.spark.fetchers.statusapiv1.ExecutorSummary) Tuple2(scala.Tuple2) JobData(com.linkedin.drelephant.spark.fetchers.statusapiv1.JobData) Vector(scala.collection.immutable.Vector) Test(org.junit.Test)

Example 2 with SparkApplicationData

Use of com.linkedin.drelephant.spark.data.SparkApplicationData in the project dr-elephant by LinkedIn.

The class InfoExtractorTest, method testLoadInfoSparkNoConfig.

@Test
public void testLoadInfoSparkNoConfig() {
    // Empty Scala immutable map: no spark.driver.extraJavaOptions entry, so
    // there are no scheduler URLs for loadInfo to extract.
    Map<String, String> properties = new HashMap<String, String>();
    AppResult result = new AppResult();
    // Minimal SparkApplicationData with empty application/job/stage/executor views.
    HadoopApplicationData data = new SparkApplicationData("application_5678", properties, new ApplicationInfoImpl("", "", new Vector<ApplicationAttemptInfoImpl>(0, 1, 0)), new Vector<JobData>(0, 1, 0), new Vector<StageData>(0, 1, 0), new Vector<ExecutorSummary>(0, 1, 0));
    // test to make sure loadInfo does not throw exception if properties are not defined
    InfoExtractor.loadInfo(result, data);
    // "".equals(x) is equivalent to (x != null && x.isEmpty()) but null-safe:
    // a null field fails the assertion instead of throwing NullPointerException.
    assertTrue("".equals(result.jobDefId));
    assertTrue("".equals(result.jobExecId));
    assertTrue("".equals(result.flowDefId));
    assertTrue("".equals(result.flowExecId));
}
Also used : HashMap(scala.collection.immutable.HashMap) HadoopApplicationData(com.linkedin.drelephant.analysis.HadoopApplicationData) SparkApplicationData(com.linkedin.drelephant.spark.data.SparkApplicationData) ApplicationInfoImpl(com.linkedin.drelephant.spark.fetchers.statusapiv1.ApplicationInfoImpl) AppResult(models.AppResult) StageData(com.linkedin.drelephant.spark.fetchers.statusapiv1.StageData) ExecutorSummary(com.linkedin.drelephant.spark.fetchers.statusapiv1.ExecutorSummary) JobData(com.linkedin.drelephant.spark.fetchers.statusapiv1.JobData) Vector(scala.collection.immutable.Vector) Test(org.junit.Test)

Example 3 with SparkApplicationData

Use of com.linkedin.drelephant.spark.data.SparkApplicationData in the project dr-elephant by LinkedIn.

The class InfoExtractor, method loadInfo.

/**
 * Loads result with the info depending on the application type
 *
 * @param result The jobResult to be loaded with.
 * @param data The Hadoop application data
 */
public static void loadInfo(AppResult result, HadoopApplicationData data) {
    // Dispatch on the concrete application type to pull out the configuration
    // properties the scheduler lookup needs; unknown types get an empty set.
    Properties properties;
    if (data instanceof MapReduceApplicationData) {
        properties = retrieveMapreduceProperties((MapReduceApplicationData) data);
    } else if (data instanceof SparkApplicationData) {
        properties = retrieveSparkProperties((SparkApplicationData) data);
    } else if (data instanceof TezApplicationData) {
        properties = retrieveTezProperties((TezApplicationData) data);
    } else {
        properties = new Properties();
    }
    Scheduler scheduler = getSchedulerInstance(data.getAppId(), properties);
    // Guard clause: no scheduler recognized — treat as an ad-hoc job.
    if (scheduler == null) {
        logger.info("No Scheduler found for appid: " + data.getAppId());
        loadNoSchedulerInfo(result);
        return;
    }
    // A scheduler was found, but all four identifiers must be present for the
    // integration to be usable; otherwise fall back to the ad-hoc path.
    boolean idsIncomplete = StringUtils.isEmpty(scheduler.getJobDefId())
        || StringUtils.isEmpty(scheduler.getJobExecId())
        || StringUtils.isEmpty(scheduler.getFlowDefId())
        || StringUtils.isEmpty(scheduler.getFlowExecId());
    if (idsIncomplete) {
        logger.warn("This job doesn't have the correct " + scheduler.getSchedulerName() + " integration support. I" + " will treat this as an adhoc job");
        logger.info("No Flow/job info found for appid: " + data.getAppId());
        loadNoSchedulerInfo(result);
    } else {
        loadSchedulerInfo(result, data, scheduler);
    }
}
Also used : MapReduceApplicationData(com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData) Scheduler(com.linkedin.drelephant.schedulers.Scheduler) SparkApplicationData(com.linkedin.drelephant.spark.data.SparkApplicationData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) Properties(java.util.Properties)

Aggregations

SparkApplicationData (com.linkedin.drelephant.spark.data.SparkApplicationData)3 HadoopApplicationData (com.linkedin.drelephant.analysis.HadoopApplicationData)2 ApplicationInfoImpl (com.linkedin.drelephant.spark.fetchers.statusapiv1.ApplicationInfoImpl)2 ExecutorSummary (com.linkedin.drelephant.spark.fetchers.statusapiv1.ExecutorSummary)2 JobData (com.linkedin.drelephant.spark.fetchers.statusapiv1.JobData)2 StageData (com.linkedin.drelephant.spark.fetchers.statusapiv1.StageData)2 AppResult (models.AppResult)2 Test (org.junit.Test)2 HashMap (scala.collection.immutable.HashMap)2 Vector (scala.collection.immutable.Vector)2 MapReduceApplicationData (com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData)1 Scheduler (com.linkedin.drelephant.schedulers.Scheduler)1 TezApplicationData (com.linkedin.drelephant.tez.data.TezApplicationData)1 Properties (java.util.Properties)1 Tuple2 (scala.Tuple2)1