Use of com.linkedin.drelephant.spark.data.SparkApplicationData in the dr-elephant project by LinkedIn:
the testLoadInfoSpark method of the InfoExtractorTest class.
@Test
public void testLoadInfoSpark() {
// Azkaban link URLs that loadInfo() is expected to extract from the Spark
// driver's extra Java options.
final String jobDefUrl = "https://grid.example.com:9000/manager?project=project-name&flow=flow-name&job=job-name";
final String jobExecUrl = "https://grid.example.com:9000/executor?execid=123456&job=job-name&attempt=0";
final String flowDefUrl = "https://grid.example.com:9000/manager?project=project-name&flow=flow-name";
final String flowExecUrl = "https://grid.example.com:9000/executor?execid=123456";
// Encode the four Azkaban links as -D system properties inside the driver
// options, exactly as the Azkaban Spark launcher does.
final String extraJavaOptions = "-Dazkaban.link.workflow.url=" + flowDefUrl
    + " -Dazkaban.link.job.url=" + jobDefUrl
    + " -Dazkaban.link.execution.url=" + flowExecUrl
    + " -Dazkaban.link.attempt.url=" + jobExecUrl;
// Scala immutable map: $plus is the Java spelling of Scala's `+` operator.
Map<String, String> properties = new HashMap<String, String>();
properties = properties.$plus(
    new Tuple2<String, String>("spark.driver.extraJavaOptions", extraJavaOptions));
AppResult result = new AppResult();
HadoopApplicationData data = new SparkApplicationData(
    "application_5678",
    properties,
    new ApplicationInfoImpl("", "", new Vector<ApplicationAttemptInfoImpl>(0, 1, 0)),
    new Vector<JobData>(0, 1, 0),
    new Vector<StageData>(0, 1, 0),
    new Vector<ExecutorSummary>(0, 1, 0));
InfoExtractor.loadInfo(result, data);
// Each scheduler id on the result should match the URL embedded above.
assertTrue(result.jobDefId.equals(jobDefUrl));
assertTrue(result.jobExecId.equals(jobExecUrl));
assertTrue(result.flowDefId.equals(flowDefUrl));
assertTrue(result.flowExecId.equals(flowExecUrl));
}
Use of com.linkedin.drelephant.spark.data.SparkApplicationData in the dr-elephant project by LinkedIn:
the testLoadInfoSparkNoConfig method of the InfoExtractorTest class.
@Test
public void testLoadInfoSparkNoConfig() {
// A Spark application with an empty property map: none of the Azkaban link
// properties are present.
Map<String, String> emptyProperties = new HashMap<String, String>();
AppResult result = new AppResult();
HadoopApplicationData data = new SparkApplicationData(
    "application_5678",
    emptyProperties,
    new ApplicationInfoImpl("", "", new Vector<ApplicationAttemptInfoImpl>(0, 1, 0)),
    new Vector<JobData>(0, 1, 0),
    new Vector<StageData>(0, 1, 0),
    new Vector<ExecutorSummary>(0, 1, 0));
// loadInfo must tolerate missing scheduler properties without throwing...
InfoExtractor.loadInfo(result, data);
// ...and leave every scheduler id blank on the result.
assertTrue(result.jobDefId.isEmpty());
assertTrue(result.jobExecId.isEmpty());
assertTrue(result.flowDefId.isEmpty());
assertTrue(result.flowExecId.isEmpty());
}
Use of com.linkedin.drelephant.spark.data.SparkApplicationData in the dr-elephant project by LinkedIn:
the loadInfo method of the InfoExtractor class.
/**
 * Populates the given result with scheduler information extracted from the
 * Hadoop application data, dispatching on the concrete framework type.
 *
 * @param result the AppResult to populate
 * @param data the framework-specific Hadoop application data
 */
public static void loadInfo(AppResult result, HadoopApplicationData data) {
// Pull scheduler-related configuration out of the framework-specific data;
// an unrecognized framework falls through with empty properties.
Properties properties;
if (data instanceof MapReduceApplicationData) {
  properties = retrieveMapreduceProperties((MapReduceApplicationData) data);
} else if (data instanceof SparkApplicationData) {
  properties = retrieveSparkProperties((SparkApplicationData) data);
} else if (data instanceof TezApplicationData) {
  properties = retrieveTezProperties((TezApplicationData) data);
} else {
  properties = new Properties();
}
Scheduler scheduler = getSchedulerInstance(data.getAppId(), properties);
if (scheduler == null) {
  // No scheduler recognized the properties: record the app as ad-hoc.
  logger.info("No Scheduler found for appid: " + data.getAppId());
  loadNoSchedulerInfo(result);
  return;
}
// A scheduler matched, but all four ids must be present for full integration.
boolean missingIds = StringUtils.isEmpty(scheduler.getJobDefId())
    || StringUtils.isEmpty(scheduler.getJobExecId())
    || StringUtils.isEmpty(scheduler.getFlowDefId())
    || StringUtils.isEmpty(scheduler.getFlowExecId());
if (missingIds) {
  logger.warn("This job doesn't have the correct " + scheduler.getSchedulerName()
      + " integration support. I will treat this as an adhoc job");
  logger.info("No Flow/job info found for appid: " + data.getAppId());
  loadNoSchedulerInfo(result);
} else {
  loadSchedulerInfo(result, data, scheduler);
}
}
Aggregations