Use of com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData in project dr-elephant by linkedin.
The class MapReduceFSFetcherHadoop2, method fetchData:
@Override
public MapReduceApplicationData fetchData(AnalyticJob job) throws IOException {
  DataFiles files = getHistoryFiles(job);
  String confFile = files.getJobConfPath();
  String histFile = files.getJobHistPath();
  String appId = job.getAppId();
  String jobId = Utils.getJobIdFromApplicationId(appId);
  MapReduceApplicationData jobData = new MapReduceApplicationData();
  jobData.setAppId(appId).setJobId(jobId);
  // Fetch job config
  Configuration jobConf = new Configuration(false);
  jobConf.addResource(_fs.open(new Path(confFile)), confFile);
  Properties jobConfProperties = new Properties();
  for (Map.Entry<String, String> entry : jobConf) {
    jobConfProperties.put(entry.getKey(), entry.getValue());
  }
  jobData.setJobConf(jobConfProperties);
  // Check if job history file is too large and should be throttled
  if (_fs.getFileStatus(new Path(histFile)).getLen() > _maxLogSizeInMB * FileUtils.ONE_MB) {
    String errMsg = "The history log of MapReduce application " + appId + " exceeds the size limit of " + _maxLogSizeInMB + " MB; the parsing process is throttled.";
    logger.warn(errMsg);
    jobData.setDiagnosticInfo(errMsg);
    // set succeeded to false to avoid heuristic analysis
    jobData.setSucceeded(false);
    return jobData;
  }
  // Analyze job history file
  JobHistoryParser parser = new JobHistoryParser(_fs, histFile);
  JobHistoryParser.JobInfo jobInfo = parser.parse();
  IOException parseException = parser.getParseException();
  if (parseException != null) {
    throw new RuntimeException("Could not parse history file " + histFile, parseException);
  }
  jobData.setSubmitTime(jobInfo.getSubmitTime());
  jobData.setStartTime(jobInfo.getLaunchTime());
  jobData.setFinishTime(jobInfo.getFinishTime());
  String state = jobInfo.getJobStatus();
  if (state.equals("SUCCEEDED")) {
    jobData.setSucceeded(true);
  } else if (state.equals("FAILED")) {
    jobData.setSucceeded(false);
    jobData.setDiagnosticInfo(jobInfo.getErrorInfo());
  } else {
throw new RuntimeException("job neither succeeded or failed. can not process it ");
  }
  // Fetch job counter
  MapReduceCounterData jobCounter = getCounterData(jobInfo.getTotalCounters());
  // Fetch task data
  Map<TaskID, JobHistoryParser.TaskInfo> allTasks = jobInfo.getAllTasks();
  List<JobHistoryParser.TaskInfo> mapperInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
  List<JobHistoryParser.TaskInfo> reducerInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
  for (JobHistoryParser.TaskInfo taskInfo : allTasks.values()) {
    if (taskInfo.getTaskType() == TaskType.MAP) {
      mapperInfoList.add(taskInfo);
    } else {
      reducerInfoList.add(taskInfo);
    }
  }
  if (jobInfo.getTotalMaps() > MAX_SAMPLE_SIZE) {
    logger.debug(jobId + " total mappers: " + mapperInfoList.size());
  }
  if (jobInfo.getTotalReduces() > MAX_SAMPLE_SIZE) {
    logger.debug(jobId + " total reducers: " + reducerInfoList.size());
  }
  MapReduceTaskData[] mapperList = getTaskData(jobId, mapperInfoList);
  MapReduceTaskData[] reducerList = getTaskData(jobId, reducerInfoList);
  jobData.setCounters(jobCounter).setMapperData(mapperList).setReducerData(reducerList);
  return jobData;
}
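The throttling branch above skips parsing when the raw length of the history file exceeds a limit expressed in megabytes (FileUtils.ONE_MB is the Apache Commons IO constant). Below is a minimal, self-contained sketch of that size check using only the JDK; the 500 MB limit is an assumed value for illustration, not dr-elephant's configured default.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class HistorySizeThrottle {

  // Assumed limit for illustration only; dr-elephant reads its limit from fetcher configuration.
  private static final long MAX_LOG_SIZE_IN_MB = 500;
  private static final long ONE_MB = 1024L * 1024L; // same value as FileUtils.ONE_MB

  /** Returns true when the history file is too large to parse safely. */
  static boolean shouldThrottle(Path historyFile) throws IOException {
    return Files.size(historyFile) > MAX_LOG_SIZE_IN_MB * ONE_MB;
  }

  public static void main(String[] args) throws IOException {
    Path histFile = Paths.get(args[0]);
    if (shouldThrottle(histFile)) {
      System.out.println("History file exceeds " + MAX_LOG_SIZE_IN_MB + " MB; skipping parse.");
    } else {
      System.out.println("History file is within the size limit; safe to parse.");
    }
  }
}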
Use of com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData in project dr-elephant by linkedin.
The class MapReduceFetcherHadoop2, method fetchData:
@Override
public MapReduceApplicationData fetchData(AnalyticJob analyticJob) throws IOException, AuthenticationException {
  String appId = analyticJob.getAppId();
  MapReduceApplicationData jobData = new MapReduceApplicationData();
  String jobId = Utils.getJobIdFromApplicationId(appId);
  jobData.setAppId(appId).setJobId(jobId);
  // Change job tracking url to job history page
  analyticJob.setTrackingUrl(_jhistoryWebAddr + jobId);
  try {
    // Fetch job config
    Properties jobConf = _jsonFactory.getProperties(_urlFactory.getJobConfigURL(jobId));
    jobData.setJobConf(jobConf);
    URL jobURL = _urlFactory.getJobURL(jobId);
    String state = _jsonFactory.getState(jobURL);
    jobData.setSubmitTime(_jsonFactory.getSubmitTime(jobURL));
    jobData.setStartTime(_jsonFactory.getStartTime(jobURL));
    jobData.setFinishTime(_jsonFactory.getFinishTime(jobURL));
    if (state.equals("SUCCEEDED")) {
      jobData.setSucceeded(true);
      // Fetch job counter
      MapReduceCounterData jobCounter = _jsonFactory.getJobCounter(_urlFactory.getJobCounterURL(jobId));
      // Fetch task data
      URL taskListURL = _urlFactory.getTaskListURL(jobId);
      List<MapReduceTaskData> mapperList = new ArrayList<MapReduceTaskData>();
      List<MapReduceTaskData> reducerList = new ArrayList<MapReduceTaskData>();
      _jsonFactory.getTaskDataAll(taskListURL, jobId, mapperList, reducerList);
      MapReduceTaskData[] mapperData = mapperList.toArray(new MapReduceTaskData[mapperList.size()]);
      MapReduceTaskData[] reducerData = reducerList.toArray(new MapReduceTaskData[reducerList.size()]);
      jobData.setCounters(jobCounter).setMapperData(mapperData).setReducerData(reducerData);
    } else if (state.equals("FAILED")) {
      jobData.setSucceeded(false);
      // Fetch job counter
      MapReduceCounterData jobCounter = _jsonFactory.getJobCounter(_urlFactory.getJobCounterURL(jobId));
      // Fetch task data
      URL taskListURL = _urlFactory.getTaskListURL(jobId);
      List<MapReduceTaskData> mapperList = new ArrayList<MapReduceTaskData>();
      List<MapReduceTaskData> reducerList = new ArrayList<MapReduceTaskData>();
      _jsonFactory.getTaskDataAll(taskListURL, jobId, mapperList, reducerList);
      MapReduceTaskData[] mapperData = mapperList.toArray(new MapReduceTaskData[mapperList.size()]);
      MapReduceTaskData[] reducerData = reducerList.toArray(new MapReduceTaskData[reducerList.size()]);
      jobData.setCounters(jobCounter).setMapperData(mapperData).setReducerData(reducerData);
      String diagnosticInfo;
      try {
        diagnosticInfo = parseException(jobData.getJobId(), _jsonFactory.getDiagnosticInfo(jobURL));
      } catch (Exception e) {
        diagnosticInfo = null;
        logger.warn("Failed getting diagnostic info for failed job " + jobData.getJobId());
      }
      jobData.setDiagnosticInfo(diagnosticInfo);
    } else {
      // Should not reach here
      throw new RuntimeException("Job state not supported. Should be either SUCCEEDED or FAILED");
    }
  } finally {
    ThreadContextMR2.updateAuthToken();
  }
  return jobData;
}
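The _urlFactory and _jsonFactory helpers used above are private to MapReduceFetcherHadoop2 and are not shown on this page. As a rough sketch of the URL side of that abstraction: the MapReduce Job History Server exposes a REST API under /ws/v1/history/mapreduce/jobs, and URLs like the ones built below are what the fetcher ultimately requests. The class and method names here are illustrative assumptions, not dr-elephant's actual code.

import java.net.MalformedURLException;
import java.net.URL;

// Illustrative sketch of a Job History Server URL builder, similar in spirit
// to what MapReduceFetcherHadoop2's private URL factory is responsible for.
class HistoryServerUrls {

  private final String _root; // e.g. "http://historyserver:19888"

  HistoryServerUrls(String historyServerAddr) {
    _root = historyServerAddr + "/ws/v1/history/mapreduce/jobs/";
  }

  URL jobUrl(String jobId) throws MalformedURLException {
    return new URL(_root + jobId);
  }

  URL jobConfUrl(String jobId) throws MalformedURLException {
    return new URL(_root + jobId + "/conf");
  }

  URL jobCounterUrl(String jobId) throws MalformedURLException {
    return new URL(_root + jobId + "/counters");
  }

  URL taskListUrl(String jobId) throws MalformedURLException {
    return new URL(_root + jobId + "/tasks");
  }
}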
Use of com.linkedin.drelephant.mapreduce.data.MapReduceApplicationData in project dr-elephant by linkedin.
The class MapReduceMetricsAggregator, method aggregate:
@Override
public void aggregate(HadoopApplicationData hadoopData) {
  MapReduceApplicationData data = (MapReduceApplicationData) hadoopData;
  long mapTaskContainerSize = getMapContainerSize(data);
  long reduceTaskContainerSize = getReducerContainerSize(data);
  int reduceTaskSlowStartPercentage = (int) (Double.parseDouble(data.getConf().getProperty(REDUCER_SLOW_START_CONFIG)) * 100);
  // overwrite reduceTaskSlowStartPercentage to 100%. TODO: make use of the slow start percent
  reduceTaskSlowStartPercentage = 100;
  mapTasks = new TaskLevelAggregatedMetrics(data.getMapperData(), mapTaskContainerSize, data.getSubmitTime());
  long reduceIdealStartTime = mapTasks.getNthPercentileFinishTime(reduceTaskSlowStartPercentage);
  // Mappers list is empty
  if (reduceIdealStartTime == -1) {
    // ideal start time for reducer is infinite since it cannot start
    reduceIdealStartTime = Long.MAX_VALUE;
  }
  reduceTasks = new TaskLevelAggregatedMetrics(data.getReducerData(), reduceTaskContainerSize, reduceIdealStartTime);
  _hadoopAggregatedData.setResourceUsed(mapTasks.getResourceUsed() + reduceTasks.getResourceUsed());
  _hadoopAggregatedData.setTotalDelay(mapTasks.getDelay() + reduceTasks.getDelay());
  _hadoopAggregatedData.setResourceWasted(mapTasks.getResourceWasted() + reduceTasks.getResourceWasted());
}
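The slow-start percentage above is read from a job configuration property (REDUCER_SLOW_START_CONFIG; in stock Hadoop the corresponding setting is mapreduce.job.reduce.slowstart.completedmaps, which holds a fraction such as 0.80), converted to a whole-number percentile, and then, for now, overridden to 100. Below is a small sketch of that fraction-to-percentage conversion with a defensive fallback; the default value and the fallback behavior are assumptions for illustration, not dr-elephant's logic.

import java.util.Properties;

// Illustrative sketch: turn the reducer slow-start fraction into a percentile.
// The 1.0 default and the catch-all fallback are assumptions for this example.
class SlowStartPercentage {

  static final String REDUCER_SLOW_START_CONFIG = "mapreduce.job.reduce.slowstart.completedmaps";

  static int fromConf(Properties jobConf) {
    String raw = jobConf.getProperty(REDUCER_SLOW_START_CONFIG, "1.0");
    try {
      // e.g. "0.80" -> 80: reducers may start once 80% of the maps have finished
      return (int) (Double.parseDouble(raw) * 100);
    } catch (NumberFormatException e) {
      return 100; // fall back to "wait for all maps to finish"
    }
  }

  public static void main(String[] args) {
    Properties conf = new Properties();
    conf.setProperty(REDUCER_SLOW_START_CONFIG, "0.80");
    System.out.println(fromConf(conf)); // prints 80
  }
}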