Usage of com.linkedin.drelephant.tez.data.TezApplicationData in the dr-elephant project by LinkedIn.
Method analyzeJob of class ReducerGCHeuristicTest:
/**
 * Builds a synthetic Tez application with NUMTASKS sampled reducers (all sharing the
 * given runtime, CPU time, and GC time) plus one trailing non-sampled reducer, then
 * runs the heuristic and reports the resulting severity.
 *
 * @param runtimeMs per-task run time in milliseconds
 * @param cpuMs     per-task CPU time counter value
 * @param gcMs      per-task GC time counter value
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJob(long runtimeMs, long cpuMs, long gcMs) throws IOException {
  // One shared counter object carries the CPU/GC timings for every sampled task.
  TezCounterData taskCounter = new TezCounterData();
  taskCounter.set(TezCounterData.CounterName.CPU_MILLISECONDS, cpuMs);
  taskCounter.set(TezCounterData.CounterName.GC_TIME_MILLIS, gcMs);

  TezTaskData[] reducers = new TezTaskData[NUMTASKS + 1];
  for (int idx = 0; idx < NUMTASKS; idx++) {
    TezTaskData task = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);
    task.setTimeAndCounter(new long[] { runtimeMs, 0, 0, 0, 0 }, taskCounter);
    reducers[idx] = task;
  }
  // Final slot is a non-sampled task: it carries no time or counter data.
  reducers[NUMTASKS] = new TezTaskData("task-id-" + NUMTASKS, "task-attempt-id-" + NUMTASKS);

  TezApplicationData data =
      new TezApplicationData().setCounters(new TezCounterData()).setReduceTaskData(reducers);
  return _heuristic.apply(data).getSeverity();
}
Usage of com.linkedin.drelephant.tez.data.TezApplicationData in the dr-elephant project by LinkedIn.
Method analyzeJob of class ReducerMemoryHeuristicTest:
/**
 * Builds a synthetic Tez application whose sampled reducers all report the given
 * average physical memory usage, configures the reducer container memory, runs the
 * heuristic, and reports the resulting severity.
 *
 * @param taskAvgMemMB  per-task physical memory usage in MB
 * @param containerMemMB configured reducer container size in MB
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJob(long taskAvgMemMB, long containerMemMB) throws IOException {
  // One shared counter object carries the physical-memory value for every sampled task.
  TezCounterData taskCounter = new TezCounterData();
  taskCounter.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, taskAvgMemMB * FileUtils.ONE_MB);

  Properties conf = new Properties();
  conf.setProperty(com.linkedin.drelephant.mapreduce.heuristics.ReducerMemoryHeuristic.REDUCER_MEMORY_CONF, Long.toString(containerMemMB));

  TezTaskData[] reducers = new TezTaskData[NUMTASKS + 1];
  for (int idx = 0; idx < NUMTASKS; idx++) {
    TezTaskData task = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);
    task.setTimeAndCounter(new long[5], taskCounter);
    reducers[idx] = task;
  }
  // Final slot is a non-sampled task: it carries no time or counter data.
  reducers[NUMTASKS] = new TezTaskData("task-id-" + NUMTASKS, "task-attempt-id-" + NUMTASKS);

  TezApplicationData data =
      new TezApplicationData().setCounters(new TezCounterData()).setReduceTaskData(reducers);
  data.setConf(conf);
  return _heuristic.apply(data).getSeverity();
}
Usage of com.linkedin.drelephant.tez.data.TezApplicationData in the dr-elephant project by LinkedIn.
Method analyzeJob of class ReducerTimeHeuristicTest:
/**
 * Builds a synthetic Tez application with {@code numTasks} sampled reducers, each of
 * the given runtime and with empty counters, plus one trailing non-sampled reducer,
 * then runs the heuristic and reports the resulting severity.
 *
 * @param runtimeMs per-task run time in milliseconds
 * @param numTasks  number of sampled reducer tasks to generate
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJob(long runtimeMs, int numTasks) throws IOException {
  // A single empty counter set is shared by the job and all sampled tasks.
  TezCounterData emptyCounters = new TezCounterData();

  TezTaskData[] reducers = new TezTaskData[numTasks + 1];
  for (int idx = 0; idx < numTasks; idx++) {
    TezTaskData task = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);
    task.setTime(new long[] { runtimeMs, 0, 0, 0, 0 });
    task.setCounter(emptyCounters);
    reducers[idx] = task;
  }
  // Final slot is a non-sampled task: it carries no time or counter data.
  reducers[numTasks] = new TezTaskData("task-id-" + numTasks, "task-attempt-id-" + numTasks);

  TezApplicationData data =
      new TezApplicationData().setCounters(emptyCounters).setReduceTaskData(reducers);
  return _heuristic.apply(data).getSeverity();
}
Usage of com.linkedin.drelephant.tez.data.TezApplicationData in the dr-elephant project by LinkedIn.
Method aggregate of class TezMetricsAggregator:
@Override
public void aggregate(HadoopApplicationData hadoopData) {
  TezApplicationData data = (TezApplicationData) hadoopData;
  long mapTaskContainerSize = getMapContainerSize(data);
  long reduceTaskContainerSize = getReducerContainerSize(data);
  // Treat slow start as 100%, i.e. reducers ideally start only after every mapper
  // finishes. TODO: make use of the actual slow start percent from
  // REDUCER_SLOW_START_CONFIG. (The previous code eagerly parsed that property —
  // throwing an NPE whenever it was absent from the job conf — and then discarded
  // the parsed value anyway, so the parse was removed.)
  int reduceTaskSlowStartPercentage = 100;
  _mapTasks = new TezTaskLevelAggregatedMetrics(data.getMapTaskData(), mapTaskContainerSize, data.getStartTime());
  long reduceIdealStartTime = _mapTasks.getNthPercentileFinishTime(reduceTaskSlowStartPercentage);
  // Mappers list is empty
  if (reduceIdealStartTime == -1) {
    // ideal start time for reducer is infinite since it cannot start
    reduceIdealStartTime = Long.MAX_VALUE;
  }
  _reduceTasks = new TezTaskLevelAggregatedMetrics(data.getReduceTaskData(), reduceTaskContainerSize, reduceIdealStartTime);
  _hadoopAggregatedData.setResourceUsed(_mapTasks.getResourceUsed() + _reduceTasks.getResourceUsed());
  _hadoopAggregatedData.setTotalDelay(_mapTasks.getDelay() + _reduceTasks.getDelay());
  _hadoopAggregatedData.setResourceWasted(_mapTasks.getResourceWasted() + _reduceTasks.getResourceWasted());
}
Usage of com.linkedin.drelephant.tez.data.TezApplicationData in the dr-elephant project by LinkedIn.
Method loadInfo of class InfoExtractor:
/**
 * Populates {@code result} with scheduler flow/job information derived from the
 * application's configuration properties, falling back to no-scheduler handling when
 * no scheduler matches or when the scheduler lacks complete flow/job identifiers.
 *
 * @param result the AppResult to load the info into
 * @param data   the Hadoop application data whose properties identify the scheduler
 */
public static void loadInfo(AppResult result, HadoopApplicationData data) {
  // Pull framework-specific properties; unknown frameworks yield an empty set.
  Properties properties;
  if (data instanceof MapReduceApplicationData) {
    properties = retrieveMapreduceProperties((MapReduceApplicationData) data);
  } else if (data instanceof SparkApplicationData) {
    properties = retrieveSparkProperties((SparkApplicationData) data);
  } else if (data instanceof TezApplicationData) {
    properties = retrieveTezProperties((TezApplicationData) data);
  } else {
    properties = new Properties();
  }

  Scheduler scheduler = getSchedulerInstance(data.getAppId(), properties);
  if (scheduler == null) {
    logger.info("No Scheduler found for appid: " + data.getAppId());
    loadNoSchedulerInfo(result);
    return;
  }

  // A usable scheduler integration must supply all four flow/job identifiers.
  boolean idsComplete = !StringUtils.isEmpty(scheduler.getJobDefId())
      && !StringUtils.isEmpty(scheduler.getJobExecId())
      && !StringUtils.isEmpty(scheduler.getFlowDefId())
      && !StringUtils.isEmpty(scheduler.getFlowExecId());
  if (idsComplete) {
    loadSchedulerInfo(result, data, scheduler);
  } else {
    logger.warn("This job doesn't have the correct " + scheduler.getSchedulerName() + " integration support. I" + " will treat this as an adhoc job");
    logger.info("No Flow/job info found for appid: " + data.getAppId());
    loadNoSchedulerInfo(result);
  }
}
Aggregations