Use of com.linkedin.drelephant.tez.data.TezTaskData in the dr-elephant project by LinkedIn.
From class MapperDataSkewHeuristicTest, method analyzeJobTime:
/**
 * Builds a synthetic set of mapper tasks — {@code numSmallTasks} with a short runtime and
 * {@code numLongTasks} with a long runtime, plus one trailing non-sampled task that carries
 * no time data — then runs the heuristic and returns the resulting severity.
 *
 * @param numSmallTasks  number of tasks given {@code smallTimeTaken} as their runtime
 * @param numLongTasks   number of tasks given {@code longTimeTaken} as their runtime
 * @param smallTimeTaken runtime in ms for the "small" tasks
 * @param longTimeTaken  runtime in ms for the "long" tasks
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJobTime(int numSmallTasks, int numLongTasks, long smallTimeTaken, long longTimeTaken) throws IOException {
  TezCounterData jobCounter = new TezCounterData();
  int sampledCount = numSmallTasks + numLongTasks;
  TezTaskData[] mappers = new TezTaskData[sampledCount + 1];
  for (int idx = 0; idx < sampledCount; idx++) {
    mappers[idx] = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);
    // First numSmallTasks entries take the short runtime; the remainder take the long one.
    long timeTaken = (idx < numSmallTasks) ? smallTimeTaken : longTimeTaken;
    mappers[idx].setTotalTimeMs(timeTaken, true);
    mappers[idx].setCounter(jobCounter);
  }
  // Non-sampled task, which does not contain time data
  mappers[sampledCount] = new TezTaskData("task-id-" + sampledCount, "task-attempt-id-" + sampledCount);
  TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
Use of com.linkedin.drelephant.tez.data.TezTaskData in the dr-elephant project by LinkedIn.
From class MapperMemoryHeuristicTest, method analyzeJob:
/**
 * Builds {@code NUMTASKS} sampled mapper tasks that each report the given average physical
 * memory, configures the container memory via job properties, appends one non-sampled task
 * (no time or counter data), and returns the severity reported by the heuristic.
 *
 * @param taskAvgMemMB   physical memory (MB) reported by every sampled task
 * @param containerMemMB container memory limit (MB) written into the job configuration
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJob(long taskAvgMemMB, long containerMemMB) throws IOException {
  TezCounterData jobCounter = new TezCounterData();
  TezTaskData[] mappers = new TezTaskData[NUMTASKS + 1];

  // Every sampled task reports the same physical-memory usage.
  TezCounterData counter = new TezCounterData();
  counter.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, taskAvgMemMB * FileUtils.ONE_MB);

  // Container memory is read by the heuristic from the job configuration.
  Properties jobConf = new Properties();
  jobConf.setProperty(MapperMemoryHeuristic.MAPPER_MEMORY_CONF, Long.toString(containerMemMB));

  int idx = 0;
  while (idx < NUMTASKS) {
    mappers[idx] = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);
    mappers[idx].setTime(new long[5]);
    mappers[idx].setCounter(counter);
    idx++;
  }
  // Non-sampled task, which does not contain time and counter data
  mappers[idx] = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);

  TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
  data.setConf(jobConf);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
Use of com.linkedin.drelephant.tez.data.TezTaskData in the dr-elephant project by LinkedIn.
From class MapperTimeHeuristicTest, method analyzeJob:
/**
 * Builds {@code numTasks} sampled mapper tasks that each run for {@code runtime} ms and read
 * a quarter of {@code DUMMY_INPUT_SIZE} bytes from S3, appends one non-sampled task (no time
 * or counter data), and returns the severity reported by the heuristic.
 *
 * @param numTasks number of sampled mapper tasks to generate
 * @param runtime  total runtime in ms assigned to every sampled task
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJob(int numTasks, long runtime) throws IOException {
  TezCounterData jobCounter = new TezCounterData();
  TezTaskData[] mappers = new TezTaskData[numTasks + 1];

  TezCounterData taskCounter = new TezCounterData();
  taskCounter.set(TezCounterData.CounterName.S3A_BYTES_READ, DUMMY_INPUT_SIZE / 4);

  int i = 0;
  for (; i < numTasks; i++) {
    // BUGFIX: previously jobCounter was passed here, leaving taskCounter (and the
    // S3A_BYTES_READ fixture set above) unused, so tasks carried no input-size data.
    mappers[i] = new TezTaskData(taskCounter, new long[] { runtime, 0, 0, 0, 0 });
  }
  // Non-sampled task, which does not contain time and counter data
  mappers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);

  TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
Use of com.linkedin.drelephant.tez.data.TezTaskData in the dr-elephant project by LinkedIn.
From class ReducerDataSkewHeuristicTest, method analyzeJobTime:
/**
 * Builds a synthetic set of reducer tasks — {@code numSmallTasks} with a short runtime and
 * {@code numLongTasks} with a long runtime, plus one trailing non-sampled task that carries
 * no time data — then runs the heuristic and returns the resulting severity.
 *
 * @param numSmallTasks  number of tasks given {@code smallTimeTaken} as their runtime
 * @param numLongTasks   number of tasks given {@code longTimeTaken} as their runtime
 * @param smallTimeTaken runtime in ms for the "small" tasks
 * @param longTimeTaken  runtime in ms for the "long" tasks
 * @return the severity produced by the heuristic under test
 */
private Severity analyzeJobTime(int numSmallTasks, int numLongTasks, long smallTimeTaken, long longTimeTaken) throws IOException {
  TezCounterData jobCounter = new TezCounterData();
  int sampledCount = numSmallTasks + numLongTasks;
  TezTaskData[] reducers = new TezTaskData[sampledCount + 1];
  for (int idx = 0; idx < sampledCount; idx++) {
    reducers[idx] = new TezTaskData("task-id-" + idx, "task-attempt-id-" + idx);
    // First numSmallTasks entries take the short runtime; the remainder take the long one.
    long timeTaken = (idx < numSmallTasks) ? smallTimeTaken : longTimeTaken;
    reducers[idx].setTotalTimeMs(timeTaken, true);
    reducers[idx].setCounter(jobCounter);
  }
  // Non-sampled task, which does not contain time data
  reducers[sampledCount] = new TezTaskData("task-id-" + sampledCount, "task-attempt-id-" + sampledCount);
  TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setReduceTaskData(reducers);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
Use of com.linkedin.drelephant.tez.data.TezTaskData in the dr-elephant project by LinkedIn.
From class TezFetcher, method fetchData:
/**
 * Fetches Tez application data for the given job from the timeline server.
 *
 * <p>Iterates over every DAG of the application and keeps the task lists of the DAG with the
 * most tasks (highest impact), since settings changes can only be made at the DAG level.
 * Start/finish time and success flag reflect the last DAG visited.
 *
 * @param analyticJob the job whose application data should be fetched
 * @return populated {@link TezApplicationData} for the highest-impact DAG
 * @throws IOException             on timeline-server communication failure
 * @throws AuthenticationException when the auth token cannot be refreshed/used
 */
public TezApplicationData fetchData(AnalyticJob analyticJob) throws IOException, AuthenticationException {
  int maxSize = 0;
  String appId = analyticJob.getAppId();
  TezApplicationData jobData = new TezApplicationData();
  jobData.setAppId(appId);
  Properties jobConf = _jsonFactory.getProperties(_urlFactory.getApplicationURL(appId));
  jobData.setConf(jobConf);

  URL dagIdsUrl = _urlFactory.getDagURLByTezApplicationId(appId);
  List<String> dagIdsByApplicationId = _jsonFactory.getDagIdsByApplicationId(dagIdsUrl);
  List<TezTaskData> mapperListAggregate = new ArrayList<TezTaskData>();
  List<TezTaskData> reducerListAggregate = new ArrayList<TezTaskData>();

  // Iterate over dagIds and choose the dagId with the highest no. of tasks/highest impact as settings changes can be made only at DAG level.
  for (String dagId : dagIdsByApplicationId) {
    try {
      // set job task independent properties
      URL dagUrl = _urlFactory.getDagURL(dagId);
      String state = _jsonFactory.getState(dagUrl);
      jobData.setStartTime(_jsonFactory.getDagStartTime(dagUrl));
      jobData.setFinishTime(_jsonFactory.getDagEndTime(dagUrl));
      if (state.equals("SUCCEEDED")) {
        jobData.setSucceeded(true);
        List<TezTaskData> mapperList = new ArrayList<TezTaskData>();
        List<TezTaskData> reducerList = new ArrayList<TezTaskData>();
        // Fetch task data
        URL vertexListUrl = _urlFactory.getVertexListURL(dagId);
        _jsonFactory.getTaskDataAll(vertexListUrl, dagId, mapperList, reducerList);
        if (mapperList.size() + reducerList.size() > maxSize) {
          mapperListAggregate = mapperList;
          reducerListAggregate = reducerList;
          maxSize = mapperList.size() + reducerList.size();
        }
      }
      if (state.equals("FAILED")) {
        jobData.setSucceeded(false);
      }
    } finally {
      // Refresh auth even if a fetch above threw, so later calls are not left with a stale token.
      ThreadContextMR2.updateAuthToken();
    }
  }

  TezTaskData[] mapperData = mapperListAggregate.toArray(new TezTaskData[mapperListAggregate.size()]);
  TezTaskData[] reducerData = reducerListAggregate.toArray(new TezTaskData[reducerListAggregate.size()]);

  // BUGFIX: reuse the already-fetched DAG-id list instead of issuing a second, redundant
  // timeline-server request for the same URL. Behavior for an empty list is unchanged
  // (still throws IndexOutOfBoundsException — NOTE(review): consider guarding upstream).
  TezCounterData dagCounter = _jsonFactory.getDagCounter(_urlFactory.getDagURL(dagIdsByApplicationId.get(0)));

  jobData.setCounters(dagCounter).setMapTaskData(mapperData).setReduceTaskData(reducerData);
  return jobData;
}
Aggregations