Use of com.linkedin.drelephant.mapreduce.data.MapReduceTaskData in project dr-elephant by linkedin.
From the class ShuffleSortHeuristic, the apply method:
@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }
  MapReduceTaskData[] tasks = data.getReducerData();
  List<Long> execTimeMs = new ArrayList<Long>();
  List<Long> shuffleTimeMs = new ArrayList<Long>();
  List<Long> sortTimeMs = new ArrayList<Long>();
  for (MapReduceTaskData task : tasks) {
    if (task.isTimeDataPresent()) {
      execTimeMs.add(task.getCodeExecutionTimeMs());
      shuffleTimeMs.add(task.getShuffleTimeMs());
      sortTimeMs.add(task.getSortTimeMs());
    }
  }
  // Analyze data
  long avgExecTimeMs = Statistics.average(execTimeMs);
  long avgShuffleTimeMs = Statistics.average(shuffleTimeMs);
  long avgSortTimeMs = Statistics.average(sortTimeMs);
  Severity shuffleSeverity = getShuffleSortSeverity(avgShuffleTimeMs, avgExecTimeMs);
  Severity sortSeverity = getShuffleSortSeverity(avgSortTimeMs, avgExecTimeMs);
  Severity severity = Severity.max(shuffleSeverity, sortSeverity);
  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(),
      severity, Utils.getHeuristicScore(severity, tasks.length));
  result.addResultDetail("Number of tasks", Integer.toString(data.getReducerData().length));
  result.addResultDetail("Average code runtime", Statistics.readableTimespan(avgExecTimeMs));
  String shuffleFactor = Statistics.describeFactor(avgShuffleTimeMs, avgExecTimeMs, "x");
  result.addResultDetail("Average shuffle time", Statistics.readableTimespan(avgShuffleTimeMs) + " " + shuffleFactor);
  String sortFactor = Statistics.describeFactor(avgSortTimeMs, avgExecTimeMs, "x");
  result.addResultDetail("Average sort time", Statistics.readableTimespan(avgSortTimeMs) + " " + sortFactor);
  return result;
}
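The helper getShuffleSortSeverity is not shown in the snippet above. A plausible sketch of it, assuming ascending threshold arrays named runtimeLimits and ratioLimits (those field names, the doubling of the ratio, and the min-combination are assumptions about the implementation, not taken from the snippet):

// Hypothetical sketch of getShuffleSortSeverity: severity rises both with the
// absolute shuffle/sort time and with its ratio to average code execution time.
// The runtimeLimits and ratioLimits arrays are assumed placeholders, not
// dr-elephant's actual defaults.
private Severity getShuffleSortSeverity(long runtimeMs, long avgExecTimeMs) {
  // Severity based on absolute shuffle/sort runtime
  Severity runtimeSeverity = Severity.getSeverityAscending(
      runtimeMs, runtimeLimits[0], runtimeLimits[1], runtimeLimits[2], runtimeLimits[3]);
  if (avgExecTimeMs == 0) {
    return runtimeSeverity;
  }
  // Severity based on the ratio of shuffle/sort time to code execution time
  long ratio = runtimeMs * 2 / avgExecTimeMs;
  Severity ratioSeverity = Severity.getSeverityAscending(
      ratio, ratioLimits[0], ratioLimits[1], ratioLimits[2], ratioLimits[3]);
  // Flag only when both the absolute time and the ratio are high
  return Severity.min(runtimeSeverity, ratioSeverity);
}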
Use of com.linkedin.drelephant.mapreduce.data.MapReduceTaskData in project dr-elephant by linkedin.
From the class ShuffleSortHeuristicTest, the analyzeJob method:
private Severity analyzeJob(long shuffleTimeMs, long sortTimeMs, long reduceTimeMs) throws IOException {
  MapReduceCounterData dummyCounter = new MapReduceCounterData();
  MapReduceTaskData[] reducers = new MapReduceTaskData[NUMTASKS + 1];
  int i = 0;
  for (; i < NUMTASKS; i++) {
    reducers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
    reducers[i].setTimeAndCounter(new long[] { shuffleTimeMs + sortTimeMs + reduceTimeMs, shuffleTimeMs, sortTimeMs, 0, 0 }, dummyCounter);
  }
  // Non-sampled task, which does not contain time and counter data
  reducers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
  MapReduceApplicationData data = new MapReduceApplicationData().setCounters(dummyCounter).setReducerData(reducers);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
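A test method would call this helper with shuffle, sort, and reduce times chosen to land in a given severity band. A minimal sketch, assuming a Statistics.MINUTE_IN_MS constant and that these particular durations map to these severities (the durations and expected severities are illustrative assumptions, not values from the test suite):

// Hypothetical usage of the analyzeJob helper above.
@Test
public void testShuffleCritical() throws IOException {
  // Shuffle time (30 min) dwarfs code execution time (5 min)
  Assert.assertEquals(Severity.CRITICAL, analyzeJob(30 * Statistics.MINUTE_IN_MS, 0, 5 * Statistics.MINUTE_IN_MS));
}

@Test
public void testShuffleNone() throws IOException {
  // Shuffle time is negligible next to code execution time
  Assert.assertEquals(Severity.NONE, analyzeJob(1 * Statistics.MINUTE_IN_MS, 0, 60 * Statistics.MINUTE_IN_MS));
}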
Use of com.linkedin.drelephant.mapreduce.data.MapReduceTaskData in project dr-elephant by linkedin.
From the class MapReduceFSFetcherHadoop2Test, the testGetTaskData method:
@Test
public void testGetTaskData() {
  FetcherConfiguration fetcherConf = new FetcherConfiguration(document9.getDocumentElement());
  try {
    MapReduceFSFetcherHadoop2 fetcher = new MapReduceFSFetcherHadoop2(fetcherConf.getFetchersConfigurationData().get(0));
    String jobId = "job_14000_001";
    List<JobHistoryParser.TaskInfo> infoList = new ArrayList<JobHistoryParser.TaskInfo>();
    infoList.add(new MockTaskInfo(1, true));
    infoList.add(new MockTaskInfo(2, false));
    MapReduceTaskData[] taskList = fetcher.getTaskData(jobId, infoList);
    Assert.assertNotNull("taskList should not be null.", taskList);
    int succeededTaskCount = 0;
    for (MapReduceTaskData task : taskList) {
      Assert.assertNotNull("Null pointer in taskList.", task);
      if (task.getState().equals("SUCCEEDED")) {
        succeededTaskCount++;
      }
    }
    Assert.assertEquals("Should have two tasks in total.", 2, taskList.length);
    Assert.assertEquals("Should have exactly one succeeded task.", 1, succeededTaskCount);
  } catch (IOException e) {
    Assert.fail("Failed to initialize FileSystem: " + e.getMessage());
  }
}
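MockTaskInfo is a test double defined elsewhere in the test class. A rough sketch of what it might look like, assuming JobHistoryParser.TaskInfo's getters are overridable; the constructor arguments, TaskID details, and stubbed getters are all assumptions, and the real mock likely stubs more methods:

// Hypothetical test double for JobHistoryParser.TaskInfo.
static class MockTaskInfo extends JobHistoryParser.TaskInfo {
  private final TaskID _taskId;
  private final boolean _succeeded;

  MockTaskInfo(int id, boolean succeeded) {
    _taskId = new TaskID("14000", 1, TaskType.MAP, id);
    _succeeded = succeeded;
  }

  @Override
  public TaskID getTaskId() { return _taskId; }

  @Override
  public TaskType getTaskType() { return TaskType.MAP; }

  @Override
  public String getTaskStatus() { return _succeeded ? "SUCCEEDED" : "FAILED"; }
}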
Use of com.linkedin.drelephant.mapreduce.data.MapReduceTaskData in project dr-elephant by linkedin.
From the class JobQueueLimitHeuristicTest, the analyzeJob method:
private Severity analyzeJob(long runtimeMs, String queueName) throws IOException {
  MapReduceCounterData dummyCounter = new MapReduceCounterData();
  MapReduceTaskData[] mappers = new MapReduceTaskData[(2 * NUM_TASKS / 3) + 1];
  MapReduceTaskData[] reducers = new MapReduceTaskData[(NUM_TASKS / 3) + 1];
  Properties jobConf = new Properties();
  jobConf.put("mapred.job.queue.name", queueName);
  int i = 0;
  for (; i < 2 * NUM_TASKS / 3; i++) {
    mappers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
    mappers[i].setTimeAndCounter(new long[] { runtimeMs, 0, 0, 0, 0 }, dummyCounter);
  }
  // Non-sampled task, which does not contain time and counter data
  mappers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
  for (i = 0; i < NUM_TASKS / 3; i++) {
    reducers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
    reducers[i].setTimeAndCounter(new long[] { runtimeMs, 0, 0, 0, 0 }, dummyCounter);
  }
  // Non-sampled task, which does not contain time and counter data
  reducers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
  MapReduceApplicationData data = new MapReduceApplicationData().setCounters(dummyCounter).setReducerData(reducers).setMapperData(mappers).setJobConf(jobConf);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
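Tests would then pair runtimes with queue names to probe the queue's runtime limit. A hedged sketch, assuming the heuristic caps tasks on the default queue at roughly 15 minutes; the limit, queue names, and expected severities are assumptions, not values from the heuristic's configuration:

// Hypothetical usage of the analyzeJob helper above.
@Test
public void testQueueRuntimeLimit() throws IOException {
  // Over the assumed limit on the "default" queue: should be flagged
  Assert.assertEquals(Severity.CRITICAL, analyzeJob(16 * Statistics.MINUTE_IN_MS, "default"));
  // Same runtime on an unrestricted queue: should pass
  Assert.assertEquals(Severity.NONE, analyzeJob(16 * Statistics.MINUTE_IN_MS, "my-own-queue"));
}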
Use of com.linkedin.drelephant.mapreduce.data.MapReduceTaskData in project dr-elephant by linkedin.
From the class MapperGCHeuristicTest, the analyzeJob method:
private Severity analyzeJob(long runtimeMs, long cpuMs, long gcMs) throws IOException {
  MapReduceCounterData jobCounter = new MapReduceCounterData();
  MapReduceTaskData[] mappers = new MapReduceTaskData[NUMTASKS + 1];
  MapReduceCounterData counter = new MapReduceCounterData();
  counter.set(MapReduceCounterData.CounterName.CPU_MILLISECONDS, cpuMs);
  counter.set(MapReduceCounterData.CounterName.GC_MILLISECONDS, gcMs);
  int i = 0;
  for (; i < NUMTASKS; i++) {
    mappers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
    mappers[i].setTimeAndCounter(new long[] { runtimeMs, 0, 0, 0, 0 }, counter);
  }
  // Non-sampled task, which does not contain time and counter data
  mappers[i] = new MapReduceTaskData("task-id-" + i, "task-attempt-id-" + i);
  MapReduceApplicationData data = new MapReduceApplicationData().setCounters(jobCounter).setMapperData(mappers);
  HeuristicResult result = _heuristic.apply(data);
  return result.getSeverity();
}
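A test would then pick CPU and GC times whose ratio crosses the heuristic's bands. A minimal sketch under assumed thresholds; the millisecond values and the severities they map to are illustrative assumptions, not values from the test suite:

// Hypothetical usage; a GC-to-CPU ratio of 0.3 is assumed to be critical here,
// which may not match the heuristic's real thresholds.
@Test
public void testGcCritical() throws IOException {
  Assert.assertEquals(Severity.CRITICAL, analyzeJob(1000000L, 1000000L, 300000L));
}

@Test
public void testGcNone() throws IOException {
  // GC time is a tiny fraction of CPU time
  Assert.assertEquals(Severity.NONE, analyzeJob(1000000L, 1000000L, 50L));
}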