Search in sources:

Example 1 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class MapperSpeedHeuristic method apply.

/**
 * Rates how quickly the sampled map tasks of a Tez application consumed their input.
 *
 * <p>For each sampled task the input size is the sum of the counters named in
 * {@code _counterNames}, and the speed is that size divided by the task's total run time.
 * The severity obtained from the median speed is combined, via {@code Severity.min}, with the
 * severity obtained from the median run time.
 *
 * @param data the application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
public HeuristicResult apply(TezApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }
    TezTaskData[] tasks = data.getMapTaskData();
    List<Long> inputSizes = new ArrayList<Long>();
    List<Long> speeds = new ArrayList<Long>();
    List<Long> runtimesMs = new ArrayList<Long>();
    for (TezTaskData task : tasks) {
        if (!task.isSampled()) {
            continue;
        }
        long inputBytes = 0;
        for (TezCounterData.CounterName counterName : _counterNames) {
            inputBytes += task.getCounters().get(counterName);
        }
        long runtimeMs = task.getTotalRunTimeMs();
        inputSizes.add(inputBytes);
        runtimesMs.add(runtimeMs);
        // Speed is bytes per second. A task with a non-positive run time has no meaningful
        // speed (and a zero run time would make the integer division throw), so it
        // contributes no speed sample.
        if (runtimeMs > 0) {
            speeds.add((1000 * inputBytes) / runtimeMs);
        }
    }
    // Guard each median on its own sample list rather than on tasks.length: the task array
    // can be non-empty while no task was sampled, which would leave the lists empty.
    long medianSpeed = speeds.isEmpty() ? 0 : Statistics.median(speeds);
    long medianSize = inputSizes.isEmpty() ? 0 : Statistics.median(inputSizes);
    long medianRuntimeMs = runtimesMs.isEmpty() ? 0 : Statistics.median(runtimesMs);
    Severity severity = getDiskSpeedSeverity(medianSpeed);
    // This reduces severity if task runtime is insignificant
    severity = Severity.min(severity, getRuntimeSeverity(medianRuntimeMs));
    HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(), _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));
    result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
    result.addResultDetail("Median task input ", FileUtils.byteCountToDisplaySize(medianSize));
    result.addResultDetail("Median task runtime", Statistics.readableTimespan(medianRuntimeMs));
    result.addResultDetail("Median task speed", FileUtils.byteCountToDisplaySize(medianSpeed) + "/s");
    return result;
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) ArrayList(java.util.ArrayList) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 2 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class MapperSpillHeuristic method apply.

/**
 * Rates how heavily the sampled map tasks of a Tez application spilled records.
 *
 * <p>The SPILLED_RECORDS and OUTPUT_RECORDS counters are summed over the sampled tasks and
 * their ratio is turned into a severity, which is then combined via {@code Severity.min}
 * with a severity based on the number of tasks.
 *
 * @param data the application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
@Override
public HeuristicResult apply(TezApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }

    final TezTaskData[] mapTasks = data.getMapTaskData();
    final int taskCount = mapTasks.length;

    long spilledRecords = 0;
    long outputRecords = 0;
    for (TezTaskData mapTask : mapTasks) {
        if (!mapTask.isSampled()) {
            continue;
        }
        spilledRecords += mapTask.getCounters().get(TezCounterData.CounterName.SPILLED_RECORDS);
        outputRecords += mapTask.getCounters().get(TezCounterData.CounterName.OUTPUT_RECORDS);
    }

    // No spills means a ratio of zero, whatever the output count is. With spills but zero
    // output records the floating-point division yields an infinite ratio.
    final double spillRatio = (spilledRecords == 0)
        ? 0.0
        : (double) spilledRecords / (double) outputRecords;

    // Severity is reduced if number of tasks is small
    final Severity severity = Severity.min(getSpillSeverity(spillRatio), getNumTasksSeverity(taskCount));

    // The per-task averages divide by the full task count and are reported as "0" when
    // there are no tasks at all.
    String avgSpilled = "0";
    String avgOutput = "0";
    if (taskCount != 0) {
        avgSpilled = Long.toString(spilledRecords / taskCount);
        avgOutput = Long.toString(outputRecords / taskCount);
    }

    HeuristicResult result = new HeuristicResult(
        _heuristicConfData.getClassName(),
        _heuristicConfData.getHeuristicName(),
        severity,
        Utils.getHeuristicScore(severity, taskCount));
    result.addResultDetail("Number of tasks", Integer.toString(taskCount));
    result.addResultDetail("Avg spilled records per task", avgSpilled);
    result.addResultDetail("Avg output records per task", avgOutput);
    result.addResultDetail("Ratio of spilled records to output records", Double.toString(spillRatio));
    return result;
}
Also used : TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 3 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class MapperTimeHeuristic method apply.

/**
 * Rates the run time of the sampled map tasks of a Tez application.
 *
 * <p>Collects the input size (sum of the counters named in {@code _counterNames}) and the
 * total run time of every sampled task, then reports the higher of the "short task" and
 * "long task" severities computed from the task count and the average run time.
 *
 * @param data the application to analyse
 * @return the heuristic result, or {@code null} when the application did not succeed
 */
public HeuristicResult apply(TezApplicationData data) {
    if (!data.getSucceeded()) {
        return null;
    }

    final TezTaskData[] mapTasks = data.getMapTaskData();
    final int taskCount = mapTasks.length;
    final List<Long> sizes = new ArrayList<Long>();
    final List<Long> durationsMs = new ArrayList<Long>();

    // Long.MAX_VALUE marks "no run time recorded yet".
    long shortestMs = Long.MAX_VALUE;
    long longestMs = 0;

    for (TezTaskData mapTask : mapTasks) {
        if (!mapTask.isSampled()) {
            continue;
        }

        long bytesRead = 0;
        for (TezCounterData.CounterName name : _counterNames) {
            bytesRead += mapTask.getCounters().get(name);
        }
        sizes.add(bytesRead);

        final long durationMs = mapTask.getTotalRunTimeMs();
        durationsMs.add(durationMs);
        if (durationMs < shortestMs) {
            shortestMs = durationMs;
        }
        if (durationMs > longestMs) {
            longestMs = durationMs;
        }
    }

    // Report a minimum of zero when the marker value was never replaced.
    if (shortestMs == Long.MAX_VALUE) {
        shortestMs = 0;
    }

    final long meanSize = Statistics.average(sizes);
    final long meanDurationMs = Statistics.average(durationsMs);

    final Severity tooShort = shortTaskSeverity(taskCount, meanDurationMs);
    final Severity tooLong = longTaskSeverity(taskCount, meanDurationMs);
    final Severity severity = Severity.max(tooShort, tooLong);

    HeuristicResult result = new HeuristicResult(
        _heuristicConfData.getClassName(),
        _heuristicConfData.getHeuristicName(),
        severity,
        Utils.getHeuristicScore(severity, taskCount));
    result.addResultDetail("Number of tasks", Integer.toString(taskCount));
    result.addResultDetail("Average task input size", FileUtils.byteCountToDisplaySize(meanSize));
    result.addResultDetail("Average task runtime", Statistics.readableTimespan(meanDurationMs));
    result.addResultDetail("Max task runtime", Statistics.readableTimespan(longestMs));
    result.addResultDetail("Min task runtime", Statistics.readableTimespan(shortestMs));
    return result;
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) Severity(com.linkedin.drelephant.analysis.Severity) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 4 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class TezTaskLevelAggregatedMetricsTest method testTaskLevelData.

/**
 * Checks the delay, resource-used and resource-wasted values reported by
 * {@code TezTaskLevelAggregatedMetrics} for three tasks: one with explicit timestamps and
 * counters, one with all-zero timestamps and the same counters, and one that never had
 * time or counter data set.
 */
@Test
public void testTaskLevelData() {
    TezTaskData[] taskData = new TezTaskData[3];
    TezCounterData counterData = new TezCounterData();
    counterData.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, 655577088L);
    counterData.set(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES, 3051589632L);
    long[] time = { 0, 0, 0, 1464218501117L, 1464218534148L };
    taskData[0] = new TezTaskData("task", "id");
    taskData[0].setTimeAndCounter(time, counterData);
    taskData[1] = new TezTaskData("task", "id");
    taskData[1].setTimeAndCounter(new long[5], counterData);
    // Non-sampled task, which does not contain time and counter data
    taskData[2] = new TezTaskData("task", "id");
    TezTaskLevelAggregatedMetrics taskMetrics = new TezTaskLevelAggregatedMetrics(taskData, 4096L, 1463218501117L);
    // assertEquals takes (expected, actual); keeping that order makes a failure message
    // report the expected and actual values the right way round.
    Assert.assertEquals(1000000000L, taskMetrics.getDelay());
    Assert.assertEquals(135168L, taskMetrics.getResourceUsed());
    Assert.assertEquals(66627L, taskMetrics.getResourceWasted());
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) Test(org.junit.Test)

Example 5 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

the class MapperDataSkewHeuristicTest method analyzeJob.

/**
 * Builds a Tez application with the requested mix of map tasks, runs {@code _heuristic}
 * on it and returns the resulting severity.
 *
 * <p>Small tasks carry {@code smallInputSize} in HDFS_BYTES_READ and large tasks carry
 * {@code largeInputSize} in S3A_BYTES_READ. One extra task with no time or counter data is
 * appended after them.
 *
 * @param numSmallTasks number of tasks given the small counter
 * @param numLargeTasks number of tasks given the large counter
 * @param smallInputSize value stored in the small tasks' counter
 * @param largeInputSize value stored in the large tasks' counter
 * @return the severity reported by the heuristic
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private Severity analyzeJob(int numSmallTasks, int numLargeTasks, long smallInputSize, long largeInputSize) throws IOException {
    TezCounterData jobCounter = new TezCounterData();
    TezTaskData[] mappers = new TezTaskData[numSmallTasks + numLargeTasks + 1];

    TezCounterData smallCounter = new TezCounterData();
    smallCounter.set(TezCounterData.CounterName.HDFS_BYTES_READ, smallInputSize);
    TezCounterData largeCounter = new TezCounterData();
    largeCounter.set(TezCounterData.CounterName.S3A_BYTES_READ, largeInputSize);

    // A single cursor walks the array: small tasks first, then large ones.
    int next = 0;
    while (next < numSmallTasks) {
        TezTaskData smallTask = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        mappers[next] = smallTask;
        smallTask.setTimeAndCounter(new long[5], smallCounter);
        next++;
    }
    final int endOfLarge = numSmallTasks + numLargeTasks;
    while (next < endOfLarge) {
        TezTaskData largeTask = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        mappers[next] = largeTask;
        largeTask.setTimeAndCounter(new long[5], largeCounter);
        next++;
    }

    // Non-sampled task, which does not contain time and counter data
    mappers[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData)

Aggregations

TezTaskData (com.linkedin.drelephant.tez.data.TezTaskData)22 TezCounterData (com.linkedin.drelephant.tez.data.TezCounterData)17 HeuristicResult (com.linkedin.drelephant.analysis.HeuristicResult)15 TezApplicationData (com.linkedin.drelephant.tez.data.TezApplicationData)13 Severity (com.linkedin.drelephant.analysis.Severity)7 ArrayList (java.util.ArrayList)3 Properties (java.util.Properties)2 URL (java.net.URL)1 Test (org.junit.Test)1