Search in sources :

Example 16 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

Found in the class MapperDataSkewHeuristicTest, method analyzeJobTime.

/**
 * Builds a synthetic Tez application whose map tasks fall into two runtime groups and
 * returns the severity that {@code _heuristic} reports for it.
 *
 * <p>Every timed task shares one empty {@link TezCounterData}. One extra task with
 * neither time nor counter data is appended after the timed tasks.
 *
 * @param numSmallTasks number of tasks whose total time is {@code smallTimeTaken}
 * @param numLongTasks number of tasks whose total time is {@code longTimeTaken}
 * @param smallTimeTaken total time, in milliseconds, assigned to the first group
 * @param longTimeTaken total time, in milliseconds, assigned to the second group
 * @return the severity of the heuristic result for the generated application
 * @throws IOException declared by the original helper; no call visible here documents when it is raised
 */
private Severity analyzeJobTime(int numSmallTasks, int numLongTasks, long smallTimeTaken, long longTimeTaken) throws IOException {
    TezCounterData sharedCounter = new TezCounterData();
    TezTaskData[] mapTasks = new TezTaskData[numSmallTasks + numLongTasks + 1];

    // Single cursor that walks the array across both groups and the trailing task.
    int next = 0;

    // First group: tasks with the small total time.
    while (next < numSmallTasks) {
        TezTaskData shortTask = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        // NOTE(review): the boolean flag presumably marks the task as sampled - confirm in TezTaskData.
        shortTask.setTotalTimeMs(smallTimeTaken, true);
        shortTask.setCounter(sharedCounter);
        mapTasks[next] = shortTask;
        next++;
    }

    // Second group: tasks with the long total time.
    final int timedTaskCount = numSmallTasks + numLongTasks;
    while (next < timedTaskCount) {
        TezTaskData longTask = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        longTask.setTotalTimeMs(longTimeTaken, true);
        longTask.setCounter(sharedCounter);
        mapTasks[next] = longTask;
        next++;
    }

    // Trailing non-sampled task: only ids, no time data.
    mapTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData().setCounters(sharedCounter).setMapTaskData(mapTasks);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData)

Example 17 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

Found in the class MapperMemoryHeuristicTest, method analyzeJob.

/**
 * Builds a synthetic Tez application with {@code NUMTASKS} map tasks that all report the
 * same physical memory counter, configures the mapper container memory, and returns the
 * severity that {@code _heuristic} reports for it.
 *
 * <p>One extra task with neither time nor counter data is appended after the populated tasks.
 *
 * @param taskAvgMemMB per-task memory in MB; stored as {@code taskAvgMemMB * FileUtils.ONE_MB}
 *        under {@code PHYSICAL_MEMORY_BYTES}
 * @param containerMemMB value written to {@code MapperMemoryHeuristic.MAPPER_MEMORY_CONF}
 * @return the severity of the heuristic result for the generated application
 * @throws IOException declared by the original helper; no call visible here documents when it is raised
 */
private Severity analyzeJob(long taskAvgMemMB, long containerMemMB) throws IOException {
    // Application-level counter stays empty; only the per-task counter carries memory usage.
    TezCounterData appCounter = new TezCounterData();
    TezTaskData[] mapTasks = new TezTaskData[NUMTASKS + 1];

    TezCounterData memoryCounter = new TezCounterData();
    long physicalMemoryBytes = taskAvgMemMB * FileUtils.ONE_MB;
    memoryCounter.set(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES, physicalMemoryBytes);

    Properties conf = new Properties();
    conf.setProperty(MapperMemoryHeuristic.MAPPER_MEMORY_CONF, String.valueOf(containerMemMB));

    int next = 0;
    while (next < NUMTASKS) {
        TezTaskData populated = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);
        // Five zeroed time slots: the task has time data, but every value is 0.
        populated.setTime(new long[5]);
        populated.setCounter(memoryCounter);
        mapTasks[next] = populated;
        next++;
    }

    // Trailing non-sampled task: only ids, no time and no counter data.
    mapTasks[next] = new TezTaskData("task-id-" + next, "task-attempt-id-" + next);

    TezApplicationData application = new TezApplicationData().setCounters(appCounter).setMapTaskData(mapTasks);
    application.setConf(conf);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) Properties(java.util.Properties) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 18 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

Found in the class MapperTimeHeuristicTest, method analyzeJob.

/**
 * Builds a synthetic Tez application with {@code numTasks} map tasks that all have the same
 * runtime and the same input-size counter, and returns the severity that {@code _heuristic}
 * reports for it.
 *
 * <p>One extra task with neither time nor counter data is appended after the populated tasks.
 *
 * @param numTasks number of populated map tasks to create
 * @param runtime value placed in the first slot of each task's time array; the other four
 *        slots are 0
 * @return the severity of the heuristic result for the generated application
 * @throws IOException declared by the original helper; no call visible here documents when it is raised
 */
private Severity analyzeJob(int numTasks, long runtime) throws IOException {
    // Application-level counter; intentionally left empty.
    TezCounterData jobCounter = new TezCounterData();
    TezTaskData[] mappers = new TezTaskData[numTasks + 1];

    // Per-task counter carrying the bytes read by each populated task.
    TezCounterData taskCounter = new TezCounterData();
    taskCounter.set(TezCounterData.CounterName.S3A_BYTES_READ, DUMMY_INPUT_SIZE / 4);

    int i = 0;
    for (; i < numTasks; i++) {
        // Attach the populated per-task counter. The previous code passed the empty
        // jobCounter here, which left taskCounter built but never used.
        mappers[i] = new TezTaskData(taskCounter, new long[] { runtime, 0, 0, 0, 0 });
    }

    // Non-sampled task, which does not contain time and counter data
    mappers[i] = new TezTaskData("task-id-" + i, "task-attempt-id-" + i);

    TezApplicationData data = new TezApplicationData().setCounters(jobCounter).setMapTaskData(mappers);
    HeuristicResult result = _heuristic.apply(data);
    return result.getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) HeuristicResult(com.linkedin.drelephant.analysis.HeuristicResult)

Example 19 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

Found in the class ReducerDataSkewHeuristicTest, method analyzeJobTime.

/**
 * Builds a synthetic Tez application whose reduce tasks fall into two runtime groups and
 * returns the severity that {@code _heuristic} reports for it.
 *
 * <p>Every timed task shares one empty {@link TezCounterData}. One extra task with
 * neither time nor counter data is appended after the timed tasks.
 *
 * @param numSmallTasks number of tasks whose total time is {@code smallTimeTaken}
 * @param numLongTasks number of tasks whose total time is {@code longTimeTaken}
 * @param smallTimeTaken total time, in milliseconds, assigned to the first group
 * @param longTimeTaken total time, in milliseconds, assigned to the second group
 * @return the severity of the heuristic result for the generated application
 * @throws IOException declared by the original helper; no call visible here documents when it is raised
 */
private Severity analyzeJobTime(int numSmallTasks, int numLongTasks, long smallTimeTaken, long longTimeTaken) throws IOException {
    TezCounterData commonCounter = new TezCounterData();
    TezTaskData[] reduceTasks = new TezTaskData[numSmallTasks + numLongTasks + 1];
    final int timedTaskCount = numSmallTasks + numLongTasks;

    // One cursor is carried through both groups and on to the trailing task.
    int slot = 0;

    // Group with the small total time.
    for (; slot < numSmallTasks; slot++) {
        TezTaskData quick = new TezTaskData("task-id-" + slot, "task-attempt-id-" + slot);
        // NOTE(review): the boolean flag presumably marks the task as sampled - confirm in TezTaskData.
        quick.setTotalTimeMs(smallTimeTaken, true);
        quick.setCounter(commonCounter);
        reduceTasks[slot] = quick;
    }

    // Group with the long total time.
    for (; slot < timedTaskCount; slot++) {
        TezTaskData slow = new TezTaskData("task-id-" + slot, "task-attempt-id-" + slot);
        slow.setTotalTimeMs(longTimeTaken, true);
        slow.setCounter(commonCounter);
        reduceTasks[slot] = slow;
    }

    // Trailing non-sampled task: only ids, no time data.
    reduceTasks[slot] = new TezTaskData("task-id-" + slot, "task-attempt-id-" + slot);

    TezApplicationData application = new TezApplicationData().setCounters(commonCounter).setReduceTaskData(reduceTasks);
    return _heuristic.apply(application).getSeverity();
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData)

Example 20 with TezTaskData

use of com.linkedin.drelephant.tez.data.TezTaskData in project dr-elephant by linkedin.

Found in the class TezFetcher, method fetchData.

/**
 * Fetches configuration, task data and counters for a Tez application.
 *
 * <p>All DAGs of the application are visited. Among the DAGs whose state is
 * {@code SUCCEEDED}, the one with the most tasks (mappers plus reducers) supplies the task
 * data, and its counters are used as the application counters so the two stay consistent.
 * If no DAG was selected, the counters of the first DAG are used. If the application has no
 * DAGs at all, an empty {@link TezCounterData} is used.
 *
 * <p>Start time, finish time and the succeeded flag are overwritten on each loop iteration,
 * so they reflect the last DAG processed.
 * NOTE(review): confirm whether these should follow the selected DAG instead.
 *
 * @param analyticJob the job whose application id identifies the Tez application to fetch
 * @return the populated application data
 * @throws IOException declared by this method; presumably raised by the remote lookups - confirm
 * @throws AuthenticationException declared by this method; presumably raised by the remote lookups - confirm
 */
public TezApplicationData fetchData(AnalyticJob analyticJob) throws IOException, AuthenticationException {
    int maxSize = 0;
    String appId = analyticJob.getAppId();
    TezApplicationData jobData = new TezApplicationData();
    jobData.setAppId(appId);
    Properties jobConf = _jsonFactory.getProperties(_urlFactory.getApplicationURL(appId));
    jobData.setConf(jobConf);
    URL dagIdsUrl = _urlFactory.getDagURLByTezApplicationId(appId);
    List<String> dagIdsByApplicationId = _jsonFactory.getDagIdsByApplicationId(dagIdsUrl);
    List<TezTaskData> mapperListAggregate = new ArrayList<TezTaskData>();
    List<TezTaskData> reducerListAggregate = new ArrayList<TezTaskData>();
    // Id of the DAG whose task lists were kept; stays null if no DAG qualified.
    String selectedDagId = null;
    // Iterate over dagIds and choose the dagId with the highest no. of tasks/highest impact as settings changes can be made only at DAG level.
    for (String dagId : dagIdsByApplicationId) {
        try {
            // set job task independent properties
            URL dagUrl = _urlFactory.getDagURL(dagId);
            String state = _jsonFactory.getState(dagUrl);
            jobData.setStartTime(_jsonFactory.getDagStartTime(dagUrl));
            jobData.setFinishTime(_jsonFactory.getDagEndTime(dagUrl));
            // Constant-first comparison so a missing (null) state is skipped instead of throwing.
            if ("SUCCEEDED".equals(state)) {
                jobData.setSucceeded(true);
                List<TezTaskData> mapperList = new ArrayList<TezTaskData>();
                List<TezTaskData> reducerList = new ArrayList<TezTaskData>();
                // Fetch task data
                URL vertexListUrl = _urlFactory.getVertexListURL(dagId);
                _jsonFactory.getTaskDataAll(vertexListUrl, dagId, mapperList, reducerList);
                int taskCount = mapperList.size() + reducerList.size();
                if (taskCount > maxSize) {
                    mapperListAggregate = mapperList;
                    reducerListAggregate = reducerList;
                    maxSize = taskCount;
                    selectedDagId = dagId;
                }
            } else if ("FAILED".equals(state)) {
                jobData.setSucceeded(false);
            }
        } finally {
            ThreadContextMR2.updateAuthToken();
        }
    }
    TezTaskData[] mapperData = mapperListAggregate.toArray(new TezTaskData[mapperListAggregate.size()]);
    TezTaskData[] reducerData = reducerListAggregate.toArray(new TezTaskData[reducerListAggregate.size()]);

    // Reuse the DAG id list fetched above rather than requesting it a second time.
    TezCounterData dagCounter;
    if (selectedDagId != null) {
        // Counters come from the same DAG that supplied the task data.
        dagCounter = _jsonFactory.getDagCounter(_urlFactory.getDagURL(selectedDagId));
    } else if (!dagIdsByApplicationId.isEmpty()) {
        // No DAG was selected: keep the previous behaviour of reading the first DAG's counters.
        dagCounter = _jsonFactory.getDagCounter(_urlFactory.getDagURL(dagIdsByApplicationId.get(0)));
    } else {
        // No DAGs at all: use empty counters instead of failing on get(0).
        dagCounter = new TezCounterData();
    }
    jobData.setCounters(dagCounter).setMapTaskData(mapperData).setReduceTaskData(reducerData);
    return jobData;
}
Also used : TezCounterData(com.linkedin.drelephant.tez.data.TezCounterData) TezTaskData(com.linkedin.drelephant.tez.data.TezTaskData) TezApplicationData(com.linkedin.drelephant.tez.data.TezApplicationData) URL(java.net.URL)

Aggregations

TezTaskData (com.linkedin.drelephant.tez.data.TezTaskData): 22, TezCounterData (com.linkedin.drelephant.tez.data.TezCounterData): 17, HeuristicResult (com.linkedin.drelephant.analysis.HeuristicResult): 15, TezApplicationData (com.linkedin.drelephant.tez.data.TezApplicationData): 13, Severity (com.linkedin.drelephant.analysis.Severity): 7, ArrayList (java.util.ArrayList): 3, Properties (java.util.Properties): 2, URL (java.net.URL): 1, Test (org.junit.Test): 1