Search in sources :

Example 11 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class TaskAssignmentAnalyzer method analyze.

/**
 * Records how the task attempts of each vertex are distributed over nodes.
 *
 * <p>For every vertex of the DAG the attempts are counted per node id. One record is then
 * added per node, carrying the vertex name, the node id, the number of attempts seen on that
 * node and that number as a percentage of the mean attempts per node for the vertex.
 *
 * @param dagInfo DAG whose vertices and task attempts are inspected
 * @throws TezException declared by the overridden method; nothing in this body throws it directly
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
    // One map is reused for all vertices and emptied at the start of each.
    Map<String, Integer> attemptsPerNode = new HashMap<>();
    for (VertexInfo vertex : dagInfo.getVertices()) {
        attemptsPerNode.clear();
        for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) {
            String nodeId = attempt.getNodeId();
            Integer seenSoFar = attemptsPerNode.get(nodeId);
            if (seenSoFar == null) {
                attemptsPerNode.put(nodeId, 1);
            } else {
                attemptsPerNode.put(nodeId, seenSoFar + 1);
            }
        }
        // The divisor is clamped to at least 1.0 so a vertex without attempts cannot divide by zero.
        double nodeCount = Math.max(1.0, attemptsPerNode.size());
        double mean = vertex.getTaskAttempts().size() / nodeCount;
        for (Map.Entry<String, Integer> perNode : attemptsPerNode.entrySet()) {
            Integer attemptsOnNode = perNode.getValue();
            addARecord(vertex.getVertexName(), perNode.getKey(), attemptsOnNode, attemptsOnNode * 100 / mean);
        }
    }
}
Also used : HashMap(java.util.HashMap) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) Map(java.util.Map)

Example 12 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class CriticalPathAnalyzer method determineConcurrency.

/**
 * Derives the attempt concurrency over time for the given DAG.
 *
 * <p>Each task attempt with a positive start time contributes one start event and one finish
 * event. The events are sorted (using {@code TimeInfo}'s natural ordering, presumably by
 * timestamp) and then walked while keeping a running count of started-but-not-finished
 * attempts. The highest count seen is kept in {@code maxConcurrency}. An event is appended to
 * {@code concurrencyByTime} when its timestamp is later than the last appended one; otherwise
 * only the count of the last appended event is refreshed.
 *
 * @param dag DAG whose task attempts are scanned
 */
private void determineConcurrency(DagInfo dag) {
    ArrayList<TimeInfo> events = Lists.newArrayList();
    for (VertexInfo vertex : dag.getVertices()) {
        for (TaskInfo task : vertex.getTasks()) {
            for (TaskAttemptInfo attempt : task.getTaskAttempts()) {
                if (attempt.getStartTime() <= 0) {
                    // attempts without a positive start time contribute no events
                    continue;
                }
                events.add(new TimeInfo(attempt.getStartTime(), true));
                events.add(new TimeInfo(attempt.getFinishTime(), false));
            }
        }
    }
    Collections.sort(events);

    int running = 0;
    TimeInfo lastRecorded = null;
    for (TimeInfo event : events) {
        if (event.start) {
            running++;
        } else {
            running--;
        }
        if (running > maxConcurrency) {
            maxConcurrency = running;
        }
        boolean laterTimestamp = lastRecorded == null || lastRecorded.timestamp < event.timestamp;
        if (laterTimestamp) {
            lastRecorded = event;
        }
        // Either the freshly selected event or the previous one sharing this timestamp
        // gets the up-to-date count.
        lastRecorded.count = running;
        if (laterTimestamp) {
            concurrencyByTime.add(lastRecorded);
        }
    }
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Example 13 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class CriticalPathAnalyzer method analyzeAllocationOverhead.

/**
 * Annotates critical-path attempt steps with notes about why allocation may have been delayed.
 *
 * <p>First collects every attempt in the DAG whose termination cause equals the name of
 * {@code TaskAttemptTerminationCause.INTERNAL_PREEMPTION}. Then, for each step of
 * {@code criticalPath} that is of type {@code EntityType.ATTEMPT}, was allocated at or after
 * {@code step.startCriticalPathTime}, and has a non-null container:
 * <ul>
 *   <li>walks the attempts mapped to the same container (ordered by
 *       {@code TaskAttemptInfo.orderingOnAllocationTime()}) up to the step's own attempt, and
 *       adds notes about container reuse and how long the container was occupied;</li>
 *   <li>adds a note for every preempted attempt of a different vertex that finished strictly
 *       between this attempt's creation time and allocation time.</li>
 * </ul>
 * All findings are appended to {@code step.notes}; nothing is returned.
 *
 * @param dag DAG supplying the vertices, tasks, attempts and the container-to-attempts mapping
 */
private void analyzeAllocationOverhead(DagInfo dag) {
    // Gather all internally preempted attempts once, up front.
    List<TaskAttemptInfo> preemptedAttempts = Lists.newArrayList();
    for (VertexInfo v : dag.getVertices()) {
        for (TaskInfo t : v.getTasks()) {
            for (TaskAttemptInfo a : t.getTaskAttempts()) {
                if (a.getTerminationCause().equals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION.name())) {
                    LOG.debug("Found preempted attempt " + a.getTaskAttemptId());
                    preemptedAttempts.add(a);
                }
            }
        }
    }
    for (int i = 0; i < criticalPath.size(); ++i) {
        CriticalPathStep step = criticalPath.get(i);
        TaskAttemptInfo attempt = step.attempt;
        // only steps of type ATTEMPT are analysed
        if (step.getType() != EntityType.ATTEMPT) {
            continue;
        }
        long creationTime = attempt.getCreationTime();
        long allocationTime = attempt.getAllocationTime();
        long finishTime = attempt.getFinishTime();
        if (allocationTime < step.startCriticalPathTime) {
            // allocated before it became critical
            continue;
        }
        // the attempt is critical before allocation. So allocation overhead needs analysis
        Container container = attempt.getContainer();
        if (container != null) {
            Collection<TaskAttemptInfo> attempts = dag.getContainerMapping().get(container);
            if (attempts != null && !attempts.isEmpty()) {
                // arrange attempts by allocation time
                List<TaskAttemptInfo> attemptsList = Lists.newArrayList(attempts);
                Collections.sort(attemptsList, TaskAttemptInfo.orderingOnAllocationTime());
                // walk the list to record allocation time before the current attempt
                long containerPreviousAllocatedTime = 0;
                // starts at 1 and is incremented for each earlier attempt of the same vertex
                int reUsesForVertex = 1;
                for (TaskAttemptInfo containerAttempt : attemptsList) {
                    // stop at the current attempt; only attempts sorted before it are considered
                    if (containerAttempt.getTaskAttemptId().equals(attempt.getTaskAttemptId())) {
                        break;
                    }
                    if (containerAttempt.getTaskInfo().getVertexInfo().getVertexId().equals(attempt.getTaskInfo().getVertexInfo().getVertexId())) {
                        // another task from the same vertex ran in this container. So there are multiple
                        // waves for this vertex on this container.
                        reUsesForVertex++;
                    }
                    long cAllocTime = containerAttempt.getAllocationTime();
                    long cFinishTime = containerAttempt.getFinishTime();
                    if (cFinishTime > creationTime) {
                        // for containerAttempts that used the container while this attempt was waiting
                        // add up time container was allocated to containerAttempt. Account for allocations
                        // that started before this attempt was created.
                        containerPreviousAllocatedTime += (cFinishTime - (cAllocTime > creationTime ? cAllocTime : creationTime));
                    }
                }
                int numVertexTasks = attempt.getTaskInfo().getVertexInfo().getNumTasks();
                // NOTE(review): getIntervalMaxConcurrency and getWaves are defined elsewhere; presumably
                // they give the peak concurrency within [creationTime, finishTime] and the number of
                // task waves implied by it -- confirm against their definitions.
                int intervalMaxConcurrency = getIntervalMaxConcurrency(creationTime, finishTime);
                double numWaves = getWaves(numVertexTasks, intervalMaxConcurrency);
                if (reUsesForVertex > 1) {
                    step.notes.add("Container ran multiple tasks for this vertex. ");
                    if (numWaves < 1) {
                        // less than 1 wave total but still ran more than 1 on this container
                        step.notes.add("Vertex potentially seeing contention from other branches in the DAG. ");
                    }
                }
                if (containerPreviousAllocatedTime == 0) {
                    // no earlier attempt on this container finished after this attempt was created
                    step.notes.add("Container newly allocated.");
                } else {
                    if (containerPreviousAllocatedTime >= attempt.getCreationToAllocationTimeInterval()) {
                        // earlier attempts occupied the container for at least the whole wait interval
                        step.notes.add("Container was fully allocated");
                    } else {
                        step.notes.add("Container in use for " + SVGUtils.getTimeStr(containerPreviousAllocatedTime) + " out of " + SVGUtils.getTimeStr(attempt.getCreationToAllocationTimeInterval()) + " of allocation wait time");
                    }
                }
            }
            // look for internal preemptions while attempt was waiting for allocation
            for (TaskAttemptInfo a : preemptedAttempts) {
                if (a.getTaskInfo().getVertexInfo().getVertexId().equals(attempt.getTaskInfo().getVertexInfo().getVertexId())) {
                    // skip preempted attempts of the same vertex. Ideally this should look at priority,
                    // but that is not available here
                    continue;
                }
                if (a.getFinishTime() > creationTime && a.getFinishTime() < allocationTime) {
                    // found an attempt that was preempted within this time interval
                    step.notes.add("Potentially waited for preemption of " + a.getShortName());
                }
            }
        }
    }
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) Container(org.apache.tez.history.parser.datamodel.Container) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Example 14 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class SpillAnalyzerImpl method analyze.

/**
 * Reports task attempts that spilled more than once while producing a sizeable output.
 *
 * <p>For every task attempt of every vertex, the ADDITIONAL_SPILL_COUNT, SPILLED_RECORDS,
 * OUTPUT_RECORDS and OUTPUT_BYTES counters are fetched. Each entry of the spill-count map is
 * matched by key against the other three maps. When the spill count exceeds 1 and the output
 * bytes exceed {@code minOutputBytesPerTask}, a record with the attempt details and a tuning
 * hint is added to {@code csvResult}.
 *
 * @param dagInfo DAG whose vertices and task attempts are inspected
 * @throws TezException declared by the overridden method; nothing in this body throws it directly
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
    for (VertexInfo vertex : dagInfo.getVertices()) {
        String vertexName = vertex.getVertexName();
        for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) {
            // Counter maps for this attempt; the code treats each key as a "source".
            Map<String, TezCounter> spillCounts = attempt.getCounter(TaskCounter.ADDITIONAL_SPILL_COUNT.name());
            Map<String, TezCounter> spilledRecordCounts = attempt.getCounter(TaskCounter.SPILLED_RECORDS.name());
            Map<String, TezCounter> outputRecordCounts = attempt.getCounter(TaskCounter.OUTPUT_RECORDS.name());
            Map<String, TezCounter> outputByteCounts = attempt.getCounter(TaskCounter.OUTPUT_BYTES.name());
            for (Map.Entry<String, TezCounter> spillEntry : spillCounts.entrySet()) {
                String source = spillEntry.getKey();
                long spillCount = spillEntry.getValue().getValue();
                // All three lookups happen before the threshold check, as in the original flow.
                long outBytes = outputByteCounts.get(source).getValue();
                long outputRecords = outputRecordCounts.get(source).getValue();
                long spilledRecords = spilledRecordCounts.get(source).getValue();
                if (spillCount <= 1 || outBytes <= minOutputBytesPerTask) {
                    continue;
                }
                String[] record = {
                    vertexName,
                    attempt.getTaskAttemptId(),
                    attempt.getNodeId(),
                    source,
                    String.valueOf(spillCount),
                    String.valueOf(attempt.getTimeTaken()),
                    String.valueOf(outBytes),
                    String.valueOf(outputRecords),
                    String.valueOf(spilledRecords),
                    "Consider increasing " + TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB + ". Try increasing container size."
                };
                csvResult.addRecord(record);
            }
        }
    }
}
Also used : TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) TezCounter(org.apache.tez.common.counters.TezCounter) Map(java.util.Map)

Example 15 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class VertexLevelCriticalPathAnalyzer method getCriticalPath.

/**
 * Recursively records the accumulated time of every path that leads into {@code dest}.
 *
 * <p>The name of {@code dest} followed by {@code CONNECTOR} is appended to the path built so
 * far, and its time taken is added to the running total. The method then recurses into each
 * input vertex of {@code dest} and finally stores the extended path with its total in
 * {@code result}. A {@code null} vertex is ignored.
 *
 * @param predecessor path string accumulated by the callers so far
 * @param dest vertex to append to the path; may be {@code null}, in which case nothing happens
 * @param time time accumulated along {@code predecessor}
 * @param result receives one entry per visited path, mapping the path string to its total time
 */
private static void getCriticalPath(String predecessor, VertexInfo dest, long time, Map<String, Long> result) {
    if (dest == null) {
        return;
    }
    String pathThroughDest = predecessor + dest.getVertexName() + CONNECTOR;
    long accumulatedTime = time;
    accumulatedTime += dest.getTimeTaken();
    for (VertexInfo upstream : dest.getInputVertices()) {
        getCriticalPath(pathThroughDest, upstream, accumulatedTime, result);
    }
    // Stored after the recursion, so entries for the longer upstream paths are put first.
    result.put(pathThroughDest, accumulatedTime);
}
Also used : VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Aggregations

VertexInfo (org.apache.tez.history.parser.datamodel.VertexInfo)22 TaskAttemptInfo (org.apache.tez.history.parser.datamodel.TaskAttemptInfo)15 TaskInfo (org.apache.tez.history.parser.datamodel.TaskInfo)6 Map (java.util.Map)5 TezCounter (org.apache.tez.common.counters.TezCounter)4 EdgeInfo (org.apache.tez.history.parser.datamodel.EdgeInfo)3 Container (org.apache.tez.history.parser.datamodel.Container)2 DataDependencyEvent (org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent)2 HashMap (java.util.HashMap)1 Scanner (java.util.Scanner)1 DAGCounter (org.apache.tez.common.counters.DAGCounter)1 TaskCounter (org.apache.tez.common.counters.TaskCounter)1 TezException (org.apache.tez.dag.api.TezException)1 TezDAGID (org.apache.tez.dag.records.TezDAGID)1 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)1 TezTaskID (org.apache.tez.dag.records.TezTaskID)1 TezVertexID (org.apache.tez.dag.records.TezVertexID)1 Graph (org.apache.tez.dag.utils.Graph)1 WordCount (org.apache.tez.examples.WordCount)1 AdditionalInputOutputDetails (org.apache.tez.history.parser.datamodel.AdditionalInputOutputDetails)1