Search in sources :

Example 1 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class ATSFileParser method processVertices.

/**
 * Parses the vertices JSON array and appends one {@link VertexInfo} per element to
 * {@code vertexList}.
 *
 * @param verticesJson JSON array holding one object per vertex; must not be null
 * @throws JSONException propagated from reading an array element or creating its VertexInfo
 * @throws IllegalStateException if {@code verticesJson} is null (raised by the
 *         {@code Preconditions.checkState} guard)
 */
private void processVertices(JSONArray verticesJson) throws JSONException {
    // Fail fast on a missing payload. checkState throws when the condition is false, so the
    // code below can rely on verticesJson being non-null without a second check.
    Preconditions.checkState(verticesJson != null, "Vertex json can not be null");
    LOG.debug("Started parsing vertex");
    for (int i = 0; i < verticesJson.length(); i++) {
        vertexList.add(VertexInfo.create(verticesJson.getJSONObject(i)));
    }
    LOG.debug("Finished parsing vertex");
}
Also used : VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Example 2 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class TestHistoryParser method testParserWithFailedJob.

/**
 * Runs a word-count DAG whose second processor fails, imports the resulting data from ATS,
 * and verifies the failure details exposed by the parsed {@code DagInfo}.
 */
@Test
public void testParserWithFailedJob() throws Exception {
    // Launch a DAG that is expected to fail
    String tokenProcessorName = WordCount.TokenProcessor.class.getName();
    String failProcessorName = FailProcessor.class.getName();
    String dagId = runWordCount(tokenProcessorName, failProcessorName, "WordCount-With-Exception", true);

    // Download the DAG data from ATS
    String[] importArgs = new String[] {
            "--dagId=" + dagId,
            "--downloadDir=" + DOWNLOAD_DIR,
            "--yarnTimelineAddress=" + yarnTimelineAddress
    };
    int exitCode = ATSImportTool.process(importArgs);
    assertTrue(exitCode == 0);

    // Parse the downloaded ATS data
    DagInfo dagInfo = getDagInfo(dagId);
    // Config check is applicable for the ATS dataset
    checkConfig(dagInfo);
    // Verifies vertex, task and task attempts in a recursive manner
    verifyDagInfo(dagInfo, true);

    // DAG-specific expectations: 1 task, 4 attempts failed
    VertexInfo summationVertex = dagInfo.getVertex(SUMMATION);
    assertTrue(summationVertex.getFailedTasks().size() == 1);
    assertTrue(summationVertex.getFailedTasks().get(0).getFailedTaskAttempts().size() == 4);
    String failedVertexState = VertexState.FAILED.toString();
    assertTrue(summationVertex.getStatus().equals(failedVertexState));

    assertTrue(dagInfo.getFailedVertices().size() == 1);
    assertTrue(dagInfo.getFailedVertices().get(0).getVertexName().equals(SUMMATION));
    assertTrue(dagInfo.getSuccessfullVertices().size() == 1);
    assertTrue(dagInfo.getSuccessfullVertices().get(0).getVertexName().equals(TOKENIZER));
    String failedDagState = DAGState.FAILED.toString();
    assertTrue(dagInfo.getStatus().equals(failedDagState));

    // DAG-level task counters
    verifyCounter(dagInfo.getCounter(DAGCounter.NUM_FAILED_TASKS.toString()), null, 4);
    verifyCounter(dagInfo.getCounter(DAGCounter.NUM_SUCCEEDED_TASKS.toString()), null, 1);
    verifyCounter(dagInfo.getCounter(DAGCounter.TOTAL_LAUNCHED_TASKS.toString()), null, 5);

    // Tokenizer input/output counters
    String tokenizerInputGroup = "TaskCounter_Tokenizer_INPUT_Input";
    String tokenizerOutputGroup = "TaskCounter_Tokenizer_OUTPUT_Summation";
    verifyCounter(dagInfo.getCounter(TaskCounter.INPUT_RECORDS_PROCESSED.toString()), tokenizerInputGroup, 10);
    verifyCounter(dagInfo.getCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ.toString()), tokenizerOutputGroup, 0);
    // Every line has 2 words. 10 lines x 2 words = 20
    verifyCounter(dagInfo.getCounter(TaskCounter.OUTPUT_RECORDS.toString()), tokenizerOutputGroup, 20);
    // Same as above
    verifyCounter(dagInfo.getCounter(TaskCounter.SPILLED_RECORDS.toString()), tokenizerOutputGroup, 20);

    // Walk consecutive attempts of each summation task
    for (TaskInfo task : summationVertex.getTasks()) {
        TaskAttemptInfo previous = null;
        for (TaskAttemptInfo current : task.getTaskAttempts()) {
            if (previous != null) {
                // failed attempt should be causal TA of next attempt
                assertTrue(previous.getTaskAttemptId().equals(current.getCreationCausalTA()));
                assertTrue(previous.getTerminationCause() != null);
            }
            previous = current;
        }
    }

    // TODO: Need to check for SUMMATION vertex counters. Since all attempts are failed, counters are not getting populated.
    // TaskCounter.REDUCE_INPUT_RECORDS

    // Verify if the processor exception is given in diagnostics
    assertTrue(dagInfo.getDiagnostics().contains("Failing this processor for some reason"));
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) DagInfo(org.apache.tez.history.parser.datamodel.DagInfo) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) Test(org.junit.Test)

Example 3 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class ContainerReuseAnalyzer method analyze.

/**
 * Adds one CSV record per (vertex, container) pair of the DAG. Each record holds, in order:
 * the vertex name, the number of task attempts in the vertex, the container host, the
 * container id, and the number of task attempts mapped to that container.
 *
 * @param dagInfo DAG whose vertices are inspected
 * @throws TezException declared by the signature; nothing in this body throws it explicitly
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
    for (VertexInfo vertex : dagInfo.getVertices()) {
        Multimap<Container, TaskAttemptInfo> attemptsByContainer = vertex.getContainersMapping();
        for (Container usedContainer : attemptsByContainer.keySet()) {
            String[] row = {
                    vertex.getVertexName(),
                    String.valueOf(vertex.getTaskAttempts().size()),
                    usedContainer.getHost(),
                    usedContainer.getId(),
                    Integer.toString(attemptsByContainer.get(usedContainer).size())
            };
            csvResult.addRecord(row);
        }
    }
}
Also used : Container(org.apache.tez.history.parser.datamodel.Container) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Example 4 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class SlowestVertexAnalyzer method analyze.

/**
 * Adds one CSV record per vertex of the DAG. Each record holds, in order: vertex name, number
 * of task attempts, total runtime (from {@code getTaskRuntime}), the largest SHUFFLE_PHASE_TIME
 * counter value and the key it was found under, the largest LAST_EVENT_RECEIVED counter value
 * and the key it was found under, the 75th/95th/98th percentile and median of the attempt
 * runtimes, a straggler note, and a free-text comment.
 *
 * <p>Vertices for which {@code getFirstTaskToStart()} or {@code getLastTaskToFinish()} returns
 * null are skipped.
 *
 * @param dagInfo DAG whose vertices are analyzed
 * @throws TezException declared by the signature; nothing in this body throws it explicitly
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
    for (VertexInfo vertexInfo : dagInfo.getVertices()) {
        String vertexName = vertexInfo.getVertexName();
        if (vertexInfo.getFirstTaskToStart() == null || vertexInfo.getLastTaskToFinish() == null) {
            continue;
        }
        long totalTime = getTaskRuntime(vertexInfo);

        // Feed every attempt's runtime into the histogram registered under the vertex name
        taskAttemptRuntimeHistorgram = metrics.histogram(vertexName);
        for (TaskAttemptInfo attemptInfo : vertexInfo.getTaskAttempts()) {
            taskAttemptRuntimeHistorgram.update(attemptInfo.getTimeTaken());
        }

        // Slowest "last event received" across all attempts of this vertex
        Map.Entry<String, Long> lastEventMax =
                findMaxCounter(vertexInfo, TaskCounter.LAST_EVENT_RECEIVED.toString());
        long slowestLastEventTime = lastEventMax.getValue();
        String maxSourceName = lastEventMax.getKey();

        // Longest shuffle phase across all attempts of this vertex
        Map.Entry<String, Long> shufflePhaseMax =
                findMaxCounter(vertexInfo, TaskCounter.SHUFFLE_PHASE_TIME.toString());
        long shuffleMax = shufflePhaseMax.getValue();
        String shuffleMaxSource = shufflePhaseMax.getKey();

        List<String> record = Lists.newArrayList();
        record.add(vertexName);
        record.add(String.valueOf(vertexInfo.getTaskAttempts().size()));
        record.add(String.valueOf(totalTime));
        // Long.MIN_VALUE means "no counter found"; report that as 0
        record.add(String.valueOf(Math.max(0L, shuffleMax)));
        record.add(shuffleMaxSource);
        record.add(String.valueOf(Math.max(0L, slowestLastEventTime)));
        record.add(maxSourceName);

        // Finding out real_work done at vertex level might be meaningless (as it is quite possible
        // that it went to starvation).
        double percentile75 = taskAttemptRuntimeHistorgram.getSnapshot().get75thPercentile();
        double percentile95 = taskAttemptRuntimeHistorgram.getSnapshot().get95thPercentile();
        double percentile98 = taskAttemptRuntimeHistorgram.getSnapshot().get98thPercentile();
        double percentile99 = taskAttemptRuntimeHistorgram.getSnapshot().get99thPercentile();
        double medianAttemptRuntime = taskAttemptRuntimeHistorgram.getSnapshot().getMedian();
        record.add("75th=" + percentile75);
        record.add("95th=" + percentile95);
        record.add("98th=" + percentile98);
        record.add("median=" + medianAttemptRuntime);

        // A 75th percentile below half of the 99th suggests a straggler. When both are 0 the
        // division yields NaN, which compares false, so no note is emitted in that case.
        String stragglerNote = "";
        if (percentile75 / percentile99 < 0.5) {
            stragglerNote = "Looks like some straggler task is there";
        }
        record.add(stragglerNote);

        String comments = "";
        if (totalTime > 0 && vertexInfo.getTaskAttempts().size() > 0) {
            if ((shuffleMax * 1.0f / totalTime) > 0.5) {
                // More than half of the runtime went into shuffle; decide whether the wait on
                // the parent's last event also exceeds half of the runtime.
                if ((slowestLastEventTime * 1.0f / totalTime) > 0.5) {
                    comments = "This vertex is slow due to its dependency on parent. Got a lot delayed last" + " event received";
                } else {
                    comments = "Spending too much time on shuffle. Check shuffle bytes from previous vertex";
                }
            } else {
                if (totalTime > vertexRuntimeThreshold) {
                    // Runtime exceeds the configured threshold.
                    comments = "Concentrate on this vertex (totalTime > " + vertexRuntimeThreshold + " seconds)";
                }
            }
        }
        record.add(comments);
        csvResult.addRecord(record.toArray(new String[record.size()]));
    }
}

/**
 * Scans the named counter on every task attempt of the vertex and returns the largest value
 * found together with the map key it was found under. Entries keyed by the
 * {@code TaskCounter} class name are ignored. On ties the first entry encountered wins.
 *
 * @param vertexInfo vertex whose task attempts are scanned
 * @param counterName name of the counter to look up on each attempt
 * @return entry of (key, max value); {@code ("", Long.MIN_VALUE)} when no entry qualified
 */
private static Map.Entry<String, Long> findMaxCounter(VertexInfo vertexInfo, String counterName) {
    long maxValue = Long.MIN_VALUE;
    String maxKey = "";
    for (TaskAttemptInfo attemptInfo : vertexInfo.getTaskAttempts()) {
        Map<String, TezCounter> counters = attemptInfo.getCounter(counterName);
        for (Map.Entry<String, TezCounter> entry : counters.entrySet()) {
            if (entry.getKey().equals(TaskCounter.class.getName())) {
                // ignore. TODO: hack for taskcounter issue
                continue;
            }
            long value = entry.getValue().getValue();
            if (value > maxValue) {
                maxValue = value;
                maxKey = entry.getKey();
            }
        }
    }
    return new java.util.AbstractMap.SimpleImmutableEntry<String, Long>(maxKey, maxValue);
}
Also used : TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) TezCounter(org.apache.tez.common.counters.TezCounter) Map(java.util.Map) TaskCounter(org.apache.tez.common.counters.TaskCounter)

Example 5 with VertexInfo

use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.

the class CriticalPathAnalyzer method analyze.

/**
 * Records every task attempt of the DAG in {@code attempts}, picks the attempt with the
 * greatest finish time among those whose status equals {@code succeededState} or
 * {@code failedState}, and then builds and analyzes the critical path ending at that attempt.
 * The path is also saved as SVG unless the {@code DRAW_SVG} configuration flag is false.
 *
 * @param dagInfo DAG to analyze
 * @throws TezException declared by the signature; may be propagated from the helpers called here
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
    // Only succeeded/failed attempts are candidates; killed attempts are ignored to handle
    // kills that happen upon dag completion.
    TaskAttemptInfo latest = null;
    long latestFinishTime = 0;
    for (VertexInfo vertex : dagInfo.getVertices()) {
        for (TaskInfo task : vertex.getTasks()) {
            for (TaskAttemptInfo candidate : task.getTaskAttempts()) {
                // Every attempt is indexed by its id, whatever its status
                attempts.put(candidate.getTaskAttemptId(), candidate);
                boolean finished = candidate.getStatus().equals(succeededState)
                        || candidate.getStatus().equals(failedState);
                if (!finished) {
                    continue;
                }
                if (candidate.getFinishTime() > latestFinishTime) {
                    latest = candidate;
                    latestFinishTime = candidate.getFinishTime();
                }
            }
        }
    }

    if (latest == null) {
        LOG.info("Cannot find last attempt to finish in DAG " + dagInfo.getDagId());
        return;
    }

    createCriticalPath(dagInfo, latest, latestFinishTime, attempts);
    analyzeCriticalPath(dagInfo);

    boolean drawSvg = getConf().getBoolean(DRAW_SVG, true);
    if (drawSvg) {
        saveCriticalPathAsSVG(dagInfo);
    }
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Aggregations

VertexInfo (org.apache.tez.history.parser.datamodel.VertexInfo): 22; TaskAttemptInfo (org.apache.tez.history.parser.datamodel.TaskAttemptInfo): 15; TaskInfo (org.apache.tez.history.parser.datamodel.TaskInfo): 6; Map (java.util.Map): 5; TezCounter (org.apache.tez.common.counters.TezCounter): 4; EdgeInfo (org.apache.tez.history.parser.datamodel.EdgeInfo): 3; Container (org.apache.tez.history.parser.datamodel.Container): 2; DataDependencyEvent (org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent): 2; HashMap (java.util.HashMap): 1; Scanner (java.util.Scanner): 1; DAGCounter (org.apache.tez.common.counters.DAGCounter): 1; TaskCounter (org.apache.tez.common.counters.TaskCounter): 1; TezException (org.apache.tez.dag.api.TezException): 1; TezDAGID (org.apache.tez.dag.records.TezDAGID): 1; TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 1; TezTaskID (org.apache.tez.dag.records.TezTaskID): 1; TezVertexID (org.apache.tez.dag.records.TezVertexID): 1; Graph (org.apache.tez.dag.utils.Graph): 1; WordCount (org.apache.tez.examples.WordCount): 1; AdditionalInputOutputDetails (org.apache.tez.history.parser.datamodel.AdditionalInputOutputDetails): 1