Use of org.apache.tez.history.parser.datamodel.VertexInfo in the Apache Tez project.
Class ATSFileParser, method processVertices.
/**
 * Parses the vertices JSON array and appends one {@link VertexInfo} per array element to
 * {@code vertexList}, in array order.
 *
 * @param verticesJson JSON array holding one JSON object per vertex; must not be null
 * @throws JSONException propagated from reading an array element or from
 *         {@code VertexInfo.create}
 * @throws IllegalStateException if {@code verticesJson} is null (assuming
 *         {@code Preconditions} is Guava's, whose {@code checkState} throws this type)
 */
private void processVertices(JSONArray verticesJson) throws JSONException {
  // The precondition already rejects null, so no additional null guard is needed below.
  Preconditions.checkState(verticesJson != null, "Vertex json can not be null");
  LOG.debug("Started parsing vertex");
  for (int i = 0; i < verticesJson.length(); i++) {
    VertexInfo vertexInfo = VertexInfo.create(verticesJson.getJSONObject(i));
    vertexList.add(vertexInfo);
  }
  LOG.debug("Finished parsing vertex");
}
Use of org.apache.tez.history.parser.datamodel.VertexInfo in the Apache Tez project.
Class TestHistoryParser, method testParserWithFailedJob.
/**
 * Runs a word-count DAG whose second processor is {@code FailProcessor}, imports the DAG
 * data from ATS, parses it, and checks the parsed vertex, task, attempt, counter and
 * diagnostics information for the failed run.
 */
@Test
public void testParserWithFailedJob() throws Exception {
  // Counter key used for the Tokenizer -> Summation output counters checked below
  final String tokenizerOutputKey = "TaskCounter_Tokenizer_OUTPUT_Summation";
  // Every line has 2 words. 10 lines x 2 words = 20
  final int expectedOutputRecords = 20;

  // Submit a job that is expected to fail
  String dagId = runWordCount(
      WordCount.TokenProcessor.class.getName(),
      FailProcessor.class.getName(),
      "WordCount-With-Exception",
      true);

  // Pull the DAG data out of ATS
  String[] importArgs = {
      "--dagId=" + dagId,
      "--downloadDir=" + DOWNLOAD_DIR,
      "--yarnTimelineAddress=" + yarnTimelineAddress
  };
  int exitCode = ATSImportTool.process(importArgs);
  assertTrue(exitCode == 0);

  // Parse what was downloaded
  DagInfo dagInfo = getDagInfo(dagId);

  // Only applicable for the ATS dataset
  checkConfig(dagInfo);

  // Recursively verifies vertices, tasks and task attempts
  verifyDagInfo(dagInfo, true);

  // Checks specific to this DAG: the summation vertex has 1 failed task with 4 failed attempts
  VertexInfo summationVertex = dagInfo.getVertex(SUMMATION);
  assertTrue(summationVertex.getFailedTasks().size() == 1);
  assertTrue(summationVertex.getFailedTasks().get(0).getFailedTaskAttempts().size() == 4);
  assertTrue(summationVertex.getStatus().equals(VertexState.FAILED.toString()));

  // Exactly one failed vertex (summation) and one successful vertex (tokenizer)
  assertTrue(dagInfo.getFailedVertices().size() == 1);
  assertTrue(dagInfo.getFailedVertices().get(0).getVertexName().equals(SUMMATION));
  assertTrue(dagInfo.getSuccessfullVertices().size() == 1);
  assertTrue(dagInfo.getSuccessfullVertices().get(0).getVertexName().equals(TOKENIZER));
  assertTrue(dagInfo.getStatus().equals(DAGState.FAILED.toString()));

  // DAG-level counters
  verifyCounter(dagInfo.getCounter(DAGCounter.NUM_FAILED_TASKS.toString()), null, 4);
  verifyCounter(dagInfo.getCounter(DAGCounter.NUM_SUCCEEDED_TASKS.toString()), null, 1);
  verifyCounter(dagInfo.getCounter(DAGCounter.TOTAL_LAUNCHED_TASKS.toString()), null, 5);

  // Task-level counters
  verifyCounter(dagInfo.getCounter(TaskCounter.INPUT_RECORDS_PROCESSED.toString()),
      "TaskCounter_Tokenizer_INPUT_Input", 10);
  verifyCounter(dagInfo.getCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ.toString()),
      tokenizerOutputKey, 0);
  verifyCounter(dagInfo.getCounter(TaskCounter.OUTPUT_RECORDS.toString()),
      tokenizerOutputKey, expectedOutputRecords);
  verifyCounter(dagInfo.getCounter(TaskCounter.SPILLED_RECORDS.toString()),
      tokenizerOutputKey, expectedOutputRecords);

  // Each attempt after the first must name the preceding attempt as its creation causal
  // attempt, and that preceding attempt must carry a termination cause.
  for (TaskInfo task : summationVertex.getTasks()) {
    TaskAttemptInfo previousAttempt = null;
    for (TaskAttemptInfo currentAttempt : task.getTaskAttempts()) {
      if (previousAttempt != null) {
        assertTrue(previousAttempt.getTaskAttemptId().equals(currentAttempt.getCreationCausalTA()));
        assertTrue(previousAttempt.getTerminationCause() != null);
      }
      previousAttempt = currentAttempt;
    }
  }

  // TODO: Need to check for SUMMATION vertex counters. Since all attempts are failed,
  // counters are not getting populated (e.g. TaskCounter.REDUCE_INPUT_RECORDS).

  // The processor's exception message must show up in the DAG diagnostics
  assertTrue(dagInfo.getDiagnostics().contains("Failing this processor for some reason"));
}
Use of org.apache.tez.history.parser.datamodel.VertexInfo in the Apache Tez project.
Class ContainerReuseAnalyzer, method analyze.
/**
 * Adds one record to {@code csvResult} for every (vertex, container) pair found in the
 * vertex's container mapping.
 *
 * @param dagInfo DAG whose vertices are inspected
 * @throws TezException declared by the overridden method
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
  for (VertexInfo vertex : dagInfo.getVertices()) {
    Multimap<Container, TaskAttemptInfo> attemptsByContainer = vertex.getContainersMapping();
    for (Container usedContainer : attemptsByContainer.keySet()) {
      // Columns: vertex name, number of task attempts in the vertex, container host,
      // container id, number of attempts mapped to this container.
      String[] row = {
          vertex.getVertexName(),
          String.valueOf(vertex.getTaskAttempts().size()),
          usedContainer.getHost(),
          usedContainer.getId(),
          Integer.toString(attemptsByContainer.get(usedContainer).size())
      };
      csvResult.addRecord(row);
    }
  }
}
Use of org.apache.tez.history.parser.datamodel.VertexInfo in the Apache Tez project.
Class SlowestVertexAnalyzer, method analyze.
/**
 * Adds one record per vertex to {@code csvResult} describing how long the vertex took,
 * which source produced the largest SHUFFLE_PHASE_TIME and LAST_EVENT_RECEIVED counter
 * values, selected percentiles of the task-attempt runtimes, and free-text observations.
 * Vertices without a first-started or last-finished task are skipped.
 *
 * @param dagInfo DAG whose vertices are analyzed
 * @throws TezException declared by the overridden method
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
  for (VertexInfo vertex : dagInfo.getVertices()) {
    String name = vertex.getVertexName();
    if (vertex.getFirstTaskToStart() == null || vertex.getLastTaskToFinish() == null) {
      continue;
    }
    long vertexRuntime = getTaskRuntime(vertex);

    // Pass 1: record every attempt's runtime in the per-vertex histogram and find the
    // largest LAST_EVENT_RECEIVED value together with the key it was reported under.
    long latestLastEvent = Long.MIN_VALUE;
    String latestLastEventSource = "";
    taskAttemptRuntimeHistorgram = metrics.histogram(name);
    for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) {
      taskAttemptRuntimeHistorgram.update(attempt.getTimeTaken());
      Map<String, TezCounter> lastEventCounters =
          attempt.getCounter(TaskCounter.LAST_EVENT_RECEIVED.toString());
      for (Map.Entry<String, TezCounter> counterEntry : lastEventCounters.entrySet()) {
        // Entries keyed by the TaskCounter class name are not considered.
        if (!counterEntry.getKey().equals(TaskCounter.class.getName())) {
          long value = counterEntry.getValue().getValue();
          if (value > latestLastEvent) {
            latestLastEvent = value;
            latestLastEventSource = counterEntry.getKey();
          }
        }
      }
    }

    // Pass 2: find the largest SHUFFLE_PHASE_TIME value and the key it was reported under.
    long longestShuffle = Long.MIN_VALUE;
    String longestShuffleSource = "";
    for (TaskAttemptInfo attempt : vertex.getTaskAttempts()) {
      Map<String, TezCounter> shuffleCounters =
          attempt.getCounter(TaskCounter.SHUFFLE_PHASE_TIME.toString());
      for (Map.Entry<String, TezCounter> counterEntry : shuffleCounters.entrySet()) {
        // TODO: hack for taskcounter issue - entries keyed by the TaskCounter class name
        // are ignored.
        if (!counterEntry.getKey().equals(TaskCounter.class.getName())) {
          long value = counterEntry.getValue().getValue();
          if (value > longestShuffle) {
            longestShuffle = value;
            longestShuffleSource = counterEntry.getKey();
          }
        }
      }
    }

    List<String> row = Lists.newLinkedList();
    row.add(name);
    row.add(String.valueOf(vertex.getTaskAttempts().size()));
    row.add(Long.toString(vertexRuntime));
    // Negative maxima (including the Long.MIN_VALUE "nothing found" sentinel) are reported as 0.
    row.add(Long.toString(Math.max(0, longestShuffle)));
    row.add(longestShuffleSource);
    row.add(Long.toString(Math.max(0, latestLastEvent)));
    row.add(latestLastEventSource);

    // Finding out real_work done at vertex level might be meaningless (as it is quite
    // possible that it went to starvation).
    double p75 = taskAttemptRuntimeHistorgram.getSnapshot().get75thPercentile();
    double p95 = taskAttemptRuntimeHistorgram.getSnapshot().get95thPercentile();
    double p98 = taskAttemptRuntimeHistorgram.getSnapshot().get98thPercentile();
    double p99 = taskAttemptRuntimeHistorgram.getSnapshot().get99thPercentile();
    double median = taskAttemptRuntimeHistorgram.getSnapshot().getMedian();
    row.add("75th=" + p75);
    row.add("95th=" + p95);
    row.add("98th=" + p98);
    row.add("median=" + median);

    // A 75th percentile below half of the 99th is reported as a possible straggler.
    String observation = (p75 / p99 < 0.5) ? "Looks like some straggler task is there" : "";
    row.add(observation);

    String comments = "";
    if (vertexRuntime > 0 && vertex.getTaskAttempts().size() > 0) {
      boolean shuffleDominates = (longestShuffle * 1.0f / vertexRuntime) > 0.5;
      if (shuffleDominates) {
        boolean lastEventDominates = (latestLastEvent * 1.0f / vertexRuntime) > 0.5;
        if (lastEventDominates) {
          comments = "This vertex is slow due to its dependency on parent. Got a lot delayed last"
              + " event received";
        } else {
          comments = "Spending too much time on shuffle. Check shuffle bytes from previous vertex";
        }
      } else if (vertexRuntime > vertexRuntimeThreshold) {
        // NOTE(review): the message says "seconds"; confirm the unit of vertexRuntimeThreshold.
        comments = "Concentrate on this vertex (totalTime > " + vertexRuntimeThreshold + " seconds)";
      }
    }
    row.add(comments);

    csvResult.addRecord(row.toArray(new String[row.size()]));
  }
}
Use of org.apache.tez.history.parser.datamodel.VertexInfo in the Apache Tez project.
Class CriticalPathAnalyzer, method analyze.
/**
 * Registers every task attempt of the DAG in {@code attempts}, picks the succeeded or
 * failed attempt with the greatest finish time, and builds and analyzes the critical path
 * ending at that attempt. When the {@code DRAW_SVG} configuration flag is true (its
 * default here), the critical path is also saved as SVG.
 *
 * @param dagInfo DAG to analyze
 * @throws TezException declared by the overridden method
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
  // Only succeeded/failed attempts are candidates for "last to finish"; attempts in any
  // other state (e.g. killed upon DAG completion) are registered but not considered.
  TaskAttemptInfo latestAttempt = null;
  long latestFinishTime = 0;
  for (VertexInfo vertexInfo : dagInfo.getVertices()) {
    for (TaskInfo taskInfo : vertexInfo.getTasks()) {
      for (TaskAttemptInfo candidate : taskInfo.getTaskAttempts()) {
        attempts.put(candidate.getTaskAttemptId(), candidate);

        boolean succeededOrFailed = candidate.getStatus().equals(succeededState)
            || candidate.getStatus().equals(failedState);
        if (!succeededOrFailed) {
          continue;
        }
        if (candidate.getFinishTime() > latestFinishTime) {
          latestAttempt = candidate;
          latestFinishTime = candidate.getFinishTime();
        }
      }
    }
  }

  if (latestAttempt == null) {
    LOG.info("Cannot find last attempt to finish in DAG " + dagInfo.getDagId());
    return;
  }

  createCriticalPath(dagInfo, latestAttempt, latestFinishTime, attempts);
  analyzeCriticalPath(dagInfo);

  boolean drawSvg = getConf().getBoolean(DRAW_SVG, true);
  if (drawSvg) {
    saveCriticalPathAsSVG(dagInfo);
  }
}
Aggregations