Search in sources :

Example 1 with EdgeInfo

use of org.apache.tez.history.parser.datamodel.EdgeInfo in project tez by apache.

the class OneOnOneEdgeAnalyzer method analyze.

/**
 * Walks every output edge of every vertex in the DAG and, for each edge whose data movement
 * type equals ONE_TO_ONE, compares the container mapping of the source vertex with that of
 * the destination vertex, task id by task id. Every task whose two mapped values differ
 * (ignoring case) is logged and appended to csvResult as a five-column record:
 * source vertex name, destination vertex name, task id, source value, downstream value.
 *
 * @param dagInfo the DAG whose vertices and output edges are examined
 * @throws TezException as declared by the overridden signature
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
    for (VertexInfo vertex : dagInfo.getVertices()) {
        for (EdgeInfo edge : vertex.getOutputEdges()) {
            String movementType = edge.getDataMovementType();
            // Only one-to-one edges are of interest; everything else is skipped.
            if (movementType == null || !movementType.equals(ONE_TO_ONE)) {
                continue;
            }
            VertexInfo src = edge.getSourceVertex();
            VertexInfo dst = edge.getDestinationVertex();
            LOG.info("Src --> Dest : {} --> {}", src, dst);

            Map<Integer, String> srcMapping = getContainerMappingForVertex(src);
            Map<Integer, String> dstMapping = getContainerMappingForVertex(dst);

            int mismatches = 0;
            for (Map.Entry<Integer, String> srcEntry : srcMapping.entrySet()) {
                Integer taskId = srcEntry.getKey();
                String srcLocation = srcEntry.getValue();
                // Look up the task with the same id on the downstream vertex.
                String dstLocation = dstMapping.get(taskId);
                if (dstLocation == null || srcLocation.equalsIgnoreCase(dstLocation)) {
                    // No downstream counterpart, or both sides map to the same value.
                    continue;
                }
                // The downstream task's mapped value differs from the source task's.
                LOG.info("TaskID: {}, source: {}, downStream:{}", taskId, srcLocation, dstLocation);
                csvResult.addRecord(new String[] {
                    src.getVertexName(),
                    dst.getVertexName(),
                    String.valueOf(taskId),
                    srcLocation,
                    dstLocation
                });
                mismatches++;
            }
            LOG.info("Total tasks:{}, miss: {}", srcMapping.size(), mismatches);
        }
    }
}
Also used : VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) Map(java.util.Map) EdgeInfo(org.apache.tez.history.parser.datamodel.EdgeInfo)

Example 2 with EdgeInfo

use of org.apache.tez.history.parser.datamodel.EdgeInfo in project tez by apache.

the class Utils method generateDAGVizFile.

/**
 * Builds a {@link Graph} describing the given DAG and saves it to {@code fileName}.
 *
 * <p>One node is created per vertex, labelled with the sanitized vertex name, the short
 * processor class name, the task count and the time taken. Vertices listed in
 * {@code criticalVertices} are coloured red. Each additional input and output of a vertex
 * gets its own box-shaped node connected to the vertex node, and every DAG edge is added
 * between the nodes of its input and output vertices.
 *
 * @param dagInfo the DAG to render
 * @param fileName destination passed to {@code Graph.save}
 * @param criticalVertices names of vertices to highlight; may be {@code null}, in which case
 *     no vertex is highlighted
 * @throws IOException as declared by the signature (presumably from saving the graph)
 */
public static void generateDAGVizFile(DagInfo dagInfo, String fileName, @Nullable List<String> criticalVertices) throws IOException {
    Graph graph = new Graph(sanitizeLabelForViz(dagInfo.getName()));
    for (VertexInfo v : dagInfo.getVertices()) {
        String vertexId = sanitizeLabelForViz(v.getVertexName());
        // Shorten ONLY the processor class name. The closing parenthesis used to sit at the end
        // of the whole concatenation, so the task/time suffix was fed through
        // getShortClassName() together with the class name.
        String nodeLabel = vertexId + "[" + getShortClassName(v.getProcessorClassName())
            + ", tasks=" + v.getTasks().size() + ", time=" + v.getTimeTaken() + " ms]";
        Graph.Node n = graph.newNode(vertexId, nodeLabel);
        if (criticalVertices != null && criticalVertices.contains(v.getVertexName())) {
            n.setColor("red");
        }
        for (AdditionalInputOutputDetails input : v.getAdditionalInputInfoList()) {
            String inputName = sanitizeLabelForViz(input.getName());
            Graph.Node inputNode = graph.getNode(vertexId + "_" + inputName);
            inputNode.setLabel(vertexId + "[" + inputName + "]");
            inputNode.setShape("box");
            inputNode.addEdge(n, "Input name=" + input.getName()
                + " [inputClass=" + getShortClassName(input.getClazz())
                + ", initializer=" + getShortClassName(input.getInitializer()) + "]");
        }
        for (AdditionalInputOutputDetails output : v.getAdditionalOutputInfoList()) {
            String outputName = sanitizeLabelForViz(output.getName());
            Graph.Node outputNode = graph.getNode(vertexId + "_" + outputName);
            outputNode.setLabel(vertexId + "[" + outputName + "]");
            outputNode.setShape("box");
            // NOTE(review): the label says "committer" but the value comes from getInitializer();
            // presumably the same field is reused for outputs -- confirm.
            n.addEdge(outputNode, "Output name=" + output.getName()
                + " [outputClass=" + getShortClassName(output.getClazz())
                + ", committer=" + getShortClassName(output.getInitializer()) + "]");
        }
    }
    for (EdgeInfo e : dagInfo.getEdges()) {
        Graph.Node from = graph.getNode(sanitizeLabelForViz(e.getInputVertexName()));
        Graph.Node to = graph.getNode(sanitizeLabelForViz(e.getOutputVertexName()));
        // NOTE(review): getDataMovementType() is dereferenced without a null check; confirm it
        // can never be null for edges returned by getEdges().
        from.addEdge(to, "[input=" + getShortClassName(e.getEdgeSourceClass())
            + ", output=" + getShortClassName(e.getEdgeDestinationClass())
            + ", dataMovement=" + e.getDataMovementType().trim() + "]");
    }
    graph.save(fileName);
}
Also used : Graph(org.apache.tez.dag.utils.Graph) AdditionalInputOutputDetails(org.apache.tez.history.parser.datamodel.AdditionalInputOutputDetails) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) EdgeInfo(org.apache.tez.history.parser.datamodel.EdgeInfo)

Example 3 with EdgeInfo

use of org.apache.tez.history.parser.datamodel.EdgeInfo in project tez by apache.

the class TestHistoryParser method verifyJobSpecificInfo.

/**
 * Asserts that the parsed {@code dagInfo} matches what this test expects of the WordCount
 * DAG: two vertices (TOKENIZER and SUMMATION) joined by a single SCATTER_GATHER edge,
 * positive and ordered timestamps on vertices, tasks and attempts, exactly one attempt per
 * task with no failed or killed attempts, and edge/vertex fan-in and fan-out counts that
 * match each vertex's position. Finally checks that the successful attempt of the
 * last-finishing TOKENIZER task is the source of the last data event seen by every attempt
 * of the other vertex.
 *
 * @param dagInfo the parsed DAG to verify
 */
private void verifyJobSpecificInfo(DagInfo dagInfo) {
    // DAG-level expectations.
    assertTrue(dagInfo.getNumVertices() == 2);
    assertTrue(dagInfo.getName().equals("WordCount"));
    assertTrue(dagInfo.getVertex(TOKENIZER).getProcessorClassName()
        .equals(WordCount.TokenProcessor.class.getName()));
    assertTrue(dagInfo.getVertex(SUMMATION).getProcessorClassName()
        .equals(WordCount.SumProcessor.class.getName()));
    assertTrue(dagInfo.getFinishTime() > dagInfo.getStartTime());

    // The single edge between the two vertices.
    assertTrue(dagInfo.getEdges().size() == 1);
    EdgeInfo edge = dagInfo.getEdges().iterator().next();
    assertTrue(edge.getDataMovementType().equals(EdgeProperty.DataMovementType.SCATTER_GATHER.toString()));
    assertTrue(edge.getSourceVertex().getVertexName().equals(TOKENIZER));
    assertTrue(edge.getDestinationVertex().getVertexName().equals(SUMMATION));
    assertTrue(edge.getInputVertexName().equals(TOKENIZER));
    assertTrue(edge.getOutputVertexName().equals(SUMMATION));
    assertTrue(edge.getEdgeSourceClass().equals(OrderedPartitionedKVOutput.class.getName()));
    assertTrue(edge.getEdgeDestinationClass().equals(OrderedGroupedKVInput.class.getName()));

    assertTrue(dagInfo.getVertices().size() == 2);
    String lastTokenizerAttemptId = null;
    String lastEventSourceAttemptId = null;
    for (VertexInfo vertex : dagInfo.getVertices()) {
        assertTrue(vertex.getKilledTasksCount() == 0);
        assertTrue(vertex.getInitRequestedTime() > 0);
        assertTrue(vertex.getInitTime() > 0);
        assertTrue(vertex.getStartRequestedTime() > 0);
        assertTrue(vertex.getStartTime() > 0);
        assertTrue(vertex.getFinishTime() > 0);
        assertTrue(vertex.getFinishTime() > vertex.getStartTime());

        boolean isTokenizer = vertex.getVertexName().equals(TOKENIZER);
        long latestFinish = 0;
        for (TaskInfo task : vertex.getTasks()) {
            assertTrue(task.getNumberOfTaskAttempts() == 1);
            assertTrue(task.getMaxTaskAttemptDuration() >= 0);
            assertTrue(task.getMinTaskAttemptDuration() >= 0);
            assertTrue(task.getAvgTaskAttemptDuration() >= 0);
            assertTrue(task.getLastTaskAttemptToFinish() != null);
            assertTrue(task.getContainersMapping().size() > 0);
            assertTrue(task.getSuccessfulTaskAttempts().size() > 0);
            assertTrue(task.getFailedTaskAttempts().size() == 0);
            assertTrue(task.getKilledTaskAttempts().size() == 0);
            assertTrue(task.getFinishTime() > task.getStartTime());

            List<TaskAttemptInfo> attempts = task.getTaskAttempts();
            if (!isTokenizer) {
                for (TaskAttemptInfo attempt : attempts) {
                    DataDependencyEvent lastEvent = attempt.getLastDataEvents().get(0);
                    assertTrue(lastEvent.getTimestamp() > 0);
                    if (lastEventSourceAttemptId != null) {
                        // Every attempt must report the same source for its last data event.
                        assertTrue(lastEventSourceAttemptId.equals(lastEvent.getTaskAttemptId()));
                    } else {
                        lastEventSourceAttemptId = lastEvent.getTaskAttemptId();
                    }
                }
            } else {
                // Remember the successful attempt of the task that finished last.
                long taskFinish = task.getFinishTime();
                if (taskFinish > latestFinish) {
                    latestFinish = taskFinish;
                    lastTokenizerAttemptId = task.getSuccessfulAttemptId();
                }
            }

            for (TaskAttemptInfo attempt : attempts) {
                assertTrue(attempt.getCreationTime() > 0);
                assertTrue(attempt.getAllocationTime() > 0);
                assertTrue(attempt.getStartTime() > 0);
                assertTrue(attempt.getFinishTime() > attempt.getStartTime());
            }
        }
        assertTrue(vertex.getLastTaskToFinish() != null);

        // TOKENIZER only feeds downstream; the other vertex only consumes.
        int expectedIn = isTokenizer ? 0 : 1;
        int expectedOut = isTokenizer ? 1 : 0;
        assertTrue(vertex.getInputEdges().size() == expectedIn);
        assertTrue(vertex.getOutputEdges().size() == expectedOut);
        assertTrue(vertex.getOutputVertices().size() == expectedOut);
        assertTrue(vertex.getInputVertices().size() == expectedIn);
    }
    assertTrue(lastTokenizerAttemptId.equals(lastEventSourceAttemptId));
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) DataDependencyEvent(org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent) WordCount(org.apache.tez.examples.WordCount) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) EdgeInfo(org.apache.tez.history.parser.datamodel.EdgeInfo)

Aggregations

EdgeInfo (org.apache.tez.history.parser.datamodel.EdgeInfo)3 VertexInfo (org.apache.tez.history.parser.datamodel.VertexInfo)3 Map (java.util.Map)1 Graph (org.apache.tez.dag.utils.Graph)1 WordCount (org.apache.tez.examples.WordCount)1 AdditionalInputOutputDetails (org.apache.tez.history.parser.datamodel.AdditionalInputOutputDetails)1 TaskAttemptInfo (org.apache.tez.history.parser.datamodel.TaskAttemptInfo)1 DataDependencyEvent (org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent)1 TaskInfo (org.apache.tez.history.parser.datamodel.TaskInfo)1