Search in sources :

Example 6 with TaskInfo

use of org.apache.tez.history.parser.datamodel.TaskInfo in project tez by apache.

the class CriticalPathAnalyzer method analyzeAllocationOverhead.

private void analyzeAllocationOverhead(DagInfo dag) {
    List<TaskAttemptInfo> preemptedAttempts = Lists.newArrayList();
    for (VertexInfo v : dag.getVertices()) {
        for (TaskInfo t : v.getTasks()) {
            for (TaskAttemptInfo a : t.getTaskAttempts()) {
                if (a.getTerminationCause().equals(TaskAttemptTerminationCause.INTERNAL_PREEMPTION.name())) {
                    LOG.debug("Found preempted attempt " + a.getTaskAttemptId());
                    preemptedAttempts.add(a);
                }
            }
        }
    }
    for (int i = 0; i < criticalPath.size(); ++i) {
        CriticalPathStep step = criticalPath.get(i);
        TaskAttemptInfo attempt = step.attempt;
        if (step.getType() != EntityType.ATTEMPT) {
            continue;
        }
        long creationTime = attempt.getCreationTime();
        long allocationTime = attempt.getAllocationTime();
        long finishTime = attempt.getFinishTime();
        if (allocationTime < step.startCriticalPathTime) {
            // allocated before it became critical
            continue;
        }
        // the attempt is critical before allocation. So allocation overhead needs analysis
        Container container = attempt.getContainer();
        if (container != null) {
            Collection<TaskAttemptInfo> attempts = dag.getContainerMapping().get(container);
            if (attempts != null && !attempts.isEmpty()) {
                // arrange attempts by allocation time
                List<TaskAttemptInfo> attemptsList = Lists.newArrayList(attempts);
                Collections.sort(attemptsList, TaskAttemptInfo.orderingOnAllocationTime());
                // walk the list to record allocation time before the current attempt
                long containerPreviousAllocatedTime = 0;
                int reUsesForVertex = 1;
                for (TaskAttemptInfo containerAttempt : attemptsList) {
                    if (containerAttempt.getTaskAttemptId().equals(attempt.getTaskAttemptId())) {
                        break;
                    }
                    if (containerAttempt.getTaskInfo().getVertexInfo().getVertexId().equals(attempt.getTaskInfo().getVertexInfo().getVertexId())) {
                        // another task from the same vertex ran in this container. So there are multiple
                        // waves for this vertex on this container.
                        reUsesForVertex++;
                    }
                    long cAllocTime = containerAttempt.getAllocationTime();
                    long cFinishTime = containerAttempt.getFinishTime();
                    if (cFinishTime > creationTime) {
                        // for containerAttempts that used the container while this attempt was waiting
                        // add up time container was allocated to containerAttempt. Account for allocations
                        // that started before this attempt was created.
                        containerPreviousAllocatedTime += (cFinishTime - (cAllocTime > creationTime ? cAllocTime : creationTime));
                    }
                }
                int numVertexTasks = attempt.getTaskInfo().getVertexInfo().getNumTasks();
                int intervalMaxConcurrency = getIntervalMaxConcurrency(creationTime, finishTime);
                double numWaves = getWaves(numVertexTasks, intervalMaxConcurrency);
                if (reUsesForVertex > 1) {
                    step.notes.add("Container ran multiple tasks for this vertex. ");
                    if (numWaves < 1) {
                        // less than 1 wave total but still ran more than 1 on this container
                        step.notes.add("Vertex potentially seeing contention from other branches in the DAG. ");
                    }
                }
                if (containerPreviousAllocatedTime == 0) {
                    step.notes.add("Container newly allocated.");
                } else {
                    if (containerPreviousAllocatedTime >= attempt.getCreationToAllocationTimeInterval()) {
                        step.notes.add("Container was fully allocated");
                    } else {
                        step.notes.add("Container in use for " + SVGUtils.getTimeStr(containerPreviousAllocatedTime) + " out of " + SVGUtils.getTimeStr(attempt.getCreationToAllocationTimeInterval()) + " of allocation wait time");
                    }
                }
            }
            // look for internal preemptions while attempt was waiting for allocation
            for (TaskAttemptInfo a : preemptedAttempts) {
                if (a.getTaskInfo().getVertexInfo().getVertexId().equals(attempt.getTaskInfo().getVertexInfo().getVertexId())) {
                    // dont preempt same vertex task. ideally this should look at priority but we dont have it
                    continue;
                }
                if (a.getFinishTime() > creationTime && a.getFinishTime() < allocationTime) {
                    // found an attempt that was preempted within this time interval
                    step.notes.add("Potentially waited for preemption of " + a.getShortName());
                }
            }
        }
    }
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) Container(org.apache.tez.history.parser.datamodel.Container) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo)

Example 7 with TaskInfo

use of org.apache.tez.history.parser.datamodel.TaskInfo in project tez by apache.

the class SlowestVertexAnalyzer method getTaskRuntime.

private long getTaskRuntime(VertexInfo vertexInfo) {
    TaskInfo firstTaskToStart = vertexInfo.getFirstTaskToStart();
    TaskInfo lastTaskToFinish = vertexInfo.getLastTaskToFinish();
    DagInfo dagInfo = vertexInfo.getDagInfo();
    long totalTime = ((lastTaskToFinish == null) ? dagInfo.getFinishTime() : lastTaskToFinish.getFinishTime()) - ((firstTaskToStart == null) ? dagInfo.getStartTime() : firstTaskToStart.getStartTime());
    return totalTime;
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) DagInfo(org.apache.tez.history.parser.datamodel.DagInfo)

Example 8 with TaskInfo

use of org.apache.tez.history.parser.datamodel.TaskInfo in project tez by apache.

the class SimpleHistoryParser method parseContents.

private void parseContents(File historyFile, String dagId) throws JSONException, FileNotFoundException, TezException {
    Scanner scanner = new Scanner(historyFile, UTF8);
    scanner.useDelimiter(SimpleHistoryLoggingService.RECORD_SEPARATOR);
    JSONObject dagJson = null;
    Map<String, JSONObject> vertexJsonMap = Maps.newHashMap();
    Map<String, JSONObject> taskJsonMap = Maps.newHashMap();
    Map<String, JSONObject> attemptJsonMap = Maps.newHashMap();
    TezDAGID tezDAGID = TezDAGID.fromString(dagId);
    String userName = null;
    while (scanner.hasNext()) {
        String line = scanner.next();
        JSONObject jsonObject = new JSONObject(line);
        String entity = jsonObject.getString(Constants.ENTITY);
        String entityType = jsonObject.getString(Constants.ENTITY_TYPE);
        switch(entityType) {
            case Constants.TEZ_DAG_ID:
                if (!dagId.equals(entity)) {
                    LOG.warn(dagId + " is not matching with " + entity);
                    continue;
                }
                // time etc).
                if (dagJson == null) {
                    dagJson = jsonObject;
                }
                JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                JSONObject dagOtherInfo = dagJson.getJSONObject(Constants.OTHER_INFO);
                JSONArray relatedEntities = dagJson.optJSONArray(Constants.RELATED_ENTITIES);
                // {"entity":"userXYZ","entitytype":"user"}
                if (relatedEntities != null) {
                    for (int i = 0; i < relatedEntities.length(); i++) {
                        JSONObject subEntity = relatedEntities.getJSONObject(i);
                        String subEntityType = subEntity.optString(Constants.ENTITY_TYPE);
                        if (subEntityType != null && subEntityType.equals(Constants.USER)) {
                            userName = subEntity.getString(Constants.ENTITY);
                            break;
                        }
                    }
                }
                populateOtherInfo(otherInfo, dagOtherInfo);
                break;
            case Constants.TEZ_VERTEX_ID:
                String vertexName = entity;
                TezVertexID tezVertexID = TezVertexID.fromString(vertexName);
                if (!tezDAGID.equals(tezVertexID.getDAGId())) {
                    LOG.warn(vertexName + " does not belong to " + tezDAGID);
                    continue;
                }
                if (!vertexJsonMap.containsKey(vertexName)) {
                    vertexJsonMap.put(vertexName, jsonObject);
                }
                otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                populateOtherInfo(otherInfo, vertexName, vertexJsonMap);
                break;
            case Constants.TEZ_TASK_ID:
                String taskName = entity;
                TezTaskID tezTaskID = TezTaskID.fromString(taskName);
                if (!tezDAGID.equals(tezTaskID.getVertexID().getDAGId())) {
                    LOG.warn(taskName + " does not belong to " + tezDAGID);
                    continue;
                }
                if (!taskJsonMap.containsKey(taskName)) {
                    taskJsonMap.put(taskName, jsonObject);
                }
                otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                populateOtherInfo(otherInfo, taskName, taskJsonMap);
                break;
            case Constants.TEZ_TASK_ATTEMPT_ID:
                String taskAttemptName = entity;
                TezTaskAttemptID tezAttemptId = TezTaskAttemptID.fromString(taskAttemptName);
                if (!tezDAGID.equals(tezAttemptId.getTaskID().getVertexID().getDAGId())) {
                    LOG.warn(taskAttemptName + " does not belong to " + tezDAGID);
                    continue;
                }
                if (!attemptJsonMap.containsKey(taskAttemptName)) {
                    attemptJsonMap.put(taskAttemptName, jsonObject);
                }
                otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                populateOtherInfo(otherInfo, taskAttemptName, attemptJsonMap);
                break;
            default:
                break;
        }
    }
    scanner.close();
    if (dagJson != null) {
        this.dagInfo = DagInfo.create(dagJson);
        setUserName(userName);
    } else {
        LOG.error("Dag is not yet parsed. Looks like partial file.");
        throw new TezException("Please provide a valid/complete history log file containing " + dagId);
    }
    for (JSONObject jsonObject : vertexJsonMap.values()) {
        VertexInfo vertexInfo = VertexInfo.create(jsonObject);
        this.vertexList.add(vertexInfo);
        LOG.debug("Parsed vertex {}", vertexInfo.getVertexName());
    }
    for (JSONObject jsonObject : taskJsonMap.values()) {
        TaskInfo taskInfo = TaskInfo.create(jsonObject);
        this.taskList.add(taskInfo);
        LOG.debug("Parsed task {}", taskInfo.getTaskId());
    }
    for (JSONObject jsonObject : attemptJsonMap.values()) {
        /**
         * For converting SimpleHistoryLogging to in-memory representation
         *
         * We need to get "relatedEntities":[{"entity":"cn055-10.l42scl.hortonworks.com:58690",
         * "entitytype":"nodeId"},{"entity":"container_1438652049951_0008_01_000152",
         * "entitytype":"containerId"} and populate it in otherInfo object so that in-memory
         * representation can parse it correctly
         */
        JSONArray relatedEntities = jsonObject.optJSONArray(Constants.RELATED_ENTITIES);
        if (relatedEntities == null) {
            // This can happen when CONTAINER_EXITED abruptly. (e.g Container failed, exitCode=1)
            LOG.debug("entity {} did not have related entities", jsonObject.optJSONObject(Constants.ENTITY));
        } else {
            JSONObject subJsonObject = relatedEntities.optJSONObject(0);
            if (subJsonObject != null) {
                String nodeId = subJsonObject.optString(Constants.ENTITY_TYPE);
                if (!Strings.isNullOrEmpty(nodeId) && nodeId.equalsIgnoreCase(Constants.NODE_ID)) {
                    // populate it in otherInfo
                    JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                    String nodeIdVal = subJsonObject.optString(Constants.ENTITY);
                    if (otherInfo != null && nodeIdVal != null) {
                        otherInfo.put(Constants.NODE_ID, nodeIdVal);
                    }
                }
            }
            subJsonObject = relatedEntities.optJSONObject(1);
            if (subJsonObject != null) {
                String containerId = subJsonObject.optString(Constants.ENTITY_TYPE);
                if (!Strings.isNullOrEmpty(containerId) && containerId.equalsIgnoreCase(Constants.CONTAINER_ID)) {
                    // populate it in otherInfo
                    JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                    String containerIdVal = subJsonObject.optString(Constants.ENTITY);
                    if (otherInfo != null && containerIdVal != null) {
                        otherInfo.put(Constants.CONTAINER_ID, containerIdVal);
                    }
                }
            }
        }
        TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(jsonObject);
        this.attemptList.add(attemptInfo);
        LOG.debug("Parsed task attempt {}", attemptInfo.getTaskAttemptId());
    }
}
Also used : TezException(org.apache.tez.dag.api.TezException) Scanner(java.util.Scanner) JSONArray(org.codehaus.jettison.json.JSONArray) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) JSONObject(org.codehaus.jettison.json.JSONObject) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) TezDAGID(org.apache.tez.dag.records.TezDAGID) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 9 with TaskInfo

use of org.apache.tez.history.parser.datamodel.TaskInfo in project tez by apache.

the class TestHistoryParser method verifyJobSpecificInfo.

private void verifyJobSpecificInfo(DagInfo dagInfo) {
    // Job specific
    assertTrue(dagInfo.getNumVertices() == 2);
    assertTrue(dagInfo.getName().equals("WordCount"));
    assertTrue(dagInfo.getVertex(TOKENIZER).getProcessorClassName().equals(WordCount.TokenProcessor.class.getName()));
    assertTrue(dagInfo.getVertex(SUMMATION).getProcessorClassName().equals(WordCount.SumProcessor.class.getName()));
    assertTrue(dagInfo.getFinishTime() > dagInfo.getStartTime());
    assertTrue(dagInfo.getEdges().size() == 1);
    EdgeInfo edgeInfo = dagInfo.getEdges().iterator().next();
    assertTrue(edgeInfo.getDataMovementType().equals(EdgeProperty.DataMovementType.SCATTER_GATHER.toString()));
    assertTrue(edgeInfo.getSourceVertex().getVertexName().equals(TOKENIZER));
    assertTrue(edgeInfo.getDestinationVertex().getVertexName().equals(SUMMATION));
    assertTrue(edgeInfo.getInputVertexName().equals(TOKENIZER));
    assertTrue(edgeInfo.getOutputVertexName().equals(SUMMATION));
    assertTrue(edgeInfo.getEdgeSourceClass().equals(OrderedPartitionedKVOutput.class.getName()));
    assertTrue(edgeInfo.getEdgeDestinationClass().equals(OrderedGroupedKVInput.class.getName()));
    assertTrue(dagInfo.getVertices().size() == 2);
    String lastSourceTA = null;
    String lastDataEventSourceTA = null;
    for (VertexInfo vertexInfo : dagInfo.getVertices()) {
        assertTrue(vertexInfo.getKilledTasksCount() == 0);
        assertTrue(vertexInfo.getInitRequestedTime() > 0);
        assertTrue(vertexInfo.getInitTime() > 0);
        assertTrue(vertexInfo.getStartRequestedTime() > 0);
        assertTrue(vertexInfo.getStartTime() > 0);
        assertTrue(vertexInfo.getFinishTime() > 0);
        assertTrue(vertexInfo.getFinishTime() > vertexInfo.getStartTime());
        long finishTime = 0;
        for (TaskInfo taskInfo : vertexInfo.getTasks()) {
            assertTrue(taskInfo.getNumberOfTaskAttempts() == 1);
            assertTrue(taskInfo.getMaxTaskAttemptDuration() >= 0);
            assertTrue(taskInfo.getMinTaskAttemptDuration() >= 0);
            assertTrue(taskInfo.getAvgTaskAttemptDuration() >= 0);
            assertTrue(taskInfo.getLastTaskAttemptToFinish() != null);
            assertTrue(taskInfo.getContainersMapping().size() > 0);
            assertTrue(taskInfo.getSuccessfulTaskAttempts().size() > 0);
            assertTrue(taskInfo.getFailedTaskAttempts().size() == 0);
            assertTrue(taskInfo.getKilledTaskAttempts().size() == 0);
            assertTrue(taskInfo.getFinishTime() > taskInfo.getStartTime());
            List<TaskAttemptInfo> attempts = taskInfo.getTaskAttempts();
            if (vertexInfo.getVertexName().equals(TOKENIZER)) {
                // get the last task to finish and track its successful attempt
                if (finishTime < taskInfo.getFinishTime()) {
                    finishTime = taskInfo.getFinishTime();
                    lastSourceTA = taskInfo.getSuccessfulAttemptId();
                }
            } else {
                for (TaskAttemptInfo attempt : attempts) {
                    DataDependencyEvent item = attempt.getLastDataEvents().get(0);
                    assertTrue(item.getTimestamp() > 0);
                    if (lastDataEventSourceTA == null) {
                        lastDataEventSourceTA = item.getTaskAttemptId();
                    } else {
                        // all attempts should have the same last data event source TA
                        assertTrue(lastDataEventSourceTA.equals(item.getTaskAttemptId()));
                    }
                }
            }
            for (TaskAttemptInfo attemptInfo : taskInfo.getTaskAttempts()) {
                assertTrue(attemptInfo.getCreationTime() > 0);
                assertTrue(attemptInfo.getAllocationTime() > 0);
                assertTrue(attemptInfo.getStartTime() > 0);
                assertTrue(attemptInfo.getFinishTime() > attemptInfo.getStartTime());
            }
        }
        assertTrue(vertexInfo.getLastTaskToFinish() != null);
        if (vertexInfo.getVertexName().equals(TOKENIZER)) {
            assertTrue(vertexInfo.getInputEdges().size() == 0);
            assertTrue(vertexInfo.getOutputEdges().size() == 1);
            assertTrue(vertexInfo.getOutputVertices().size() == 1);
            assertTrue(vertexInfo.getInputVertices().size() == 0);
        } else {
            assertTrue(vertexInfo.getInputEdges().size() == 1);
            assertTrue(vertexInfo.getOutputEdges().size() == 0);
            assertTrue(vertexInfo.getOutputVertices().size() == 0);
            assertTrue(vertexInfo.getInputVertices().size() == 1);
        }
    }
    assertTrue(lastSourceTA.equals(lastDataEventSourceTA));
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) DataDependencyEvent(org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent) WordCount(org.apache.tez.examples.WordCount) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) EdgeInfo(org.apache.tez.history.parser.datamodel.EdgeInfo)

Aggregations

TaskInfo (org.apache.tez.history.parser.datamodel.TaskInfo)9 TaskAttemptInfo (org.apache.tez.history.parser.datamodel.TaskAttemptInfo)6 VertexInfo (org.apache.tez.history.parser.datamodel.VertexInfo)6 DagInfo (org.apache.tez.history.parser.datamodel.DagInfo)2 Scanner (java.util.Scanner)1 TezException (org.apache.tez.dag.api.TezException)1 TezDAGID (org.apache.tez.dag.records.TezDAGID)1 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)1 TezTaskID (org.apache.tez.dag.records.TezTaskID)1 TezVertexID (org.apache.tez.dag.records.TezVertexID)1 WordCount (org.apache.tez.examples.WordCount)1 Container (org.apache.tez.history.parser.datamodel.Container)1 EdgeInfo (org.apache.tez.history.parser.datamodel.EdgeInfo)1 DataDependencyEvent (org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent)1 JSONArray (org.codehaus.jettison.json.JSONArray)1 JSONObject (org.codehaus.jettison.json.JSONObject)1 Test (org.junit.Test)1