Search in sources :

Example 21 with TaskAttemptInfo

use of org.apache.tez.history.parser.datamodel.TaskAttemptInfo in project tez by apache.

the class SimpleHistoryParser method parseContents.

/**
 * Reads a SimpleHistoryLogging file record by record and builds the in-memory DAG, vertex,
 * task and task attempt objects for the requested DAG.
 *
 * Records are split on SimpleHistoryLoggingService.RECORD_SEPARATOR. Records whose entity does
 * not belong to {@code dagId} are logged and skipped; records of unknown entity type are ignored.
 *
 * @param historyFile history log file to read
 * @param dagId id of the DAG whose records should be collected
 * @throws JSONException if a record is not valid JSON or lacks a required field
 * @throws FileNotFoundException if {@code historyFile} cannot be opened
 * @throws TezException if the file contains no record for {@code dagId}
 */
private void parseContents(File historyFile, String dagId) throws JSONException, FileNotFoundException, TezException {
    JSONObject dagJson = null;
    Map<String, JSONObject> vertexJsonMap = Maps.newHashMap();
    Map<String, JSONObject> taskJsonMap = Maps.newHashMap();
    Map<String, JSONObject> attemptJsonMap = Maps.newHashMap();
    String userName = null;
    // try-with-resources: the scanner must be released even when a malformed record makes
    // JSON parsing or id parsing throw half way through the file.
    try (Scanner scanner = new Scanner(historyFile, UTF8)) {
        scanner.useDelimiter(SimpleHistoryLoggingService.RECORD_SEPARATOR);
        TezDAGID tezDAGID = TezDAGID.fromString(dagId);
        while (scanner.hasNext()) {
            String line = scanner.next();
            JSONObject jsonObject = new JSONObject(line);
            String entity = jsonObject.getString(Constants.ENTITY);
            String entityType = jsonObject.getString(Constants.ENTITY_TYPE);
            switch(entityType) {
                case Constants.TEZ_DAG_ID: {
                    if (!dagId.equals(entity)) {
                        LOG.warn(dagId + " is not matching with " + entity);
                        continue;
                    }
                    // The first DAG record seen becomes the base object; every DAG record
                    // (including the first) is then handed to populateOtherInfo together with
                    // the base object's otherInfo.
                    // NOTE(review): presumably this merges later events into the base - confirm.
                    if (dagJson == null) {
                        dagJson = jsonObject;
                    }
                    JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                    JSONObject dagOtherInfo = dagJson.getJSONObject(Constants.OTHER_INFO);
                    JSONArray relatedEntities = dagJson.optJSONArray(Constants.RELATED_ENTITIES);
                    // The user name is carried as a related entity, e.g.
                    // {"entity":"userXYZ","entitytype":"user"}
                    if (relatedEntities != null) {
                        for (int i = 0; i < relatedEntities.length(); i++) {
                            JSONObject subEntity = relatedEntities.getJSONObject(i);
                            String subEntityType = subEntity.optString(Constants.ENTITY_TYPE);
                            if (subEntityType != null && subEntityType.equals(Constants.USER)) {
                                userName = subEntity.getString(Constants.ENTITY);
                                break;
                            }
                        }
                    }
                    populateOtherInfo(otherInfo, dagOtherInfo);
                    break;
                }
                case Constants.TEZ_VERTEX_ID: {
                    String vertexName = entity;
                    TezVertexID tezVertexID = TezVertexID.fromString(vertexName);
                    if (!tezDAGID.equals(tezVertexID.getDAGId())) {
                        LOG.warn(vertexName + " does not belong to " + tezDAGID);
                        continue;
                    }
                    // Only the first record of a vertex is stored as its base object.
                    if (!vertexJsonMap.containsKey(vertexName)) {
                        vertexJsonMap.put(vertexName, jsonObject);
                    }
                    JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                    populateOtherInfo(otherInfo, vertexName, vertexJsonMap);
                    break;
                }
                case Constants.TEZ_TASK_ID: {
                    String taskName = entity;
                    TezTaskID tezTaskID = TezTaskID.fromString(taskName);
                    if (!tezDAGID.equals(tezTaskID.getVertexID().getDAGId())) {
                        LOG.warn(taskName + " does not belong to " + tezDAGID);
                        continue;
                    }
                    // Only the first record of a task is stored as its base object.
                    if (!taskJsonMap.containsKey(taskName)) {
                        taskJsonMap.put(taskName, jsonObject);
                    }
                    JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                    populateOtherInfo(otherInfo, taskName, taskJsonMap);
                    break;
                }
                case Constants.TEZ_TASK_ATTEMPT_ID: {
                    String taskAttemptName = entity;
                    TezTaskAttemptID tezAttemptId = TezTaskAttemptID.fromString(taskAttemptName);
                    if (!tezDAGID.equals(tezAttemptId.getTaskID().getVertexID().getDAGId())) {
                        LOG.warn(taskAttemptName + " does not belong to " + tezDAGID);
                        continue;
                    }
                    // Only the first record of an attempt is stored as its base object.
                    if (!attemptJsonMap.containsKey(taskAttemptName)) {
                        attemptJsonMap.put(taskAttemptName, jsonObject);
                    }
                    JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
                    populateOtherInfo(otherInfo, taskAttemptName, attemptJsonMap);
                    break;
                }
                default:
                    break;
            }
        }
    }
    if (dagJson != null) {
        this.dagInfo = DagInfo.create(dagJson);
        setUserName(userName);
    } else {
        LOG.error("Dag is not yet parsed. Looks like partial file.");
        throw new TezException("Please provide a valid/complete history log file containing " + dagId);
    }
    for (JSONObject jsonObject : vertexJsonMap.values()) {
        VertexInfo vertexInfo = VertexInfo.create(jsonObject);
        this.vertexList.add(vertexInfo);
        LOG.debug("Parsed vertex {}", vertexInfo.getVertexName());
    }
    for (JSONObject jsonObject : taskJsonMap.values()) {
        TaskInfo taskInfo = TaskInfo.create(jsonObject);
        this.taskList.add(taskInfo);
        LOG.debug("Parsed task {}", taskInfo.getTaskId());
    }
    for (JSONObject jsonObject : attemptJsonMap.values()) {
        /**
         * For converting SimpleHistoryLogging to in-memory representation
         *
         * We need to get "relatedEntities":[{"entity":"cn055-10.l42scl.hortonworks.com:58690",
         * "entitytype":"nodeId"},{"entity":"container_1438652049951_0008_01_000152",
         * "entitytype":"containerId"} and populate it in otherInfo object so that in-memory
         * representation can parse it correctly
         */
        JSONArray relatedEntities = jsonObject.optJSONArray(Constants.RELATED_ENTITIES);
        if (relatedEntities == null) {
            // This can happen when CONTAINER_EXITED abruptly. (e.g Container failed, exitCode=1)
            // The entity id is read as a string above, so it is logged via optString here.
            LOG.debug("entity {} did not have related entities", jsonObject.optString(Constants.ENTITY));
        } else {
            JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
            // Index 0 is expected to hold the node id entry.
            JSONObject subJsonObject = relatedEntities.optJSONObject(0);
            if (subJsonObject != null) {
                String nodeId = subJsonObject.optString(Constants.ENTITY_TYPE);
                if (!Strings.isNullOrEmpty(nodeId) && nodeId.equalsIgnoreCase(Constants.NODE_ID)) {
                    // populate it in otherInfo
                    String nodeIdVal = subJsonObject.optString(Constants.ENTITY);
                    if (otherInfo != null && nodeIdVal != null) {
                        otherInfo.put(Constants.NODE_ID, nodeIdVal);
                    }
                }
            }
            // Index 1 is expected to hold the container id entry.
            subJsonObject = relatedEntities.optJSONObject(1);
            if (subJsonObject != null) {
                String containerId = subJsonObject.optString(Constants.ENTITY_TYPE);
                if (!Strings.isNullOrEmpty(containerId) && containerId.equalsIgnoreCase(Constants.CONTAINER_ID)) {
                    // populate it in otherInfo
                    String containerIdVal = subJsonObject.optString(Constants.ENTITY);
                    if (otherInfo != null && containerIdVal != null) {
                        otherInfo.put(Constants.CONTAINER_ID, containerIdVal);
                    }
                }
            }
        }
        TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(jsonObject);
        this.attemptList.add(attemptInfo);
        LOG.debug("Parsed task attempt {}", attemptInfo.getTaskAttemptId());
    }
}
Also used : TezException(org.apache.tez.dag.api.TezException) Scanner(java.util.Scanner) JSONArray(org.codehaus.jettison.json.JSONArray) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) JSONObject(org.codehaus.jettison.json.JSONObject) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) TezDAGID(org.apache.tez.dag.records.TezDAGID) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID)

Example 22 with TaskAttemptInfo

use of org.apache.tez.history.parser.datamodel.TaskAttemptInfo in project tez by apache.

the class ATSFileParser method processAttempts.

/**
 * Parses every task attempt JSON entry and appends the resulting TaskAttemptInfo to
 * attemptList.
 *
 * @param taskAttemptsJson array of task attempt JSON objects; must not be null
 * @throws JSONException if reading or parsing an entry of the array fails
 * @throws IllegalStateException if {@code taskAttemptsJson} is null
 */
private void processAttempts(JSONArray taskAttemptsJson) throws JSONException {
    // checkState throws for a null array, so no further null guard is needed below.
    Preconditions.checkState(taskAttemptsJson != null, "Attempts json can not be null");
    LOG.debug("Started parsing task attempts");
    for (int i = 0; i < taskAttemptsJson.length(); i++) {
        TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(taskAttemptsJson.getJSONObject(i));
        attemptList.add(attemptInfo);
    }
    LOG.debug("Finished parsing task attempts");
}
Also used : TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo)

Example 23 with TaskAttemptInfo

use of org.apache.tez.history.parser.datamodel.TaskAttemptInfo in project tez by apache.

the class TestHistoryParser method verifyJobSpecificInfo.

/**
 * Asserts the job-specific expectations on a parsed DAG: its name, its two vertices, the single
 * edge between TOKENIZER and SUMMATION, and the per-vertex, per-task and per-attempt details.
 *
 * @param dagInfo parsed DAG to verify
 */
private void verifyJobSpecificInfo(DagInfo dagInfo) {
    // DAG-level expectations
    assertTrue(dagInfo.getNumVertices() == 2);
    assertTrue(dagInfo.getName().equals("WordCount"));
    assertTrue(dagInfo.getVertex(TOKENIZER).getProcessorClassName().equals(WordCount.TokenProcessor.class.getName()));
    assertTrue(dagInfo.getVertex(SUMMATION).getProcessorClassName().equals(WordCount.SumProcessor.class.getName()));
    assertTrue(dagInfo.getFinishTime() > dagInfo.getStartTime());

    // The single edge runs TOKENIZER -> SUMMATION
    assertTrue(dagInfo.getEdges().size() == 1);
    EdgeInfo edge = dagInfo.getEdges().iterator().next();
    assertTrue(edge.getDataMovementType().equals(EdgeProperty.DataMovementType.SCATTER_GATHER.toString()));
    assertTrue(edge.getSourceVertex().getVertexName().equals(TOKENIZER));
    assertTrue(edge.getDestinationVertex().getVertexName().equals(SUMMATION));
    assertTrue(edge.getInputVertexName().equals(TOKENIZER));
    assertTrue(edge.getOutputVertexName().equals(SUMMATION));
    assertTrue(edge.getEdgeSourceClass().equals(OrderedPartitionedKVOutput.class.getName()));
    assertTrue(edge.getEdgeDestinationClass().equals(OrderedGroupedKVInput.class.getName()));

    assertTrue(dagInfo.getVertices().size() == 2);
    // Successful attempt of the TOKENIZER task that finished last
    String lastSourceTA = null;
    // Source attempt reported by the first last-data event of the non-TOKENIZER attempts
    String lastDataEventSourceTA = null;
    for (VertexInfo vertex : dagInfo.getVertices()) {
        assertTrue(vertex.getKilledTasksCount() == 0);
        assertTrue(vertex.getInitRequestedTime() > 0);
        assertTrue(vertex.getInitTime() > 0);
        assertTrue(vertex.getStartRequestedTime() > 0);
        assertTrue(vertex.getStartTime() > 0);
        assertTrue(vertex.getFinishTime() > 0);
        assertTrue(vertex.getFinishTime() > vertex.getStartTime());

        long latestFinish = 0;
        for (TaskInfo task : vertex.getTasks()) {
            assertTrue(task.getNumberOfTaskAttempts() == 1);
            assertTrue(task.getMaxTaskAttemptDuration() >= 0);
            assertTrue(task.getMinTaskAttemptDuration() >= 0);
            assertTrue(task.getAvgTaskAttemptDuration() >= 0);
            assertTrue(task.getLastTaskAttemptToFinish() != null);
            assertTrue(task.getContainersMapping().size() > 0);
            assertTrue(task.getSuccessfulTaskAttempts().size() > 0);
            assertTrue(task.getFailedTaskAttempts().size() == 0);
            assertTrue(task.getKilledTaskAttempts().size() == 0);
            assertTrue(task.getFinishTime() > task.getStartTime());

            List<TaskAttemptInfo> taskAttempts = task.getTaskAttempts();
            if (!vertex.getVertexName().equals(TOKENIZER)) {
                for (TaskAttemptInfo consumerAttempt : taskAttempts) {
                    DataDependencyEvent firstEvent = consumerAttempt.getLastDataEvents().get(0);
                    assertTrue(firstEvent.getTimestamp() > 0);
                    if (lastDataEventSourceTA != null) {
                        // all attempts should have the same last data event source TA
                        assertTrue(lastDataEventSourceTA.equals(firstEvent.getTaskAttemptId()));
                    } else {
                        lastDataEventSourceTA = firstEvent.getTaskAttemptId();
                    }
                }
            } else if (latestFinish < task.getFinishTime()) {
                // remember the last task to finish and its successful attempt
                latestFinish = task.getFinishTime();
                lastSourceTA = task.getSuccessfulAttemptId();
            }

            // Timing expectations for every attempt of the task
            for (TaskAttemptInfo attempt : task.getTaskAttempts()) {
                assertTrue(attempt.getCreationTime() > 0);
                assertTrue(attempt.getAllocationTime() > 0);
                assertTrue(attempt.getStartTime() > 0);
                assertTrue(attempt.getFinishTime() > attempt.getStartTime());
            }
        }
        assertTrue(vertex.getLastTaskToFinish() != null);

        // TOKENIZER has one outgoing edge and no incoming one; the other vertex is the mirror image
        boolean isTokenizer = vertex.getVertexName().equals(TOKENIZER);
        int expectedInputs = isTokenizer ? 0 : 1;
        int expectedOutputs = isTokenizer ? 1 : 0;
        assertTrue(vertex.getInputEdges().size() == expectedInputs);
        assertTrue(vertex.getOutputEdges().size() == expectedOutputs);
        assertTrue(vertex.getOutputVertices().size() == expectedOutputs);
        assertTrue(vertex.getInputVertices().size() == expectedInputs);
    }
    // The last source attempt must be the one the downstream attempts saw as their last data event
    assertTrue(lastSourceTA.equals(lastDataEventSourceTA));
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) DataDependencyEvent(org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent) WordCount(org.apache.tez.examples.WordCount) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) EdgeInfo(org.apache.tez.history.parser.datamodel.EdgeInfo)

Aggregations

TaskAttemptInfo (org.apache.tez.history.parser.datamodel.TaskAttemptInfo)23 VertexInfo (org.apache.tez.history.parser.datamodel.VertexInfo)15 TaskInfo (org.apache.tez.history.parser.datamodel.TaskInfo)6 Map (java.util.Map)5 TezCounter (org.apache.tez.common.counters.TezCounter)5 Container (org.apache.tez.history.parser.datamodel.Container)2 DataDependencyEvent (org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent)2 HashMap (java.util.HashMap)1 Scanner (java.util.Scanner)1 DAGCounter (org.apache.tez.common.counters.DAGCounter)1 FileSystemCounter (org.apache.tez.common.counters.FileSystemCounter)1 TaskCounter (org.apache.tez.common.counters.TaskCounter)1 TezCounters (org.apache.tez.common.counters.TezCounters)1 TezException (org.apache.tez.dag.api.TezException)1 TezDAGID (org.apache.tez.dag.records.TezDAGID)1 TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID)1 TezTaskID (org.apache.tez.dag.records.TezTaskID)1 TezVertexID (org.apache.tez.dag.records.TezVertexID)1 WordCount (org.apache.tez.examples.WordCount)1 DagInfo (org.apache.tez.history.parser.datamodel.DagInfo)1