use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.
the class Utils method generateDAGVizFile.
public static void generateDAGVizFile(DagInfo dagInfo, String fileName,
    @Nullable List<String> criticalVertices) throws IOException {
  Graph graph = new Graph(sanitizeLabelForViz(dagInfo.getName()));
  for (VertexInfo v : dagInfo.getVertices()) {
    // Note: the closing parenthesis of getShortClassName belongs right after
    // getProcessorClassName(); the task/time suffix is part of the node label.
    String nodeLabel = sanitizeLabelForViz(v.getVertexName())
        + "[" + getShortClassName(v.getProcessorClassName())
        + ", tasks=" + v.getTasks().size() + ", time=" + v.getTimeTaken() + " ms]";
    Graph.Node n = graph.newNode(sanitizeLabelForViz(v.getVertexName()), nodeLabel);
    boolean criticalVertex = (criticalVertices != null)
        && criticalVertices.contains(v.getVertexName());
    if (criticalVertex) {
      n.setColor("red");
    }
    for (AdditionalInputOutputDetails input : v.getAdditionalInputInfoList()) {
      Graph.Node inputNode = graph.getNode(sanitizeLabelForViz(v.getVertexName())
          + "_" + sanitizeLabelForViz(input.getName()));
      inputNode.setLabel(sanitizeLabelForViz(v.getVertexName())
          + "[" + sanitizeLabelForViz(input.getName()) + "]");
      inputNode.setShape("box");
      inputNode.addEdge(n, "Input name=" + input.getName()
          + " [inputClass=" + getShortClassName(input.getClazz())
          + ", initializer=" + getShortClassName(input.getInitializer()) + "]");
    }
    for (AdditionalInputOutputDetails output : v.getAdditionalOutputInfoList()) {
      Graph.Node outputNode = graph.getNode(sanitizeLabelForViz(v.getVertexName())
          + "_" + sanitizeLabelForViz(output.getName()));
      outputNode.setLabel(sanitizeLabelForViz(v.getVertexName())
          + "[" + sanitizeLabelForViz(output.getName()) + "]");
      outputNode.setShape("box");
      n.addEdge(outputNode, "Output name=" + output.getName()
          + " [outputClass=" + getShortClassName(output.getClazz())
          + ", committer=" + getShortClassName(output.getInitializer()) + "]");
    }
  }
  for (EdgeInfo e : dagInfo.getEdges()) {
    Graph.Node n = graph.getNode(sanitizeLabelForViz(e.getInputVertexName()));
    n.addEdge(graph.getNode(sanitizeLabelForViz(e.getOutputVertexName())),
        "[input=" + getShortClassName(e.getEdgeSourceClass())
        + ", output=" + getShortClassName(e.getEdgeDestinationClass())
        + ", dataMovement=" + e.getDataMovementType().trim() + "]");
  }
  graph.save(fileName);
}
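The method renders the DAG into a Graphviz DOT file via Tez's internal Graph helper. For readers unfamiliar with the output, here is a minimal standalone sketch (no Tez dependency) that writes a comparable DOT file by hand for a two-vertex DAG; the vertex names, task counts, times, and file name are illustrative only, not values produced by the method above:

import java.io.FileWriter;
import java.io.IOException;

// Minimal sketch: hand-written DOT output mirroring the node/edge labels
// produced by generateDAGVizFile. Render with: dot -Tpng dag.dot -o dag.png
public class DagVizSketch {
  public static void main(String[] args) throws IOException {
    try (FileWriter w = new FileWriter("dag.dot")) {
      w.write("digraph WordCount {\n");
      // Vertex nodes: name[processor, tasks, time]; critical vertices are colored red.
      w.write("  Tokenizer [label=\"Tokenizer[TokenProcessor, tasks=10, time=1200 ms]\"];\n");
      w.write("  Summation [label=\"Summation[SumProcessor, tasks=2, time=800 ms]\", color=red];\n");
      // Edge labeled with input/output classes and the data movement type.
      w.write("  Tokenizer -> Summation [label=\"[input=OrderedPartitionedKVOutput, "
          + "output=OrderedGroupedKVInput, dataMovement=SCATTER_GATHER]\"];\n");
      w.write("}\n");
    }
  }
}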
use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.
the class LocalityAnalyzer method analyze.
@Override
public void analyze(DagInfo dagInfo) throws TezException {
  for (VertexInfo vertexInfo : dagInfo.getVertices()) {
    String vertexName = vertexInfo.getVertexName();
    Map<String, TezCounter> dataLocalTask = vertexInfo.getCounter(
        DAGCounter.class.getName(), DAGCounter.DATA_LOCAL_TASKS.toString());
    Map<String, TezCounter> rackLocalTask = vertexInfo.getCounter(
        DAGCounter.class.getName(), DAGCounter.RACK_LOCAL_TASKS.toString());
    long dataLocalTasks = 0;
    long rackLocalTasks = 0;
    if (!dataLocalTask.isEmpty()) {
      dataLocalTasks = dataLocalTask.get(DAGCounter.class.getName()).getValue();
    }
    if (!rackLocalTask.isEmpty()) {
      rackLocalTasks = rackLocalTask.get(DAGCounter.class.getName()).getValue();
    }
    long totalVertexTasks = vertexInfo.getNumTasks();
    if (dataLocalTasks > 0 || rackLocalTasks > 0) {
      // Compute locality details.
      float dataLocalRatio = dataLocalTasks * 1.0f / totalVertexTasks;
      float rackLocalRatio = rackLocalTasks * 1.0f / totalVertexTasks;
      float othersRatio = (totalVertexTasks - (dataLocalTasks + rackLocalTasks))
          * 1.0f / totalVertexTasks;
      List<String> record = Lists.newLinkedList();
      record.add(vertexName);
      record.add(totalVertexTasks + "");
      record.add(dataLocalRatio + "");
      record.add(rackLocalRatio + "");
      record.add(othersRatio + "");
      TaskAttemptDetails dataLocalResult = computeAverages(vertexInfo, DAGCounter.DATA_LOCAL_TASKS);
      TaskAttemptDetails rackLocalResult = computeAverages(vertexInfo, DAGCounter.RACK_LOCAL_TASKS);
      TaskAttemptDetails otherTaskResult = computeAverages(vertexInfo, DAGCounter.OTHER_LOCAL_TASKS);
      record.add(dataLocalResult.avgRuntime + "");
      record.add(rackLocalResult.avgRuntime + "");
      record.add(otherTaskResult.avgRuntime + "");
      // Number of inputs to this vertex (input edges plus additional inputs).
      record.add(vertexInfo.getInputEdges().size()
          + vertexInfo.getAdditionalInputInfoList().size() + "");
      // Average HDFS bytes read in this vertex for each locality type.
      record.add(dataLocalResult.avgHDFSBytesRead + "");
      record.add(rackLocalResult.avgHDFSBytesRead + "");
      record.add(otherTaskResult.avgHDFSBytesRead + "");
      String recommendation = "";
      if (dataLocalRatio < config.getFloat(DATA_LOCAL_RATIO, DATA_LOCAL_RATIO_DEFAULT)) {
        recommendation = "Data locality is poor for this vertex. Try tuning "
            + TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS + ", "
            + TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED + ", "
            + TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED;
      }
      record.add(recommendation);
      csvResult.addRecord(record.toArray(new String[record.size()]));
    }
  }
}
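A worked example of the ratio arithmetic above, with illustrative task counts; the 0.5 threshold stands in for the configured DATA_LOCAL_RATIO value and is an assumption for this sketch:

// Worked example of the locality ratio computation (values are illustrative).
public class LocalityRatioSketch {
  public static void main(String[] args) {
    long totalVertexTasks = 10;
    long dataLocalTasks = 6;
    long rackLocalTasks = 3;
    float dataLocalRatio = dataLocalTasks * 1.0f / totalVertexTasks;  // 0.6
    float rackLocalRatio = rackLocalTasks * 1.0f / totalVertexTasks;  // 0.3
    float othersRatio = (totalVertexTasks - (dataLocalTasks + rackLocalTasks))
        * 1.0f / totalVertexTasks;                                    // 0.1
    float threshold = 0.5f; // assumed stand-in for config value DATA_LOCAL_RATIO
    System.out.printf("data=%.1f rack=%.1f other=%.1f poorLocality=%b%n",
        dataLocalRatio, rackLocalRatio, othersRatio, dataLocalRatio < threshold);
  }
}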
use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.
the class ShuffleTimeAnalyzer method analyze.
@Override
public void analyze(DagInfo dagInfo) throws TezException {
  for (VertexInfo vertexInfo : dagInfo.getVertices()) {
    for (TaskAttemptInfo attemptInfo : vertexInfo.getTaskAttempts()) {
      // counter group (basically the source) --> counter
      Map<String, TezCounter> reduceInputGroups =
          attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_GROUPS.toString());
      Map<String, TezCounter> reduceInputRecords =
          attemptInfo.getCounter(TaskCounter.REDUCE_INPUT_RECORDS.toString());
      if (reduceInputGroups == null) {
        continue;
      }
      for (Map.Entry<String, TezCounter> entry : reduceInputGroups.entrySet()) {
        String counterGroupName = entry.getKey();
        long reduceInputGroupsVal = entry.getValue().getValue();
        long reduceInputRecordsVal = (reduceInputRecords.get(counterGroupName) != null)
            ? reduceInputRecords.get(counterGroupName).getValue() : 0;
        if (reduceInputRecordsVal <= 0) {
          continue;
        }
        float ratio = (reduceInputGroupsVal * 1.0f / reduceInputRecordsVal);
        if (ratio > 0 && reduceInputRecordsVal > minShuffleRecords) {
          List<String> result = Lists.newLinkedList();
          result.add(vertexInfo.getVertexName());
          result.add(attemptInfo.getTaskAttemptId());
          result.add(attemptInfo.getNodeId());
          result.add(counterGroupName);
          // Real work done in the task (total runtime minus merge phase time).
          String comments = "";
          String mergePhaseTime = getCounterValue(TaskCounter.MERGE_PHASE_TIME, counterGroupName, attemptInfo);
          String timeTakenForRealWork = "";
          if (!Strings.isNullOrEmpty(mergePhaseTime)) {
            long realWorkDone = attemptInfo.getTimeTaken() - Long.parseLong(mergePhaseTime);
            if ((realWorkDone * 1.0f / attemptInfo.getTimeTaken()) < realWorkDoneRatio) {
              comments = "Time taken in shuffle is more than the actual work being done in task. "
                  + "Check if source/destination machine is a slow node. Check if merge phase "
                  + "time is high to understand disk bottlenecks in this node. Check for skew.";
            }
            timeTakenForRealWork = Long.toString(realWorkDone);
          }
          result.add(comments);
          result.add(reduceInputGroupsVal + "");
          result.add(reduceInputRecordsVal + "");
          result.add("" + ratio);
          result.add(getCounterValue(TaskCounter.SHUFFLE_BYTES, counterGroupName, attemptInfo));
          result.add(Long.toString(attemptInfo.getTimeTaken()));
          // Total time taken to receive all events from source tasks.
          result.add(getOverheadFromSourceTasks(counterGroupName, attemptInfo));
          result.add(getCounterValue(TaskCounter.MERGE_PHASE_TIME, counterGroupName, attemptInfo));
          result.add(getCounterValue(TaskCounter.SHUFFLE_PHASE_TIME, counterGroupName, attemptInfo));
          result.add(timeTakenForRealWork);
          result.add(getCounterValue(TaskCounter.FIRST_EVENT_RECEIVED, counterGroupName, attemptInfo));
          result.add(getCounterValue(TaskCounter.LAST_EVENT_RECEIVED, counterGroupName, attemptInfo));
          result.add(getCounterValue(TaskCounter.SHUFFLE_BYTES_DISK_DIRECT, counterGroupName, attemptInfo));
          csvResult.addRecord(result.toArray(new String[result.size()]));
        }
      }
    }
  }
}
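The key heuristic above is "real work" = attempt runtime minus merge phase time; when that fraction falls below realWorkDoneRatio, the attempt is flagged as shuffle-dominated. A standalone sketch with illustrative numbers (the 0.5 threshold is an assumption, not the analyzer's configured default):

// Sketch of the shuffle-dominated heuristic with illustrative values (ms).
public class RealWorkRatioSketch {
  public static void main(String[] args) {
    long timeTaken = 10_000;      // total attempt runtime
    long mergePhaseTime = 8_000;  // time spent merging shuffled data
    long realWorkDone = timeTaken - mergePhaseTime;            // 2000 ms
    float realWorkDoneRatio = 0.5f;                            // assumed threshold
    boolean shuffleDominated =
        (realWorkDone * 1.0f / timeTaken) < realWorkDoneRatio; // 0.2 < 0.5 -> true
    System.out.println("shuffleDominated=" + shuffleDominated);
  }
}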
use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.
the class SimpleHistoryParser method parseContents.
private void parseContents(File historyFile, String dagId)
    throws JSONException, FileNotFoundException, TezException {
  Scanner scanner = new Scanner(historyFile, UTF8);
  scanner.useDelimiter(SimpleHistoryLoggingService.RECORD_SEPARATOR);
  JSONObject dagJson = null;
  Map<String, JSONObject> vertexJsonMap = Maps.newHashMap();
  Map<String, JSONObject> taskJsonMap = Maps.newHashMap();
  Map<String, JSONObject> attemptJsonMap = Maps.newHashMap();
  TezDAGID tezDAGID = TezDAGID.fromString(dagId);
  String userName = null;
  while (scanner.hasNext()) {
    String line = scanner.next();
    JSONObject jsonObject = new JSONObject(line);
    String entity = jsonObject.getString(Constants.ENTITY);
    String entityType = jsonObject.getString(Constants.ENTITY_TYPE);
    switch (entityType) {
      case Constants.TEZ_DAG_ID:
        if (!dagId.equals(entity)) {
          LOG.warn(dagId + " does not match " + entity);
          continue;
        }
        // The DAG entity can be logged multiple times (e.g. on submission and on
        // completion); keep the first record and merge otherInfo (counters,
        // start/finish time etc.) from the later ones.
        if (dagJson == null) {
          dagJson = jsonObject;
        }
        JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
        JSONObject dagOtherInfo = dagJson.getJSONObject(Constants.OTHER_INFO);
        JSONArray relatedEntities = dagJson.optJSONArray(Constants.RELATED_ENTITIES);
        // e.g. {"entity":"userXYZ","entitytype":"user"}
        if (relatedEntities != null) {
          for (int i = 0; i < relatedEntities.length(); i++) {
            JSONObject subEntity = relatedEntities.getJSONObject(i);
            String subEntityType = subEntity.optString(Constants.ENTITY_TYPE);
            if (subEntityType != null && subEntityType.equals(Constants.USER)) {
              userName = subEntity.getString(Constants.ENTITY);
              break;
            }
          }
        }
        populateOtherInfo(otherInfo, dagOtherInfo);
        break;
      case Constants.TEZ_VERTEX_ID:
        String vertexName = entity;
        TezVertexID tezVertexID = TezVertexID.fromString(vertexName);
        if (!tezDAGID.equals(tezVertexID.getDAGId())) {
          LOG.warn(vertexName + " does not belong to " + tezDAGID);
          continue;
        }
        if (!vertexJsonMap.containsKey(vertexName)) {
          vertexJsonMap.put(vertexName, jsonObject);
        }
        otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
        populateOtherInfo(otherInfo, vertexName, vertexJsonMap);
        break;
      case Constants.TEZ_TASK_ID:
        String taskName = entity;
        TezTaskID tezTaskID = TezTaskID.fromString(taskName);
        if (!tezDAGID.equals(tezTaskID.getVertexID().getDAGId())) {
          LOG.warn(taskName + " does not belong to " + tezDAGID);
          continue;
        }
        if (!taskJsonMap.containsKey(taskName)) {
          taskJsonMap.put(taskName, jsonObject);
        }
        otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
        populateOtherInfo(otherInfo, taskName, taskJsonMap);
        break;
      case Constants.TEZ_TASK_ATTEMPT_ID:
        String taskAttemptName = entity;
        TezTaskAttemptID tezAttemptId = TezTaskAttemptID.fromString(taskAttemptName);
        if (!tezDAGID.equals(tezAttemptId.getTaskID().getVertexID().getDAGId())) {
          LOG.warn(taskAttemptName + " does not belong to " + tezDAGID);
          continue;
        }
        if (!attemptJsonMap.containsKey(taskAttemptName)) {
          attemptJsonMap.put(taskAttemptName, jsonObject);
        }
        otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
        populateOtherInfo(otherInfo, taskAttemptName, attemptJsonMap);
        break;
      default:
        break;
    }
  }
  scanner.close();
  if (dagJson != null) {
    this.dagInfo = DagInfo.create(dagJson);
    setUserName(userName);
  } else {
    LOG.error("DAG is not yet parsed. Looks like a partial file.");
    throw new TezException("Please provide a valid/complete history log file containing " + dagId);
  }
  for (JSONObject jsonObject : vertexJsonMap.values()) {
    VertexInfo vertexInfo = VertexInfo.create(jsonObject);
    this.vertexList.add(vertexInfo);
    LOG.debug("Parsed vertex {}", vertexInfo.getVertexName());
  }
  for (JSONObject jsonObject : taskJsonMap.values()) {
    TaskInfo taskInfo = TaskInfo.create(jsonObject);
    this.taskList.add(taskInfo);
    LOG.debug("Parsed task {}", taskInfo.getTaskId());
  }
  for (JSONObject jsonObject : attemptJsonMap.values()) {
    /**
     * To convert the SimpleHistoryLogging output to the in-memory representation,
     * we need to read "relatedEntities":[{"entity":"cn055-10.l42scl.hortonworks.com:58690",
     * "entitytype":"nodeId"},{"entity":"container_1438652049951_0008_01_000152",
     * "entitytype":"containerId"}] and copy the values into the otherInfo object
     * so that the in-memory representation can parse them correctly.
     */
    JSONArray relatedEntities = jsonObject.optJSONArray(Constants.RELATED_ENTITIES);
    if (relatedEntities == null) {
      // This can happen when the container exited abruptly (e.g. container failed, exitCode=1).
      LOG.debug("entity {} did not have related entities", jsonObject.optString(Constants.ENTITY));
    } else {
      JSONObject subJsonObject = relatedEntities.optJSONObject(0);
      if (subJsonObject != null) {
        String nodeId = subJsonObject.optString(Constants.ENTITY_TYPE);
        if (!Strings.isNullOrEmpty(nodeId) && nodeId.equalsIgnoreCase(Constants.NODE_ID)) {
          // Populate the node id in otherInfo.
          JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
          String nodeIdVal = subJsonObject.optString(Constants.ENTITY);
          if (otherInfo != null && nodeIdVal != null) {
            otherInfo.put(Constants.NODE_ID, nodeIdVal);
          }
        }
      }
      subJsonObject = relatedEntities.optJSONObject(1);
      if (subJsonObject != null) {
        String containerId = subJsonObject.optString(Constants.ENTITY_TYPE);
        if (!Strings.isNullOrEmpty(containerId) && containerId.equalsIgnoreCase(Constants.CONTAINER_ID)) {
          // Populate the container id in otherInfo.
          JSONObject otherInfo = jsonObject.optJSONObject(Constants.OTHER_INFO);
          String containerIdVal = subJsonObject.optString(Constants.ENTITY);
          if (otherInfo != null && containerIdVal != null) {
            otherInfo.put(Constants.CONTAINER_ID, containerIdVal);
          }
        }
      }
    }
    TaskAttemptInfo attemptInfo = TaskAttemptInfo.create(jsonObject);
    this.attemptList.add(attemptInfo);
    LOG.debug("Parsed task attempt {}", attemptInfo.getTaskAttemptId());
  }
}
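The parser's outer loop relies on SimpleHistoryLoggingService writing one JSON object per record, delimited by a record separator. A standalone sketch of just that splitting step (the separator literal below is an assumption for illustration; the real code reads the constant from SimpleHistoryLoggingService.RECORD_SEPARATOR):

import java.io.File;
import java.io.FileNotFoundException;
import java.util.Scanner;

// Sketch: split a simple-history log into per-record JSON strings.
public class RecordSplitSketch {
  // Assumption for illustration; the parser above uses
  // SimpleHistoryLoggingService.RECORD_SEPARATOR rather than a literal.
  private static final String RECORD_SEPARATOR = "\u0001";

  public static void main(String[] args) throws FileNotFoundException {
    Scanner scanner = new Scanner(new File(args[0]), "UTF-8");
    scanner.useDelimiter(RECORD_SEPARATOR);
    while (scanner.hasNext()) {
      String record = scanner.next().trim();
      System.out.println("record: " + record); // each record is one JSON event
    }
    scanner.close();
  }
}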
use of org.apache.tez.history.parser.datamodel.VertexInfo in project tez by apache.
the class TestHistoryParser method verifyJobSpecificInfo.
private void verifyJobSpecificInfo(DagInfo dagInfo) {
  // Job specific
  assertTrue(dagInfo.getNumVertices() == 2);
  assertTrue(dagInfo.getName().equals("WordCount"));
  assertTrue(dagInfo.getVertex(TOKENIZER).getProcessorClassName()
      .equals(WordCount.TokenProcessor.class.getName()));
  assertTrue(dagInfo.getVertex(SUMMATION).getProcessorClassName()
      .equals(WordCount.SumProcessor.class.getName()));
  assertTrue(dagInfo.getFinishTime() > dagInfo.getStartTime());
  assertTrue(dagInfo.getEdges().size() == 1);
  EdgeInfo edgeInfo = dagInfo.getEdges().iterator().next();
  assertTrue(edgeInfo.getDataMovementType()
      .equals(EdgeProperty.DataMovementType.SCATTER_GATHER.toString()));
  assertTrue(edgeInfo.getSourceVertex().getVertexName().equals(TOKENIZER));
  assertTrue(edgeInfo.getDestinationVertex().getVertexName().equals(SUMMATION));
  assertTrue(edgeInfo.getInputVertexName().equals(TOKENIZER));
  assertTrue(edgeInfo.getOutputVertexName().equals(SUMMATION));
  assertTrue(edgeInfo.getEdgeSourceClass().equals(OrderedPartitionedKVOutput.class.getName()));
  assertTrue(edgeInfo.getEdgeDestinationClass().equals(OrderedGroupedKVInput.class.getName()));
  assertTrue(dagInfo.getVertices().size() == 2);
  String lastSourceTA = null;
  String lastDataEventSourceTA = null;
  for (VertexInfo vertexInfo : dagInfo.getVertices()) {
    assertTrue(vertexInfo.getKilledTasksCount() == 0);
    assertTrue(vertexInfo.getInitRequestedTime() > 0);
    assertTrue(vertexInfo.getInitTime() > 0);
    assertTrue(vertexInfo.getStartRequestedTime() > 0);
    assertTrue(vertexInfo.getStartTime() > 0);
    assertTrue(vertexInfo.getFinishTime() > 0);
    assertTrue(vertexInfo.getFinishTime() > vertexInfo.getStartTime());
    long finishTime = 0;
    for (TaskInfo taskInfo : vertexInfo.getTasks()) {
      assertTrue(taskInfo.getNumberOfTaskAttempts() == 1);
      assertTrue(taskInfo.getMaxTaskAttemptDuration() >= 0);
      assertTrue(taskInfo.getMinTaskAttemptDuration() >= 0);
      assertTrue(taskInfo.getAvgTaskAttemptDuration() >= 0);
      assertTrue(taskInfo.getLastTaskAttemptToFinish() != null);
      assertTrue(taskInfo.getContainersMapping().size() > 0);
      assertTrue(taskInfo.getSuccessfulTaskAttempts().size() > 0);
      assertTrue(taskInfo.getFailedTaskAttempts().size() == 0);
      assertTrue(taskInfo.getKilledTaskAttempts().size() == 0);
      assertTrue(taskInfo.getFinishTime() > taskInfo.getStartTime());
      List<TaskAttemptInfo> attempts = taskInfo.getTaskAttempts();
      if (vertexInfo.getVertexName().equals(TOKENIZER)) {
        // Get the last task to finish and track its successful attempt.
        if (finishTime < taskInfo.getFinishTime()) {
          finishTime = taskInfo.getFinishTime();
          lastSourceTA = taskInfo.getSuccessfulAttemptId();
        }
      } else {
        for (TaskAttemptInfo attempt : attempts) {
          DataDependencyEvent item = attempt.getLastDataEvents().get(0);
          assertTrue(item.getTimestamp() > 0);
          if (lastDataEventSourceTA == null) {
            lastDataEventSourceTA = item.getTaskAttemptId();
          } else {
            // All attempts should have the same last data event source TA.
            assertTrue(lastDataEventSourceTA.equals(item.getTaskAttemptId()));
          }
        }
      }
      for (TaskAttemptInfo attemptInfo : taskInfo.getTaskAttempts()) {
        assertTrue(attemptInfo.getCreationTime() > 0);
        assertTrue(attemptInfo.getAllocationTime() > 0);
        assertTrue(attemptInfo.getStartTime() > 0);
        assertTrue(attemptInfo.getFinishTime() > attemptInfo.getStartTime());
      }
    }
    assertTrue(vertexInfo.getLastTaskToFinish() != null);
    if (vertexInfo.getVertexName().equals(TOKENIZER)) {
      assertTrue(vertexInfo.getInputEdges().size() == 0);
      assertTrue(vertexInfo.getOutputEdges().size() == 1);
      assertTrue(vertexInfo.getOutputVertices().size() == 1);
      assertTrue(vertexInfo.getInputVertices().size() == 0);
    } else {
      assertTrue(vertexInfo.getInputEdges().size() == 1);
      assertTrue(vertexInfo.getOutputEdges().size() == 0);
      assertTrue(vertexInfo.getOutputVertices().size() == 0);
      assertTrue(vertexInfo.getInputVertices().size() == 1);
    }
  }
  assertTrue(lastSourceTA.equals(lastDataEventSourceTA));
}
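One readability note on the test above: assertTrue(x == y) gives no detail on failure, whereas JUnit's assertEquals reports both expected and actual values. A sketch of the equivalent form for the first few assertions, assuming JUnit 4's org.junit.Assert and the same dagInfo parameter:

import static org.junit.Assert.assertEquals;

// Equivalent, more informative forms of the first assertions above:
assertEquals(2, dagInfo.getNumVertices());
assertEquals("WordCount", dagInfo.getName());
assertEquals(1, dagInfo.getEdges().size());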