Search in sources :

Example 1 with WordCount

Use of org.apache.tez.examples.WordCount in the Apache Tez project.

From the class TestHistoryParser, method verifyJobSpecificInfo:

/**
 * Asserts the WordCount-specific structure of a parsed DAG.
 *
 * <p>The DAG must be named "WordCount" and consist of exactly two vertices
 * ({@code TOKENIZER} and {@code SUMMATION}) joined by a single scatter-gather
 * edge running from the tokenizer to the summation vertex. Every vertex, task
 * and task attempt must carry positive, correctly ordered timestamps, and each
 * task must have exactly one attempt with no failed or killed attempts.
 *
 * <p>Finally, the successful attempt of the tokenizer task that finished last
 * must be the attempt id reported by the first entry of the last-data-events
 * list of every non-tokenizer task attempt.
 *
 * @param dagInfo parsed DAG information to verify
 */
private void verifyJobSpecificInfo(DagInfo dagInfo) {
    // DAG-level expectations: shape, name, processors and timing.
    assertTrue(dagInfo.getNumVertices() == 2);
    assertTrue(dagInfo.getName().equals("WordCount"));
    assertTrue(dagInfo.getVertex(TOKENIZER).getProcessorClassName()
        .equals(WordCount.TokenProcessor.class.getName()));
    assertTrue(dagInfo.getVertex(SUMMATION).getProcessorClassName()
        .equals(WordCount.SumProcessor.class.getName()));
    assertTrue(dagInfo.getFinishTime() > dagInfo.getStartTime());

    // The single edge must be TOKENIZER -> SUMMATION with scatter-gather movement.
    assertTrue(dagInfo.getEdges().size() == 1);
    EdgeInfo onlyEdge = dagInfo.getEdges().iterator().next();
    assertTrue(onlyEdge.getDataMovementType()
        .equals(EdgeProperty.DataMovementType.SCATTER_GATHER.toString()));
    assertTrue(onlyEdge.getSourceVertex().getVertexName().equals(TOKENIZER));
    assertTrue(onlyEdge.getDestinationVertex().getVertexName().equals(SUMMATION));
    assertTrue(onlyEdge.getInputVertexName().equals(TOKENIZER));
    assertTrue(onlyEdge.getOutputVertexName().equals(SUMMATION));
    assertTrue(onlyEdge.getEdgeSourceClass().equals(OrderedPartitionedKVOutput.class.getName()));
    assertTrue(onlyEdge.getEdgeDestinationClass().equals(OrderedGroupedKVInput.class.getName()));

    assertTrue(dagInfo.getVertices().size() == 2);

    // Successful attempt of the tokenizer task that finished last.
    String lastTokenizerAttemptId = null;
    // Source attempt id shared by the last data event of every downstream attempt.
    String sharedLastEventSourceId = null;

    for (VertexInfo vertex : dagInfo.getVertices()) {
        assertTrue(vertex.getKilledTasksCount() == 0);
        assertTrue(vertex.getInitRequestedTime() > 0);
        assertTrue(vertex.getInitTime() > 0);
        assertTrue(vertex.getStartRequestedTime() > 0);
        assertTrue(vertex.getStartTime() > 0);
        assertTrue(vertex.getFinishTime() > 0);
        assertTrue(vertex.getFinishTime() > vertex.getStartTime());

        boolean isTokenizer = vertex.getVertexName().equals(TOKENIZER);
        long latestTaskFinish = 0;

        for (TaskInfo task : vertex.getTasks()) {
            assertTrue(task.getNumberOfTaskAttempts() == 1);
            assertTrue(task.getMaxTaskAttemptDuration() >= 0);
            assertTrue(task.getMinTaskAttemptDuration() >= 0);
            assertTrue(task.getAvgTaskAttemptDuration() >= 0);
            assertTrue(task.getLastTaskAttemptToFinish() != null);
            assertTrue(task.getContainersMapping().size() > 0);
            assertTrue(task.getSuccessfulTaskAttempts().size() > 0);
            assertTrue(task.getFailedTaskAttempts().size() == 0);
            assertTrue(task.getKilledTaskAttempts().size() == 0);
            assertTrue(task.getFinishTime() > task.getStartTime());

            List<TaskAttemptInfo> attempts = task.getTaskAttempts();
            if (!isTokenizer) {
                for (TaskAttemptInfo attempt : attempts) {
                    DataDependencyEvent lastEvent = attempt.getLastDataEvents().get(0);
                    assertTrue(lastEvent.getTimestamp() > 0);
                    if (sharedLastEventSourceId != null) {
                        // every attempt must point at the same source attempt
                        assertTrue(sharedLastEventSourceId.equals(lastEvent.getTaskAttemptId()));
                    } else {
                        sharedLastEventSourceId = lastEvent.getTaskAttemptId();
                    }
                }
            } else {
                // remember the successful attempt of the latest-finishing tokenizer task
                long taskFinish = task.getFinishTime();
                if (taskFinish > latestTaskFinish) {
                    latestTaskFinish = taskFinish;
                    lastTokenizerAttemptId = task.getSuccessfulAttemptId();
                }
            }

            for (TaskAttemptInfo attempt : attempts) {
                assertTrue(attempt.getCreationTime() > 0);
                assertTrue(attempt.getAllocationTime() > 0);
                assertTrue(attempt.getStartTime() > 0);
                assertTrue(attempt.getFinishTime() > attempt.getStartTime());
            }
        }

        assertTrue(vertex.getLastTaskToFinish() != null);

        // The tokenizer only feeds data out; the other vertex only takes data in.
        int expectedInputs = isTokenizer ? 0 : 1;
        int expectedOutputs = isTokenizer ? 1 : 0;
        assertTrue(vertex.getInputEdges().size() == expectedInputs);
        assertTrue(vertex.getOutputEdges().size() == expectedOutputs);
        assertTrue(vertex.getOutputVertices().size() == expectedOutputs);
        assertTrue(vertex.getInputVertices().size() == expectedInputs);
    }

    assertTrue(lastTokenizerAttemptId.equals(sharedLastEventSourceId));
}
Also used : TaskInfo(org.apache.tez.history.parser.datamodel.TaskInfo) DataDependencyEvent(org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent) WordCount(org.apache.tez.examples.WordCount) TaskAttemptInfo(org.apache.tez.history.parser.datamodel.TaskAttemptInfo) VertexInfo(org.apache.tez.history.parser.datamodel.VertexInfo) EdgeInfo(org.apache.tez.history.parser.datamodel.EdgeInfo)

Example 2 with WordCount

Use of org.apache.tez.examples.WordCount in the Apache Tez project.

From the class TestHistoryParser, method testParserWithSuccessfulJob:

/**
 * Runs the word count example in the mini cluster twice and cross-checks the
 * parsed history of both runs.
 *
 * <p>The first run's data is downloaded from ATS with {@code ATSImportTool}
 * and parsed; the second run uses SimpleHistoryLogging and is parsed from that
 * log. Both results go through the generic and the WordCount-specific
 * verifications and are then compared with each other.
 *
 * @throws Exception if any of the invoked steps fails (including interruption
 *     of the wait before reading the simple history log)
 */
@Test
public void testParserWithSuccessfulJob() throws Exception {
    String tokenProcessor = WordCount.TokenProcessor.class.getName();
    String sumProcessor = WordCount.SumProcessor.class.getName();

    // First run: basic word count, then export its data from ATS.
    String dagId = runWordCount(tokenProcessor, sumProcessor, "WordCount", true);
    String[] importArgs = new String[] {
        "--dagId=" + dagId,
        "--downloadDir=" + DOWNLOAD_DIR,
        "--yarnTimelineAddress=" + yarnTimelineAddress
    };
    assertTrue(ATSImportTool.process(importArgs) == 0);

    // Parse what was downloaded from ATS and verify it.
    DagInfo atsDagInfo = getDagInfo(dagId);
    verifyDagInfo(atsDagInfo, true);
    verifyJobSpecificInfo(atsDagInfo);
    checkConfig(atsDagInfo);

    // Second run: same job, this time with SimpleHistoryLogging.
    dagId = runWordCount(tokenProcessor, sumProcessor, "WordCount", false);
    // Give all flushes time to complete so a half-written log is not parsed.
    Thread.sleep(10000);
    DagInfo simpleHistoryDagInfo = getDagInfoFromSimpleHistory(dagId);
    verifyDagInfo(simpleHistoryDagInfo, false);
    verifyJobSpecificInfo(simpleHistoryDagInfo);

    // The ATS-derived and SimpleHistoryLog-derived DagInfo must agree.
    isDAGEqual(atsDagInfo, simpleHistoryDagInfo);
}
Also used : DagInfo(org.apache.tez.history.parser.datamodel.DagInfo) WordCount(org.apache.tez.examples.WordCount) Test(org.junit.Test)

Aggregations

WordCount (org.apache.tez.examples.WordCount): 2, DagInfo (org.apache.tez.history.parser.datamodel.DagInfo): 1, EdgeInfo (org.apache.tez.history.parser.datamodel.EdgeInfo): 1, TaskAttemptInfo (org.apache.tez.history.parser.datamodel.TaskAttemptInfo): 1, DataDependencyEvent (org.apache.tez.history.parser.datamodel.TaskAttemptInfo.DataDependencyEvent): 1, TaskInfo (org.apache.tez.history.parser.datamodel.TaskInfo): 1, VertexInfo (org.apache.tez.history.parser.datamodel.VertexInfo): 1, Test (org.junit.Test): 1