Search in sources :

Example 11 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestDAGRecovery method testTARecoverFromNewToKilled.

/**
 * Recovery input: a single TaskAttemptFinishedEvent (KILLED), with no started event.
 * Expectation: the task attempt is recovered directly into the KILLED state.
 */
@Test(timeout = 5000)
public void testTARecoverFromNewToKilled() {
    initMockDAGRecoveryDataForTaskAttempt();

    // Recovery data holds only the finished (KILLED) event; the started event is null.
    final TaskAttemptTerminationCause expectedCause = TaskAttemptTerminationCause.TERMINATED_BY_CLIENT;
    TaskAttemptFinishedEvent killedEvent = new TaskAttemptFinishedEvent(ta1t1v1Id, "v1", ta1LaunchTime, ta1FinishedTime, TaskAttemptState.KILLED, null, expectedCause, "", null, null, null, 0L, null, 0L, null, null, null, null, null);
    TaskAttemptRecoveryData attemptRecoveryData = new TaskAttemptRecoveryData(null, killedEvent);
    doReturn(attemptRecoveryData).when(dagRecoveryData).getTaskAttemptRecoveryData(ta1t1v1Id);

    // Drive the recovery and wait for the dispatcher to drain.
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();

    TaskImpl recoveredTask = (TaskImpl) dag.getVertex(v1Id).getTask(t1v1Id);
    TaskAttemptImpl recoveredAttempt = (TaskAttemptImpl) recoveredTask.getAttempt(ta1t1v1Id);

    // The attempt keeps the recorded terminal state and termination cause.
    assertEquals(TaskAttemptStateInternal.KILLED, recoveredAttempt.getInternalState());
    assertEquals(expectedCause, recoveredAttempt.getTerminationCause());

    // Both history event types are verified with a count argument of 0.
    historyEventHandler.verifyHistoryEvent(0, HistoryEventType.TASK_ATTEMPT_STARTED);
    historyEventHandler.verifyHistoryEvent(0, HistoryEventType.TASK_ATTEMPT_FINISHED);

    // A KILLED attempt is not counted as failed; the finish time comes from the event.
    assertEquals(0, recoveredTask.failedAttempts);
    assertEquals(ta1FinishedTime, recoveredAttempt.getFinishTime());
}
Also used : DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 12 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestDAGRecovery method testTARecoverFromSucceeded_OutputCommitterRecoveryNotSupported.

/**
 * Recovery input: TaskAttemptStartedEvent -> TaskAttemptFinishedEvent (SUCCEEDED).
 * The attempt was recorded as SUCCEEDED, but V2's committer does not support
 * recovery, so after recovery the attempt is expected to be KILLED and the task
 * is expected to schedule a new attempt.
 */
// NOTE(review): the 5000 ms timeout is commented out for this test — confirm that is intended.
// (timeout=5000)
@Test
public void testTARecoverFromSucceeded_OutputCommitterRecoveryNotSupported() throws Exception {
    initMockDAGRecoveryDataForTaskAttempt();

    // Recovery data assembled below for v2:
    //   ta1t1v2: TaskAttemptStartedEvent -> TaskAttemptFinishedEvent(SUCCEEDED)
    //   t1v2:    TaskStartedEvent
    //   v2:      VertexInitializedEvent -> VertexConfigurationDoneEvent -> VertexStartedEvent

    // --- task attempt level ---
    TaskAttemptStartedEvent attemptStarted = new TaskAttemptStartedEvent(ta1t1v2Id, "vertex2", ta1LaunchTime, mock(ContainerId.class), mock(NodeId.class), "", "", "");
    List<TezEvent> generatedEvents = new ArrayList<TezEvent>();
    EventMetaData outputMeta = new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex3", ta1t1v2Id);
    DataMovementEvent emptyPayloadEvent = DataMovementEvent.create(ByteBuffer.wrap(new byte[0]));
    generatedEvents.add(new TezEvent(emptyPayloadEvent, outputMeta));
    TaskAttemptFinishedEvent attemptSucceeded = new TaskAttemptFinishedEvent(ta1t1v2Id, "vertex2", ta1LaunchTime, ta1FinishedTime, TaskAttemptState.SUCCEEDED, null, null, "", null, null, generatedEvents, 0L, null, 0L, null, null, null, null, null);
    TaskAttemptRecoveryData attemptRecoveryData = new TaskAttemptRecoveryData(attemptStarted, attemptSucceeded);
    doReturn(attemptRecoveryData).when(dagRecoveryData).getTaskAttemptRecoveryData(ta1t1v2Id);
    Map<TezTaskAttemptID, TaskAttemptRecoveryData> attemptsById = new HashMap<TezTaskAttemptID, TaskAttemptRecoveryData>();
    attemptsById.put(ta1t1v2Id, attemptRecoveryData);

    // --- task level ---
    TaskStartedEvent taskStarted = new TaskStartedEvent(t1v2Id, "vertex2", 0L, t1StartedTime);
    TaskRecoveryData t1RecoveryData = new TaskRecoveryData(taskStarted, null, attemptsById);
    Map<TezTaskID, TaskRecoveryData> tasksById = new HashMap<TezTaskID, TaskRecoveryData>();
    tasksById.put(t1v2Id, t1RecoveryData);
    doReturn(t1RecoveryData).when(dagRecoveryData).getTaskRecoveryData(t1v2Id);

    // --- vertex level (reuses the v1 timing and task-count fixtures for v2) ---
    VertexInitializedEvent vertexInited = new VertexInitializedEvent(v2Id, "vertex2", 0L, v1InitedTime, v1NumTask, "", null, null, null);
    VertexConfigurationDoneEvent vertexConfigured = new VertexConfigurationDoneEvent(v2Id, 0L, v1NumTask, null, null, null, false);
    VertexStartedEvent vertexStarted = new VertexStartedEvent(v2Id, 0L, v1StartedTime);
    VertexRecoveryData vertexRecoveryData = new VertexRecoveryData(vertexInited, vertexConfigured, vertexStarted, null, tasksById, false);
    doReturn(vertexRecoveryData).when(dagRecoveryData).getVertexRecoveryData(v2Id);

    // Drive the recovery and wait for the dispatcher to drain.
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();

    TaskImpl recoveredTask = (TaskImpl) dag.getVertex(v2Id).getTask(t1v2Id);
    TaskAttemptImpl recoveredAttempt = (TaskAttemptImpl) recoveredTask.getAttempt(ta1t1v2Id);

    // The previously succeeded attempt ends up KILLED while the task keeps running.
    assertEquals(TaskAttemptStateInternal.KILLED, recoveredAttempt.getInternalState());
    historyEventHandler.verifyHistoryEvent(1, HistoryEventType.TASK_ATTEMPT_FINISHED);
    assertEquals(TaskStateInternal.RUNNING, recoveredTask.getInternalState());

    // A replacement attempt has been scheduled next to the recovered one.
    assertEquals(2, recoveredTask.getAttempts().size());

    // Launch and finish times are taken from the recovered events.
    assertEquals(ta1LaunchTime, recoveredAttempt.getLaunchTime());
    assertEquals(ta1FinishedTime, recoveredAttempt.getFinishTime());
}
Also used : VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 13 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class TestHistoryEventJsonConversion method testHandlerExists.

/**
 * Builds one sample event for every {@link HistoryEventType} value and passes each
 * event that reports {@code isHistoryEvent()} through
 * {@code HistoryEventJsonConversion.convertToJson}.
 *
 * A HistoryEventType without a case in the switch reaches the default branch,
 * which fails the test. For DAG_SUBMITTED the queue name is additionally checked
 * in both the OTHER_INFO and the PRIMARY_FILTERS sections of the JSON.
 *
 * @throws JSONException if the JSON conversion throws it
 */
@Test(timeout = 5000)
public void testHandlerExists() throws JSONException {
    for (HistoryEventType eventType : HistoryEventType.values()) {
        HistoryEvent event = null;
        // Construct a representative event for this type; most field values are
        // random or null.
        switch(eventType) {
            case APP_LAUNCHED:
                event = new AppLaunchedEvent(applicationId, random.nextInt(), random.nextInt(), user, new Configuration(false), null);
                break;
            case AM_LAUNCHED:
                event = new AMLaunchedEvent(applicationAttemptId, random.nextInt(), random.nextInt(), user);
                break;
            case AM_STARTED:
                event = new AMStartedEvent(applicationAttemptId, random.nextInt(), user);
                break;
            case DAG_SUBMITTED:
                // The last argument ("Q_" + type name) is the value asserted as the
                // DAG queue name after conversion.
                event = new DAGSubmittedEvent(tezDAGID, random.nextInt(), dagPlan, applicationAttemptId, null, user, null, null, "Q_" + eventType.name());
                break;
            case DAG_INITIALIZED:
                event = new DAGInitializedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName(), null);
                break;
            case DAG_STARTED:
                event = new DAGStartedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName());
                break;
            case DAG_FINISHED:
                event = new DAGFinishedEvent(tezDAGID, random.nextInt(), random.nextInt(), DAGState.ERROR, null, null, user, dagPlan.getName(), null, applicationAttemptId, dagPlan);
                break;
            case VERTEX_INITIALIZED:
                event = new VertexInitializedEvent(tezVertexID, "v1", random.nextInt(), random.nextInt(), random.nextInt(), "proc", null, null, null);
                break;
            case VERTEX_STARTED:
                event = new VertexStartedEvent(tezVertexID, random.nextInt(), random.nextInt());
                break;
            case VERTEX_CONFIGURE_DONE:
                event = new VertexConfigurationDoneEvent(tezVertexID, 0L, 1, null, null, null, true);
                break;
            case VERTEX_FINISHED:
                event = new VertexFinishedEvent(tezVertexID, "v1", 1, random.nextInt(), random.nextInt(), random.nextInt(), random.nextInt(), random.nextInt(), VertexState.ERROR, null, null, null, null, null);
                break;
            case TASK_STARTED:
                event = new TaskStartedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt());
                break;
            case TASK_FINISHED:
                event = new TaskFinishedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt(), tezTaskAttemptID, TaskState.FAILED, null, null, 0);
                break;
            case TASK_ATTEMPT_STARTED:
                event = new TaskAttemptStartedEvent(tezTaskAttemptID, "v1", random.nextInt(), containerId, nodeId, null, null, "nodeHttpAddress");
                break;
            case TASK_ATTEMPT_FINISHED:
                event = new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(), random.nextInt(), TaskAttemptState.KILLED, null, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT, null, null, null, null, 0, null, 0, containerId, nodeId, null, null, "nodeHttpAddress");
                break;
            case CONTAINER_LAUNCHED:
                event = new ContainerLaunchedEvent(containerId, random.nextInt(), applicationAttemptId);
                break;
            case CONTAINER_STOPPED:
                event = new ContainerStoppedEvent(containerId, random.nextInt(), -1, applicationAttemptId);
                break;
            // The commit and kill-request events below are created with their no-arg
            // constructors.
            case DAG_COMMIT_STARTED:
                event = new DAGCommitStartedEvent();
                break;
            case VERTEX_COMMIT_STARTED:
                event = new VertexCommitStartedEvent();
                break;
            case VERTEX_GROUP_COMMIT_STARTED:
                event = new VertexGroupCommitStartedEvent();
                break;
            case VERTEX_GROUP_COMMIT_FINISHED:
                event = new VertexGroupCommitFinishedEvent();
                break;
            case DAG_RECOVERED:
                event = new DAGRecoveredEvent(applicationAttemptId, tezDAGID, dagPlan.getName(), user, 1l, null);
                break;
            case DAG_KILL_REQUEST:
                event = new DAGKillRequestEvent();
                break;
            default:
                // A HistoryEventType value with no case above fails the test here.
                Assert.fail("Unhandled event type " + eventType);
        }
        // Only events that report isHistoryEvent() are converted; the rest are skipped.
        if (event == null || !event.isHistoryEvent()) {
            continue;
        }
        JSONObject json = HistoryEventJsonConversion.convertToJson(event);
        if (eventType == HistoryEventType.DAG_SUBMITTED) {
            try {
                // The queue name must be present both as other-info and as a primary filter.
                Assert.assertEquals("Q_" + eventType.name(), json.getJSONObject(ATSConstants.OTHER_INFO).getString(ATSConstants.DAG_QUEUE_NAME));
                Assert.assertEquals("Q_" + eventType.name(), json.getJSONObject(ATSConstants.PRIMARY_FILTERS).getString(ATSConstants.DAG_QUEUE_NAME));
            } catch (JSONException ex) {
                // A missing key is reported as a test failure that names the event type.
                Assert.fail("Exception: " + ex.getMessage() + " for type: " + eventType);
            }
        }
    }
}
Also used : DAGCommitStartedEvent(org.apache.tez.dag.history.events.DAGCommitStartedEvent) Configuration(org.apache.hadoop.conf.Configuration) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HistoryEventType(org.apache.tez.dag.history.HistoryEventType) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) ContainerStoppedEvent(org.apache.tez.dag.history.events.ContainerStoppedEvent) DAGKillRequestEvent(org.apache.tez.dag.history.events.DAGKillRequestEvent) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) DAGRecoveredEvent(org.apache.tez.dag.history.events.DAGRecoveredEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexGroupCommitStartedEvent(org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent) JSONException(org.codehaus.jettison.json.JSONException) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) AppLaunchedEvent(org.apache.tez.dag.history.events.AppLaunchedEvent) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) JSONObject(org.codehaus.jettison.json.JSONObject) VertexGroupCommitFinishedEvent(org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent) AMLaunchedEvent(org.apache.tez.dag.history.events.AMLaunchedEvent) ContainerLaunchedEvent(org.apache.tez.dag.history.events.ContainerLaunchedEvent) DAGFinishedEvent(org.apache.tez.dag.history.events.DAGFinishedEvent) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) 
VertexCommitStartedEvent(org.apache.tez.dag.history.events.VertexCommitStartedEvent) Test(org.junit.Test)

Example 14 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class HistoryEventHandler method shouldLogTaskAttemptEvents.

/**
 * Decides whether a task attempt start/finish event should be logged.
 *
 * When the DAG log level is TASK_ATTEMPT and a filter set is configured (per DAG,
 * falling back to the AM-wide set), the start event is held back in
 * {@code suppressedEvents} and this method returns false for it. When the matching
 * TaskAttemptFinishedEvent arrives, its termination cause is checked against the
 * filter set: on a match the held-back start event is discarded and the finished
 * event is not logged either.
 *
 * Note: if the AM is killed before the TaskAttemptFinishedEvent is received, the
 * held-back start event is lost.
 *
 * @param event the DAG history event under consideration
 * @param dagLogLevel the history log level in effect for the DAG
 * @return false when the event is held back or filtered out, true otherwise
 */
private boolean shouldLogTaskAttemptEvents(DAGHistoryEvent event, HistoryLogLevel dagLogLevel) {
    final HistoryEvent historyEvent = event.getHistoryEvent();
    final HistoryEventType type = historyEvent.getEventType();
    final boolean isAttemptStart = type == HistoryEventType.TASK_ATTEMPT_STARTED;
    final boolean isAttemptFinish = type == HistoryEventType.TASK_ATTEMPT_FINISHED;

    // Filtering only applies to attempt start/finish events at TASK_ATTEMPT level.
    if (dagLogLevel != HistoryLogLevel.TASK_ATTEMPT || !(isAttemptStart || isAttemptFinish)) {
        return true;
    }

    // Prefer the DAG-specific filter set; fall back to the AM-wide one.
    final TezDAGID dagId = event.getDagID();
    Set<TaskAttemptTerminationCause> activeFilters = (dagId == null) ? null : dagIdToTaskAttemptFilters.get(dagId);
    if (activeFilters == null) {
        activeFilters = amTaskAttemptFilters;
    }
    if (activeFilters == null) {
        // No filters configured at all: nothing to suppress.
        return true;
    }

    if (isAttemptStart) {
        // Hold the start event back until the outcome of the attempt is known.
        suppressedEvents.put(((TaskAttemptStartedEvent) historyEvent).getTaskAttemptID(), event);
        return false;
    }

    // TaskAttemptFinishedEvent
    final TaskAttemptFinishedEvent finished = (TaskAttemptFinishedEvent) historyEvent;
    if (!activeFilters.contains(finished.getTaskAttemptError())) {
        return true;
    }
    // The termination cause is filtered: drop the held-back start event as well.
    suppressedEvents.remove(finished.getTaskAttemptID());
    return false;
}
Also used : TezDAGID(org.apache.tez.dag.records.TezDAGID) TaskAttemptTerminationCause(org.apache.tez.dag.records.TaskAttemptTerminationCause) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)

Example 15 with TaskAttemptFinishedEvent

use of org.apache.tez.dag.history.events.TaskAttemptFinishedEvent in project tez by apache.

the class RecoveryParser method parseRecoveryData.

/**
 * Parses the recovery files and rebuilds the recovery state of the last DAG.
 *
 * 1. Read Summary Recovery file and build DAGSummaryData
 *    Check whether it is recoverable based on the summary file (whether dag is
 *    in the middle of committing)
 * 2. Read the non-Summary Recovery file and build DAGRecoveryData
 *    Check whether it is recoverable based on both the summary file and non-summary file
 *    (whether vertex has completed its committing, but its full non-summary recovery events are not seen)
 *
 * Side effects on {@code dagAppMaster}: the DAG counter is set to the highest DAG id
 * seen, every DAG id found in the summary files is added to {@code dagIDs}, and the
 * recovered DAG (created on DAG_SUBMITTED) is installed as the current DAG.
 *
 * Both the summary streams and the DAG recovery streams are closed in a
 * {@code finally} block, so a parse failure no longer leaks the open stream.
 *
 * @return DAGRecoveryData, or {@code null} when no completed or in-progress DAG is found
 * @throws IOException if a file cannot be read or a summary record is corrupted
 */
public DAGRecoveryData parseRecoveryData() throws IOException {
    int dagCounter = 0;
    Map<TezDAGID, DAGSummaryData> dagSummaryDataMap = new HashMap<TezDAGID, DAGSummaryData>();
    List<Path> summaryFiles = getSummaryFiles();
    LOG.debug("SummaryFile size:" + summaryFiles.size());
    for (Path summaryFile : summaryFiles) {
        FileStatus summaryFileStatus = recoveryFS.getFileStatus(summaryFile);
        LOG.info("Parsing summary file" + ", path=" + summaryFile.toString() + ", len=" + summaryFileStatus.getLen() + ", lastModTime=" + summaryFileStatus.getModificationTime());
        FSDataInputStream summaryStream = getSummaryStream(summaryFile);
        try {
            while (true) {
                RecoveryProtos.SummaryEventProto proto;
                try {
                    proto = RecoveryProtos.SummaryEventProto.parseDelimitedFrom(summaryStream);
                    if (proto == null) {
                        LOG.info("Reached end of summary stream");
                        break;
                    }
                } catch (EOFException eof) {
                    LOG.info("Reached end of summary stream");
                    break;
                }
                HistoryEventType eventType = HistoryEventType.values()[proto.getEventType()];
                if (LOG.isDebugEnabled()) {
                    LOG.debug("[RECOVERY SUMMARY]" + " dagId=" + proto.getDagId() + ", timestamp=" + proto.getTimestamp() + ", event=" + eventType);
                }
                TezDAGID dagId;
                try {
                    dagId = TezDAGID.fromString(proto.getDagId());
                } catch (IllegalArgumentException e) {
                    throw new IOException("Invalid dagId, summary records may be corrupted", e);
                }
                // Track the highest DAG id seen so far.
                if (dagCounter < dagId.getId()) {
                    dagCounter = dagId.getId();
                }
                if (!dagSummaryDataMap.containsKey(dagId)) {
                    dagSummaryDataMap.put(dagId, new DAGSummaryData(dagId));
                }
                try {
                    dagSummaryDataMap.get(dagId).handleSummaryEvent(proto);
                } catch (Exception e) {
                    // any exception when parsing protobuf
                    throw new IOException("Error when parsing summary event proto", e);
                }
            }
        } finally {
            // Close on every exit path, including the IOExceptions thrown above.
            summaryStream.close();
        }
    }
    // Set counter for next set of DAGs & update dagNames Set in DAGAppMaster
    dagAppMaster.setDAGCounter(dagCounter);
    for (DAGSummaryData dagSummaryData : dagSummaryDataMap.values()) {
        dagAppMaster.dagIDs.add(dagSummaryData.dagId.toString());
    }
    DAGSummaryData lastInProgressDAGData = getLastCompletedOrInProgressDAG(dagSummaryDataMap);
    if (lastInProgressDAGData == null) {
        LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
        return null;
    }
    TezDAGID lastInProgressDAG = lastInProgressDAGData.dagId;
    if (lastInProgressDAG == null) {
        LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
        return null;
    }
    LOG.info("Checking if DAG is in recoverable state" + ", dagId=" + lastInProgressDAGData.dagId);
    final DAGRecoveryData recoveredDAGData = new DAGRecoveryData(lastInProgressDAGData);
    List<Path> dagRecoveryFiles = getDAGRecoveryFiles(lastInProgressDAG);
    boolean skipAllOtherEvents = false;
    Path lastRecoveryFile = null;
    // The DAG_SUBMITTED event is the one that creates the DAGImpl (see the switch below).
    for (Path dagRecoveryFile : dagRecoveryFiles) {
        if (skipAllOtherEvents) {
            LOG.warn("Other recovery files will be skipped due to error in the previous recovery file" + lastRecoveryFile);
            break;
        }
        FileStatus fileStatus = recoveryFS.getFileStatus(dagRecoveryFile);
        lastRecoveryFile = dagRecoveryFile;
        LOG.info("Trying to recover dag from recovery file" + ", dagId=" + lastInProgressDAG.toString() + ", dagRecoveryFile=" + dagRecoveryFile + ", len=" + fileStatus.getLen());
        FSDataInputStream dagRecoveryStream = recoveryFS.open(dagRecoveryFile, recoveryBufferSize);
        try {
            while (true) {
                HistoryEvent event;
                try {
                    event = getNextEvent(dagRecoveryStream);
                    if (event == null) {
                        LOG.info("Reached end of dag recovery stream");
                        break;
                    }
                } catch (EOFException eof) {
                    LOG.info("Reached end of dag recovery stream");
                    break;
                } catch (IOException ioe) {
                    // Corrupt data: stop reading this file but keep what was recovered so far.
                    LOG.warn("Corrupt data found when trying to read next event", ioe);
                    break;
                }
                if (skipAllOtherEvents) {
                    // hit an error - skip reading other events
                    break;
                }
                HistoryEventType eventType = event.getEventType();
                LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
                switch(eventType) {
                    case DAG_SUBMITTED:
                        DAGSubmittedEvent submittedEvent = (DAGSubmittedEvent) event;
                        recoveredDAGData.recoveredDAG = dagAppMaster.createDAG(submittedEvent.getDAGPlan(), lastInProgressDAG);
                        recoveredDAGData.cumulativeAdditionalResources = submittedEvent.getCumulativeAdditionalLocalResources();
                        recoveredDAGData.recoveredDagID = recoveredDAGData.recoveredDAG.getID();
                        dagAppMaster.setCurrentDAG(recoveredDAGData.recoveredDAG);
                        if (recoveredDAGData.nonRecoverable) {
                            skipAllOtherEvents = true;
                        }
                        break;
                    case DAG_INITIALIZED:
                        recoveredDAGData.dagInitedEvent = (DAGInitializedEvent) event;
                        break;
                    case DAG_STARTED:
                        recoveredDAGData.dagStartedEvent = (DAGStartedEvent) event;
                        break;
                    case DAG_FINISHED:
                        recoveredDAGData.dagFinishedEvent = (DAGFinishedEvent) event;
                        // Nothing after DAG_FINISHED is read.
                        skipAllOtherEvents = true;
                        break;
                    case DAG_COMMIT_STARTED:
                    case VERTEX_GROUP_COMMIT_STARTED:
                    case VERTEX_GROUP_COMMIT_FINISHED:
                    case CONTAINER_LAUNCHED:
                        {
                            // Nothing to do for now
                            break;
                        }
                    case DAG_KILL_REQUEST:
                        {
                            break;
                        }
                    case VERTEX_INITIALIZED:
                        {
                            VertexInitializedEvent vertexInitEvent = (VertexInitializedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(vertexInitEvent.getVertexID());
                            vertexRecoveryData.vertexInitedEvent = vertexInitEvent;
                            break;
                        }
                    case VERTEX_CONFIGURE_DONE:
                        {
                            VertexConfigurationDoneEvent reconfigureDoneEvent = (VertexConfigurationDoneEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(reconfigureDoneEvent.getVertexID());
                            vertexRecoveryData.vertexConfigurationDoneEvent = reconfigureDoneEvent;
                            break;
                        }
                    case VERTEX_STARTED:
                        {
                            // Vertex recovery data must already exist for a started event.
                            VertexStartedEvent vertexStartedEvent = (VertexStartedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(vertexStartedEvent.getVertexID());
                            Preconditions.checkArgument(vertexRecoveryData != null, "No VertexInitializedEvent before VertexStartedEvent");
                            vertexRecoveryData.vertexStartedEvent = vertexStartedEvent;
                            break;
                        }
                    case VERTEX_COMMIT_STARTED:
                        {
                            break;
                        }
                    case VERTEX_FINISHED:
                        {
                            VertexFinishedEvent vertexFinishedEvent = (VertexFinishedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(vertexFinishedEvent.getVertexID());
                            vertexRecoveryData.vertexFinishedEvent = vertexFinishedEvent;
                            break;
                        }
                    case TASK_STARTED:
                        {
                            TaskStartedEvent taskStartedEvent = (TaskStartedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskStartedEvent.getTaskID().getVertexID());
                            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskStartedEvent, its vertex does not exist:" + taskStartedEvent.getTaskID().getVertexID());
                            TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskStartedEvent.getTaskID());
                            taskRecoveryData.taskStartedEvent = taskStartedEvent;
                            break;
                        }
                    case TASK_FINISHED:
                        {
                            TaskFinishedEvent taskFinishedEvent = (TaskFinishedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskFinishedEvent.getTaskID().getVertexID());
                            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskFinishedEvent, its vertex does not exist:" + taskFinishedEvent.getTaskID().getVertexID());
                            TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskFinishedEvent.getTaskID());
                            taskRecoveryData.taskFinishedEvent = taskFinishedEvent;
                            break;
                        }
                    case TASK_ATTEMPT_STARTED:
                        {
                            // Both the vertex and the task recovery data must already exist.
                            TaskAttemptStartedEvent taStartedEvent = (TaskAttemptStartedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taStartedEvent.getTaskAttemptID().getTaskID().getVertexID());
                            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemptStartedEvent, its vertexId does not exist, taId=" + taStartedEvent.getTaskAttemptID());
                            TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap.get(taStartedEvent.getTaskAttemptID().getTaskID());
                            Preconditions.checkArgument(taskRecoveryData != null, "Invalid TaskAttemptStartedEvent, its taskId does not exist, taId=" + taStartedEvent.getTaskAttemptID());
                            TaskAttemptRecoveryData taRecoveryData = taskRecoveryData.maybeCreateTaskAttemptRecoveryData(taStartedEvent.getTaskAttemptID());
                            taRecoveryData.taStartedEvent = taStartedEvent;
                            break;
                        }
                    case TASK_ATTEMPT_FINISHED:
                        {
                            // Both the vertex and the task recovery data must already exist.
                            TaskAttemptFinishedEvent taFinishedEvent = (TaskAttemptFinishedEvent) event;
                            VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taFinishedEvent.getTaskAttemptID().getTaskID().getVertexID());
                            Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemtFinishedEvent, its vertexId does not exist, taId=" + taFinishedEvent.getTaskAttemptID());
                            TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap.get(taFinishedEvent.getTaskAttemptID().getTaskID());
                            Preconditions.checkArgument(taskRecoveryData != null, "Invalid TaskAttemptFinishedEvent, its taskId does not exist, taId=" + taFinishedEvent.getTaskAttemptID());
                            TaskAttemptRecoveryData taRecoveryData = taskRecoveryData.maybeCreateTaskAttemptRecoveryData(taFinishedEvent.getTaskAttemptID());
                            taRecoveryData.taFinishedEvent = taFinishedEvent;
                            break;
                        }
                    default:
                        throw new RuntimeException("Invalid data found, unknown event type " + eventType);
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("[DAG RECOVERY]" + " dagId=" + lastInProgressDAG + ", eventType=" + eventType + ", event=" + event.toString());
                }
            }
        } finally {
            // Close on every exit path, including the runtime exceptions thrown by
            // Preconditions.checkArgument and the default branch above.
            dagRecoveryStream.close();
        }
    }
    recoveredDAGData.checkRecoverableNonSummary();
    return recoveredDAGData;
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) SummaryEventProto(org.apache.tez.dag.recovery.records.RecoveryProtos.SummaryEventProto) HistoryEventType(org.apache.tez.dag.history.HistoryEventType) TezDAGID(org.apache.tez.dag.records.TezDAGID) EOFException(java.io.EOFException) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) Path(org.apache.hadoop.fs.Path) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) RecoveryProtos(org.apache.tez.dag.recovery.records.RecoveryProtos) IOException(java.io.IOException) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) EOFException(java.io.EOFException) IOException(java.io.IOException) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Aggregations

TaskAttemptFinishedEvent (org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)18 TaskAttemptStartedEvent (org.apache.tez.dag.history.events.TaskAttemptStartedEvent)11 Test (org.junit.Test)11 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)9 NodeId (org.apache.hadoop.yarn.api.records.NodeId)8 TaskAttemptRecoveryData (org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData)8 TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent)8 StateChangeNotifierForTest (org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)7 DAGEventRecoverEvent (org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent)7 VertexStartedEvent (org.apache.tez.dag.history.events.VertexStartedEvent)7 DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent)6 TaskFinishedEvent (org.apache.tez.dag.history.events.TaskFinishedEvent)6 VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent)6 VertexFinishedEvent (org.apache.tez.dag.history.events.VertexFinishedEvent)5 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 AMStartedEvent (org.apache.tez.dag.history.events.AMStartedEvent)4 ContainerLaunchedEvent (org.apache.tez.dag.history.events.ContainerLaunchedEvent)4