Search in sources :

Example 11 with VertexStartedEvent

use of org.apache.tez.dag.history.events.VertexStartedEvent in project tez by apache.

the class TestDAGRecovery method testTARecoverFromSucceeded_OutputCommitterRecoveryNotSupported.

/**
 * RecoveryEvents: TaskAttemptStartedEvent -> TaskAttemptFinishedEvent (SUCCEEDED)
 * Recovered it SUCCEEDED, but task schedule new task attempt
 * V2's committer is not recovery supported
 */
// (timeout=5000)
@Test
public void testTARecoverFromSucceeded_OutputCommitterRecoveryNotSupported() throws Exception {
    initMockDAGRecoveryDataForTaskAttempt();
    // set up v2 recovery data
    // ta1t1v2: TaskAttemptStartedEvent -> TaskAttemptFinishedEvent(SUCCEEDED)
    // t1v2: TaskStartedEvent
    // v2: VertexInitializedEvent -> VertexConfigurationDoneEvent -> VertexStartedEvent
    TaskAttemptStartedEvent taStartedEvent = new TaskAttemptStartedEvent(ta1t1v2Id, "vertex2", ta1LaunchTime, mock(ContainerId.class), mock(NodeId.class), "", "", "");
    List<TezEvent> taGeneratedEvents = new ArrayList<TezEvent>();
    EventMetaData metadata = new EventMetaData(EventProducerConsumerType.OUTPUT, "vertex2", "vertex3", ta1t1v2Id);
    taGeneratedEvents.add(new TezEvent(DataMovementEvent.create(ByteBuffer.wrap(new byte[0])), metadata));
    TaskAttemptFinishedEvent taFinishedEvent = new TaskAttemptFinishedEvent(ta1t1v2Id, "vertex2", ta1LaunchTime, ta1FinishedTime, TaskAttemptState.SUCCEEDED, null, null, "", null, null, taGeneratedEvents, 0L, null, 0L, null, null, null, null, null);
    TaskAttemptRecoveryData taRecoveryData = new TaskAttemptRecoveryData(taStartedEvent, taFinishedEvent);
    doReturn(taRecoveryData).when(dagRecoveryData).getTaskAttemptRecoveryData(ta1t1v2Id);
    Map<TezTaskAttemptID, TaskAttemptRecoveryData> taRecoveryDataMap = new HashMap<TezTaskAttemptID, TaskAttemptRecoveryData>();
    taRecoveryDataMap.put(ta1t1v2Id, taRecoveryData);
    TaskStartedEvent t1StartedEvent = new TaskStartedEvent(t1v2Id, "vertex2", 0L, t1StartedTime);
    TaskRecoveryData taskRecoveryData = new TaskRecoveryData(t1StartedEvent, null, taRecoveryDataMap);
    Map<TezTaskID, TaskRecoveryData> taskRecoveryDataMap = new HashMap<TezTaskID, TaskRecoveryData>();
    taskRecoveryDataMap.put(t1v2Id, taskRecoveryData);
    doReturn(taskRecoveryData).when(dagRecoveryData).getTaskRecoveryData(t1v2Id);
    VertexInitializedEvent v2InitedEvent = new VertexInitializedEvent(v2Id, "vertex2", 0L, v1InitedTime, v1NumTask, "", null, null, null);
    VertexConfigurationDoneEvent v2ReconfigureDoneEvent = new VertexConfigurationDoneEvent(v2Id, 0L, v1NumTask, null, null, null, false);
    VertexStartedEvent v2StartedEvent = new VertexStartedEvent(v2Id, 0L, v1StartedTime);
    VertexRecoveryData v2RecoveryData = new VertexRecoveryData(v2InitedEvent, v2ReconfigureDoneEvent, v2StartedEvent, null, taskRecoveryDataMap, false);
    doReturn(v2RecoveryData).when(dagRecoveryData).getVertexRecoveryData(v2Id);
    dag.handle(new DAGEventRecoverEvent(dagId, dagRecoveryData));
    dispatcher.await();
    TaskImpl task = (TaskImpl) dag.getVertex(v2Id).getTask(t1v2Id);
    TaskAttemptImpl taskAttempt = (TaskAttemptImpl) task.getAttempt(ta1t1v2Id);
    assertEquals(TaskAttemptStateInternal.KILLED, taskAttempt.getInternalState());
    historyEventHandler.verifyHistoryEvent(1, HistoryEventType.TASK_ATTEMPT_FINISHED);
    assertEquals(TaskStateInternal.RUNNING, task.getInternalState());
    // new task attempt is scheduled
    assertEquals(2, task.getAttempts().size());
    assertEquals(ta1LaunchTime, taskAttempt.getLaunchTime());
    assertEquals(ta1FinishedTime, taskAttempt.getFinishTime());
}
Also used : VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TaskAttemptRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskAttemptRecoveryData) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskRecoveryData(org.apache.tez.dag.app.RecoveryParser.TaskRecoveryData) DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) VertexRecoveryData(org.apache.tez.dag.app.RecoveryParser.VertexRecoveryData) TezEvent(org.apache.tez.runtime.api.impl.TezEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) EventMetaData(org.apache.tez.runtime.api.impl.EventMetaData) TezTaskAttemptID(org.apache.tez.dag.records.TezTaskAttemptID) Test(org.junit.Test) StateChangeNotifierForTest(org.apache.tez.dag.app.dag.TestStateChangeNotifier.StateChangeNotifierForTest)

Example 12 with VertexStartedEvent

use of org.apache.tez.dag.history.events.VertexStartedEvent in project tez by apache.

the class TestHistoryEventJsonConversion method testHandlerExists.

@Test(timeout = 5000)
public void testHandlerExists() throws JSONException {
    for (HistoryEventType eventType : HistoryEventType.values()) {
        HistoryEvent event = null;
        switch(eventType) {
            case APP_LAUNCHED:
                event = new AppLaunchedEvent(applicationId, random.nextInt(), random.nextInt(), user, new Configuration(false), null);
                break;
            case AM_LAUNCHED:
                event = new AMLaunchedEvent(applicationAttemptId, random.nextInt(), random.nextInt(), user);
                break;
            case AM_STARTED:
                event = new AMStartedEvent(applicationAttemptId, random.nextInt(), user);
                break;
            case DAG_SUBMITTED:
                event = new DAGSubmittedEvent(tezDAGID, random.nextInt(), dagPlan, applicationAttemptId, null, user, null, null, "Q_" + eventType.name());
                break;
            case DAG_INITIALIZED:
                event = new DAGInitializedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName(), null);
                break;
            case DAG_STARTED:
                event = new DAGStartedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName());
                break;
            case DAG_FINISHED:
                event = new DAGFinishedEvent(tezDAGID, random.nextInt(), random.nextInt(), DAGState.ERROR, null, null, user, dagPlan.getName(), null, applicationAttemptId, dagPlan);
                break;
            case VERTEX_INITIALIZED:
                event = new VertexInitializedEvent(tezVertexID, "v1", random.nextInt(), random.nextInt(), random.nextInt(), "proc", null, null, null);
                break;
            case VERTEX_STARTED:
                event = new VertexStartedEvent(tezVertexID, random.nextInt(), random.nextInt());
                break;
            case VERTEX_CONFIGURE_DONE:
                event = new VertexConfigurationDoneEvent(tezVertexID, 0L, 1, null, null, null, true);
                break;
            case VERTEX_FINISHED:
                event = new VertexFinishedEvent(tezVertexID, "v1", 1, random.nextInt(), random.nextInt(), random.nextInt(), random.nextInt(), random.nextInt(), VertexState.ERROR, null, null, null, null, null);
                break;
            case TASK_STARTED:
                event = new TaskStartedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt());
                break;
            case TASK_FINISHED:
                event = new TaskFinishedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt(), tezTaskAttemptID, TaskState.FAILED, null, null, 0);
                break;
            case TASK_ATTEMPT_STARTED:
                event = new TaskAttemptStartedEvent(tezTaskAttemptID, "v1", random.nextInt(), containerId, nodeId, null, null, "nodeHttpAddress");
                break;
            case TASK_ATTEMPT_FINISHED:
                event = new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(), random.nextInt(), TaskAttemptState.KILLED, null, TaskAttemptTerminationCause.TERMINATED_BY_CLIENT, null, null, null, null, 0, null, 0, containerId, nodeId, null, null, "nodeHttpAddress");
                break;
            case CONTAINER_LAUNCHED:
                event = new ContainerLaunchedEvent(containerId, random.nextInt(), applicationAttemptId);
                break;
            case CONTAINER_STOPPED:
                event = new ContainerStoppedEvent(containerId, random.nextInt(), -1, applicationAttemptId);
                break;
            case DAG_COMMIT_STARTED:
                event = new DAGCommitStartedEvent();
                break;
            case VERTEX_COMMIT_STARTED:
                event = new VertexCommitStartedEvent();
                break;
            case VERTEX_GROUP_COMMIT_STARTED:
                event = new VertexGroupCommitStartedEvent();
                break;
            case VERTEX_GROUP_COMMIT_FINISHED:
                event = new VertexGroupCommitFinishedEvent();
                break;
            case DAG_RECOVERED:
                event = new DAGRecoveredEvent(applicationAttemptId, tezDAGID, dagPlan.getName(), user, 1l, null);
                break;
            case DAG_KILL_REQUEST:
                event = new DAGKillRequestEvent();
                break;
            default:
                Assert.fail("Unhandled event type " + eventType);
        }
        if (event == null || !event.isHistoryEvent()) {
            continue;
        }
        JSONObject json = HistoryEventJsonConversion.convertToJson(event);
        if (eventType == HistoryEventType.DAG_SUBMITTED) {
            try {
                Assert.assertEquals("Q_" + eventType.name(), json.getJSONObject(ATSConstants.OTHER_INFO).getString(ATSConstants.DAG_QUEUE_NAME));
                Assert.assertEquals("Q_" + eventType.name(), json.getJSONObject(ATSConstants.PRIMARY_FILTERS).getString(ATSConstants.DAG_QUEUE_NAME));
            } catch (JSONException ex) {
                Assert.fail("Exception: " + ex.getMessage() + " for type: " + eventType);
            }
        }
    }
}
Also used : DAGCommitStartedEvent(org.apache.tez.dag.history.events.DAGCommitStartedEvent) Configuration(org.apache.hadoop.conf.Configuration) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HistoryEventType(org.apache.tez.dag.history.HistoryEventType) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) ContainerStoppedEvent(org.apache.tez.dag.history.events.ContainerStoppedEvent) DAGKillRequestEvent(org.apache.tez.dag.history.events.DAGKillRequestEvent) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) DAGRecoveredEvent(org.apache.tez.dag.history.events.DAGRecoveredEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexGroupCommitStartedEvent(org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent) JSONException(org.codehaus.jettison.json.JSONException) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) AppLaunchedEvent(org.apache.tez.dag.history.events.AppLaunchedEvent) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) JSONObject(org.codehaus.jettison.json.JSONObject) VertexGroupCommitFinishedEvent(org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent) AMLaunchedEvent(org.apache.tez.dag.history.events.AMLaunchedEvent) ContainerLaunchedEvent(org.apache.tez.dag.history.events.ContainerLaunchedEvent) DAGFinishedEvent(org.apache.tez.dag.history.events.DAGFinishedEvent) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) VertexCommitStartedEvent(org.apache.tez.dag.history.events.VertexCommitStartedEvent) Test(org.junit.Test)

Example 13 with VertexStartedEvent

use of org.apache.tez.dag.history.events.VertexStartedEvent in project tez by apache.

the class VertexImpl method startVertex.

private VertexState startVertex() {
    Preconditions.checkState(getState() == VertexState.INITED, "Vertex must be inited " + logIdentifier);
    if (recoveryData != null && recoveryData.isVertexStarted()) {
        VertexStartedEvent vertexStartedEvent = recoveryData.getVertexStartedEvent();
        this.startedTime = vertexStartedEvent.getStartTime();
    } else {
        this.startedTime = clock.getTime();
    }
    try {
        vertexManager.onVertexStarted(getTaskAttemptIdentifiers(dag, pendingReportedSrcCompletions));
    } catch (AMUserCodeException e) {
        String msg = "Exception in " + e.getSource() + ", vertex=" + logIdentifier;
        LOG.error(msg, e);
        addDiagnostic(msg + "," + ExceptionUtils.getStackTrace(e.getCause()));
        tryEnactKill(VertexTerminationCause.AM_USERCODE_FAILURE, TaskTerminationCause.AM_USERCODE_FAILURE);
        return VertexState.TERMINATING;
    }
    pendingReportedSrcCompletions.clear();
    logJobHistoryVertexStartedEvent();
    // the vertex is fully configured by the time it starts. Always notify completely configured
    // unless the vertex manager has told us that it is going to reconfigure it further.
    // If the vertex was pre-configured then the event would have been sent out earlier. Calling again
    // would be a no-op. If the vertex was not fully configured and waiting for that to complete then
    // we would start immediately after that. Either parallelism updated (now) or IPO changed (future)
    // or vertex added (future). Simplify these cases by sending the event now automatically for the
    // user as if they had invoked the planned()/done() API's.
    maybeSendConfiguredEvent();
    // when we are ready
    if (targetVertices != null) {
        for (Vertex targetVertex : targetVertices.keySet()) {
            eventHandler.handle(new VertexEventSourceVertexStarted(targetVertex.getVertexId(), getVertexId(), distanceFromRoot));
        }
    }
    // If we have no tasks, just transition to vertex completed
    if (this.numTasks == 0) {
        eventHandler.handle(new VertexEvent(this.vertexId, VertexEventType.V_COMPLETED));
    }
    return VertexState.RUNNING;
}
Also used : VertexEventRecoverVertex(org.apache.tez.dag.app.dag.event.VertexEventRecoverVertex) Vertex(org.apache.tez.dag.app.dag.Vertex) VertexEventSourceVertexStarted(org.apache.tez.dag.app.dag.event.VertexEventSourceVertexStarted) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexEvent(org.apache.tez.dag.app.dag.event.VertexEvent)

Example 14 with VertexStartedEvent

use of org.apache.tez.dag.history.events.VertexStartedEvent in project tez by apache.

the class VertexImpl method logJobHistoryVertexStartedEvent.

void logJobHistoryVertexStartedEvent() {
    if (recoveryData == null || !recoveryData.isVertexStarted()) {
        VertexStartedEvent startEvt = new VertexStartedEvent(vertexId, startTimeRequested, startedTime);
        this.appContext.getHistoryHandler().handle(new DAGHistoryEvent(getDAGId(), startEvt));
    }
}
Also used : VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent)

Example 15 with VertexStartedEvent

use of org.apache.tez.dag.history.events.VertexStartedEvent in project tez by apache.

the class RecoveryParser method parseRecoveryData.

/**
 * 1. Read Summary Recovery file and build DAGSummaryData
 *    Check whether it is recoverable based on the summary file (whether dag is
 *    in the middle of committing)
 * 2. Read the non-Summary Recovery file and build DAGRecoveryData
 *    Check whether it is recoverable based on both the summary file and non-summary file
 *    (whether vertex has completed its committing, but its full non-summary recovery events are not seen)
 * @return DAGRecoveryData
 * @throws IOException
 */
public DAGRecoveryData parseRecoveryData() throws IOException {
    int dagCounter = 0;
    Map<TezDAGID, DAGSummaryData> dagSummaryDataMap = new HashMap<TezDAGID, DAGSummaryData>();
    List<Path> summaryFiles = getSummaryFiles();
    LOG.debug("SummaryFile size:" + summaryFiles.size());
    for (Path summaryFile : summaryFiles) {
        FileStatus summaryFileStatus = recoveryFS.getFileStatus(summaryFile);
        LOG.info("Parsing summary file" + ", path=" + summaryFile.toString() + ", len=" + summaryFileStatus.getLen() + ", lastModTime=" + summaryFileStatus.getModificationTime());
        FSDataInputStream summaryStream = getSummaryStream(summaryFile);
        while (true) {
            RecoveryProtos.SummaryEventProto proto;
            try {
                proto = RecoveryProtos.SummaryEventProto.parseDelimitedFrom(summaryStream);
                if (proto == null) {
                    LOG.info("Reached end of summary stream");
                    break;
                }
            } catch (EOFException eof) {
                LOG.info("Reached end of summary stream");
                break;
            }
            HistoryEventType eventType = HistoryEventType.values()[proto.getEventType()];
            if (LOG.isDebugEnabled()) {
                LOG.debug("[RECOVERY SUMMARY]" + " dagId=" + proto.getDagId() + ", timestamp=" + proto.getTimestamp() + ", event=" + eventType);
            }
            TezDAGID dagId;
            try {
                dagId = TezDAGID.fromString(proto.getDagId());
            } catch (IllegalArgumentException e) {
                throw new IOException("Invalid dagId, summary records may be corrupted", e);
            }
            if (dagCounter < dagId.getId()) {
                dagCounter = dagId.getId();
            }
            if (!dagSummaryDataMap.containsKey(dagId)) {
                dagSummaryDataMap.put(dagId, new DAGSummaryData(dagId));
            }
            try {
                dagSummaryDataMap.get(dagId).handleSummaryEvent(proto);
            } catch (Exception e) {
                // any exception when parsing protobuf
                throw new IOException("Error when parsing summary event proto", e);
            }
        }
        summaryStream.close();
    }
    // Set counter for next set of DAGs & update dagNames Set in DAGAppMaster
    dagAppMaster.setDAGCounter(dagCounter);
    for (DAGSummaryData dagSummaryData : dagSummaryDataMap.values()) {
        dagAppMaster.dagIDs.add(dagSummaryData.dagId.toString());
    }
    DAGSummaryData lastInProgressDAGData = getLastCompletedOrInProgressDAG(dagSummaryDataMap);
    if (lastInProgressDAGData == null) {
        LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
        return null;
    }
    TezDAGID lastInProgressDAG = lastInProgressDAGData.dagId;
    if (lastInProgressDAG == null) {
        LOG.info("Nothing to recover as no uncompleted/completed DAGs found");
        return null;
    }
    LOG.info("Checking if DAG is in recoverable state" + ", dagId=" + lastInProgressDAGData.dagId);
    final DAGRecoveryData recoveredDAGData = new DAGRecoveryData(lastInProgressDAGData);
    List<Path> dagRecoveryFiles = getDAGRecoveryFiles(lastInProgressDAG);
    boolean skipAllOtherEvents = false;
    Path lastRecoveryFile = null;
    // to create the DAGImpl)
    for (Path dagRecoveryFile : dagRecoveryFiles) {
        if (skipAllOtherEvents) {
            LOG.warn("Other recovery files will be skipped due to error in the previous recovery file" + lastRecoveryFile);
            break;
        }
        FileStatus fileStatus = recoveryFS.getFileStatus(dagRecoveryFile);
        lastRecoveryFile = dagRecoveryFile;
        LOG.info("Trying to recover dag from recovery file" + ", dagId=" + lastInProgressDAG.toString() + ", dagRecoveryFile=" + dagRecoveryFile + ", len=" + fileStatus.getLen());
        FSDataInputStream dagRecoveryStream = recoveryFS.open(dagRecoveryFile, recoveryBufferSize);
        while (true) {
            HistoryEvent event;
            try {
                event = getNextEvent(dagRecoveryStream);
                if (event == null) {
                    LOG.info("Reached end of dag recovery stream");
                    break;
                }
            } catch (EOFException eof) {
                LOG.info("Reached end of dag recovery stream");
                break;
            } catch (IOException ioe) {
                LOG.warn("Corrupt data found when trying to read next event", ioe);
                break;
            }
            if (skipAllOtherEvents) {
                // hit an error - skip reading other events
                break;
            }
            HistoryEventType eventType = event.getEventType();
            LOG.info("Recovering from event" + ", eventType=" + eventType + ", event=" + event.toString());
            switch(eventType) {
                case DAG_SUBMITTED:
                    DAGSubmittedEvent submittedEvent = (DAGSubmittedEvent) event;
                    recoveredDAGData.recoveredDAG = dagAppMaster.createDAG(submittedEvent.getDAGPlan(), lastInProgressDAG);
                    recoveredDAGData.cumulativeAdditionalResources = submittedEvent.getCumulativeAdditionalLocalResources();
                    recoveredDAGData.recoveredDagID = recoveredDAGData.recoveredDAG.getID();
                    dagAppMaster.setCurrentDAG(recoveredDAGData.recoveredDAG);
                    if (recoveredDAGData.nonRecoverable) {
                        skipAllOtherEvents = true;
                    }
                    break;
                case DAG_INITIALIZED:
                    recoveredDAGData.dagInitedEvent = (DAGInitializedEvent) event;
                    break;
                case DAG_STARTED:
                    recoveredDAGData.dagStartedEvent = (DAGStartedEvent) event;
                    break;
                case DAG_FINISHED:
                    recoveredDAGData.dagFinishedEvent = (DAGFinishedEvent) event;
                    skipAllOtherEvents = true;
                    break;
                case DAG_COMMIT_STARTED:
                case VERTEX_GROUP_COMMIT_STARTED:
                case VERTEX_GROUP_COMMIT_FINISHED:
                case CONTAINER_LAUNCHED:
                    {
                        // Nothing to do for now
                        break;
                    }
                case DAG_KILL_REQUEST:
                    {
                        break;
                    }
                case VERTEX_INITIALIZED:
                    {
                        VertexInitializedEvent vertexInitEvent = (VertexInitializedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(vertexInitEvent.getVertexID());
                        vertexRecoveryData.vertexInitedEvent = vertexInitEvent;
                        break;
                    }
                case VERTEX_CONFIGURE_DONE:
                    {
                        VertexConfigurationDoneEvent reconfigureDoneEvent = (VertexConfigurationDoneEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(reconfigureDoneEvent.getVertexID());
                        vertexRecoveryData.vertexConfigurationDoneEvent = reconfigureDoneEvent;
                        break;
                    }
                case VERTEX_STARTED:
                    {
                        VertexStartedEvent vertexStartedEvent = (VertexStartedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(vertexStartedEvent.getVertexID());
                        Preconditions.checkArgument(vertexRecoveryData != null, "No VertexInitializedEvent before VertexStartedEvent");
                        vertexRecoveryData.vertexStartedEvent = vertexStartedEvent;
                        break;
                    }
                case VERTEX_COMMIT_STARTED:
                    {
                        break;
                    }
                case VERTEX_FINISHED:
                    {
                        VertexFinishedEvent vertexFinishedEvent = (VertexFinishedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.maybeCreateVertexRecoveryData(vertexFinishedEvent.getVertexID());
                        vertexRecoveryData.vertexFinishedEvent = vertexFinishedEvent;
                        break;
                    }
                case TASK_STARTED:
                    {
                        TaskStartedEvent taskStartedEvent = (TaskStartedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskStartedEvent.getTaskID().getVertexID());
                        Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskStartedEvent, its vertex does not exist:" + taskStartedEvent.getTaskID().getVertexID());
                        TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskStartedEvent.getTaskID());
                        taskRecoveryData.taskStartedEvent = taskStartedEvent;
                        break;
                    }
                case TASK_FINISHED:
                    {
                        TaskFinishedEvent taskFinishedEvent = (TaskFinishedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taskFinishedEvent.getTaskID().getVertexID());
                        Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskFinishedEvent, its vertex does not exist:" + taskFinishedEvent.getTaskID().getVertexID());
                        TaskRecoveryData taskRecoveryData = vertexRecoveryData.maybeCreateTaskRecoveryData(taskFinishedEvent.getTaskID());
                        taskRecoveryData.taskFinishedEvent = taskFinishedEvent;
                        break;
                    }
                case TASK_ATTEMPT_STARTED:
                    {
                        TaskAttemptStartedEvent taStartedEvent = (TaskAttemptStartedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taStartedEvent.getTaskAttemptID().getTaskID().getVertexID());
                        Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemptStartedEvent, its vertexId does not exist, taId=" + taStartedEvent.getTaskAttemptID());
                        TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap.get(taStartedEvent.getTaskAttemptID().getTaskID());
                        Preconditions.checkArgument(taskRecoveryData != null, "Invalid TaskAttemptStartedEvent, its taskId does not exist, taId=" + taStartedEvent.getTaskAttemptID());
                        TaskAttemptRecoveryData taRecoveryData = taskRecoveryData.maybeCreateTaskAttemptRecoveryData(taStartedEvent.getTaskAttemptID());
                        taRecoveryData.taStartedEvent = taStartedEvent;
                        break;
                    }
                case TASK_ATTEMPT_FINISHED:
                    {
                        TaskAttemptFinishedEvent taFinishedEvent = (TaskAttemptFinishedEvent) event;
                        VertexRecoveryData vertexRecoveryData = recoveredDAGData.vertexRecoveryDataMap.get(taFinishedEvent.getTaskAttemptID().getTaskID().getVertexID());
                        Preconditions.checkArgument(vertexRecoveryData != null, "Invalid TaskAttemtFinishedEvent, its vertexId does not exist, taId=" + taFinishedEvent.getTaskAttemptID());
                        TaskRecoveryData taskRecoveryData = vertexRecoveryData.taskRecoveryDataMap.get(taFinishedEvent.getTaskAttemptID().getTaskID());
                        Preconditions.checkArgument(taskRecoveryData != null, "Invalid TaskAttemptFinishedEvent, its taskId does not exist, taId=" + taFinishedEvent.getTaskAttemptID());
                        TaskAttemptRecoveryData taRecoveryData = taskRecoveryData.maybeCreateTaskAttemptRecoveryData(taFinishedEvent.getTaskAttemptID());
                        taRecoveryData.taFinishedEvent = taFinishedEvent;
                        break;
                    }
                default:
                    throw new RuntimeException("Invalid data found, unknown event type " + eventType);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("[DAG RECOVERY]" + " dagId=" + lastInProgressDAG + ", eventType=" + eventType + ", event=" + event.toString());
            }
        }
        dagRecoveryStream.close();
    }
    recoveredDAGData.checkRecoverableNonSummary();
    return recoveredDAGData;
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HashMap(java.util.HashMap) SummaryEventProto(org.apache.tez.dag.recovery.records.RecoveryProtos.SummaryEventProto) HistoryEventType(org.apache.tez.dag.history.HistoryEventType) TezDAGID(org.apache.tez.dag.records.TezDAGID) EOFException(java.io.EOFException) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) Path(org.apache.hadoop.fs.Path) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) RecoveryProtos(org.apache.tez.dag.recovery.records.RecoveryProtos) IOException(java.io.IOException) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) EOFException(java.io.EOFException) IOException(java.io.IOException) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Aggregations

VertexStartedEvent (org.apache.tez.dag.history.events.VertexStartedEvent)18 TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent)13 TaskAttemptStartedEvent (org.apache.tez.dag.history.events.TaskAttemptStartedEvent)12 VertexInitializedEvent (org.apache.tez.dag.history.events.VertexInitializedEvent)12 VertexConfigurationDoneEvent (org.apache.tez.dag.history.events.VertexConfigurationDoneEvent)11 DAGInitializedEvent (org.apache.tez.dag.history.events.DAGInitializedEvent)9 DAGStartedEvent (org.apache.tez.dag.history.events.DAGStartedEvent)9 DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent)8 TaskFinishedEvent (org.apache.tez.dag.history.events.TaskFinishedEvent)8 VertexFinishedEvent (org.apache.tez.dag.history.events.VertexFinishedEvent)8 TezTaskID (org.apache.tez.dag.records.TezTaskID)8 TezEvent (org.apache.tez.runtime.api.impl.TezEvent)8 Test (org.junit.Test)8 ArrayList (java.util.ArrayList)7 DAGFinishedEvent (org.apache.tez.dag.history.events.DAGFinishedEvent)7 TaskAttemptFinishedEvent (org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)7 Configuration (org.apache.hadoop.conf.Configuration)6 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)6 AMStartedEvent (org.apache.tez.dag.history.events.AMStartedEvent)6 TezVertexID (org.apache.tez.dag.records.TezVertexID)6