Search in sources :

Example 1 with AMStartedEvent

use of org.apache.tez.dag.history.events.AMStartedEvent in project tez by apache.

the class DAGAppMaster method serviceStart.

@Override
public synchronized void serviceStart() throws Exception {
    // start all the components
    startServices();
    super.serviceStart();
    boolean invalidSession = false;
    if (isSession && !recoveryEnabled && appAttemptID.getAttemptId() > 1) {
        String err = INVALID_SESSION_ERR_MSG;
        LOG.error(err);
        addDiagnostic(err);
        this.state = DAGAppMasterState.ERROR;
        invalidSession = true;
    }
    if (versionMismatch || invalidSession) {
        // Short-circuit and return as no DAG should be run
        this.taskSchedulerManager.setShouldUnregisterFlag();
        shutdownHandler.shutdown();
        return;
    }
    this.appsStartTime = clock.getTime();
    AMStartedEvent startEvent = new AMStartedEvent(appAttemptID, appsStartTime, appMasterUgi.getShortUserName());
    historyEventHandler.handle(new DAGHistoryEvent(startEvent));
    this.lastDAGCompletionTime = clock.getTime();
    DAGRecoveryData recoveredDAGData;
    try {
        recoveredDAGData = recoverDAG();
    } catch (IOException e) {
        LOG.error("Error occurred when trying to recover data from previous attempt." + " Shutting down AM", e);
        this.state = DAGAppMasterState.ERROR;
        this.taskSchedulerManager.setShouldUnregisterFlag();
        shutdownHandler.shutdown();
        return;
    }
    if (!isSession) {
        LOG.info("In Non-Session mode.");
    } else {
        LOG.info("In Session mode. Waiting for DAG over RPC");
        this.state = DAGAppMasterState.IDLE;
    }
    if (recoveredDAGData != null) {
        if (recoveredDAGData.cumulativeAdditionalResources != null) {
            recoveredDAGData.additionalUrlsForClasspath = processAdditionalResources(recoveredDAGData.recoveredDagID, recoveredDAGData.cumulativeAdditionalResources);
            amResources.putAll(recoveredDAGData.cumulativeAdditionalResources);
            cumulativeAdditionalResources.putAll(recoveredDAGData.cumulativeAdditionalResources);
        }
        if (recoveredDAGData.isSessionStopped) {
            LOG.info("AM crashed when shutting down in the previous attempt" + ", continue the shutdown and recover it to SUCCEEDED");
            this.sessionStopped.set(true);
            return;
        }
        if (recoveredDAGData.isCompleted || recoveredDAGData.nonRecoverable) {
            LOG.info("Found previous DAG in completed or non-recoverable state" + ", dagId=" + recoveredDAGData.recoveredDagID + ", isCompleted=" + recoveredDAGData.isCompleted + ", isNonRecoverable=" + recoveredDAGData.nonRecoverable + ", state=" + (recoveredDAGData.dagState == null ? "null" : recoveredDAGData.dagState) + ", failureReason=" + recoveredDAGData.reason);
            _updateLoggers(recoveredDAGData.recoveredDAG, "");
            if (recoveredDAGData.nonRecoverable) {
                addDiagnostic("DAG " + recoveredDAGData.recoveredDagID + " can not be recovered due to " + recoveredDAGData.reason);
                DAGEventRecoverEvent recoverDAGEvent = new DAGEventRecoverEvent(recoveredDAGData.recoveredDAG.getID(), DAGState.FAILED, recoveredDAGData);
                DAGRecoveredEvent dagRecoveredEvent = new DAGRecoveredEvent(this.appAttemptID, recoveredDAGData.recoveredDAG.getID(), recoveredDAGData.recoveredDAG.getName(), recoveredDAGData.recoveredDAG.getUserName(), this.clock.getTime(), DAGState.FAILED, recoveredDAGData.reason, this.containerLogs);
                dagRecoveredEvent.setHistoryLoggingEnabled(recoveredDAGData.recoveredDAG.getConf().getBoolean(TezConfiguration.TEZ_DAG_HISTORY_LOGGING_ENABLED, TezConfiguration.TEZ_DAG_HISTORY_LOGGING_ENABLED_DEFAULT));
                this.historyEventHandler.handle(new DAGHistoryEvent(recoveredDAGData.recoveredDAG.getID(), dagRecoveredEvent));
                dagEventDispatcher.handle(recoverDAGEvent);
                this.state = DAGAppMasterState.RUNNING;
            } else {
                DAGEventRecoverEvent recoverDAGEvent = new DAGEventRecoverEvent(recoveredDAGData.recoveredDAG.getID(), recoveredDAGData.dagState, recoveredDAGData);
                DAGRecoveredEvent dagRecoveredEvent = new DAGRecoveredEvent(this.appAttemptID, recoveredDAGData.recoveredDAG.getID(), recoveredDAGData.recoveredDAG.getName(), recoveredDAGData.recoveredDAG.getUserName(), this.clock.getTime(), recoveredDAGData.dagState, null, this.containerLogs);
                this.historyEventHandler.handle(new DAGHistoryEvent(recoveredDAGData.recoveredDAG.getID(), dagRecoveredEvent));
                dagEventDispatcher.handle(recoverDAGEvent);
                this.state = DAGAppMasterState.RUNNING;
            }
        } else {
            LOG.info("Found DAG to recover, dagId=" + recoveredDAGData.recoveredDAG.getID());
            _updateLoggers(recoveredDAGData.recoveredDAG, "");
            DAGRecoveredEvent dagRecoveredEvent = new DAGRecoveredEvent(this.appAttemptID, recoveredDAGData.recoveredDAG.getID(), recoveredDAGData.recoveredDAG.getName(), recoveredDAGData.recoveredDAG.getUserName(), this.clock.getTime(), this.containerLogs);
            this.historyEventHandler.handle(new DAGHistoryEvent(recoveredDAGData.recoveredDAG.getID(), dagRecoveredEvent));
            DAGEventRecoverEvent recoverDAGEvent = new DAGEventRecoverEvent(recoveredDAGData.recoveredDAG.getID(), recoveredDAGData);
            dagEventDispatcher.handle(recoverDAGEvent);
            this.state = DAGAppMasterState.RUNNING;
        }
    } else {
        if (!isSession) {
            // No dag recovered - in non-session, just restart the original DAG
            dagCounter.set(0);
            startDAG();
        }
    }
    if (isSession && sessionTimeoutInterval >= 0) {
        this.dagSubmissionTimer = new Timer("DAGSubmissionTimer", true);
        this.dagSubmissionTimer.scheduleAtFixedRate(new TimerTask() {

            @Override
            public void run() {
                try {
                    checkAndHandleSessionTimeout();
                } catch (TezException e) {
                    LOG.error("Error when checking AM session timeout", e);
                }
            }
        }, sessionTimeoutInterval, sessionTimeoutInterval / 10);
    }
    // Ignore client heartbeat timeout in local mode or non-session mode
    if (!isLocal && isSession && clientAMHeartbeatTimeoutIntervalMillis > 0) {
        // reset heartbeat time
        clientHandler.updateLastHeartbeatTime();
        this.clientAMHeartBeatTimeoutService = Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setDaemon(true).setNameFormat("ClientAMHeartBeatKeepAliveCheck #%d").build());
        this.clientAMHeartBeatTimeoutService.schedule(new Runnable() {

            @Override
            public void run() {
                try {
                    long nextExpiry = checkAndHandleDAGClientTimeout();
                    if (nextExpiry > 0) {
                        clientAMHeartBeatTimeoutService.schedule(this, nextExpiry, TimeUnit.MILLISECONDS);
                    }
                } catch (TezException e) {
                    // Cannot be thrown unless the AM is being tried to shutdown so no need to
                    // reschedule the timer task
                    LOG.error("Error when checking Client AM heartbeat timeout", e);
                }
            }
        }, clientAMHeartbeatTimeoutIntervalMillis, TimeUnit.MILLISECONDS);
    }
}
Also used : AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) TezException(org.apache.tez.dag.api.TezException) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) IOException(java.io.IOException) DAGEventRecoverEvent(org.apache.tez.dag.app.dag.event.DAGEventRecoverEvent) Timer(java.util.Timer) TimerTask(java.util.TimerTask) DAGRecoveredEvent(org.apache.tez.dag.history.events.DAGRecoveredEvent) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) DAGRecoveryData(org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData)

Example 2 with AMStartedEvent

use of org.apache.tez.dag.history.events.AMStartedEvent in project tez by apache.

the class TestATSHistoryLoggingService method makeHistoryEvents.

private List<DAGHistoryEvent> makeHistoryEvents(TezDAGID dagId, ATSHistoryLoggingService service) {
    List<DAGHistoryEvent> historyEvents = new ArrayList<>();
    long time = System.currentTimeMillis();
    Configuration conf = new Configuration(service.getConfig());
    historyEvents.add(new DAGHistoryEvent(null, new AMStartedEvent(attemptId, time, "user")));
    historyEvents.add(new DAGHistoryEvent(dagId, new DAGSubmittedEvent(dagId, time, DAGPlan.getDefaultInstance(), attemptId, null, "user", conf, null, "default")));
    TezVertexID vertexID = TezVertexID.getInstance(dagId, 1);
    historyEvents.add(new DAGHistoryEvent(dagId, new VertexStartedEvent(vertexID, time, time)));
    TezTaskID tezTaskID = TezTaskID.getInstance(vertexID, 1);
    historyEvents.add(new DAGHistoryEvent(dagId, new TaskStartedEvent(tezTaskID, "test", time, time)));
    historyEvents.add(new DAGHistoryEvent(dagId, new TaskAttemptStartedEvent(TezTaskAttemptID.getInstance(tezTaskID, 1), "test", time, ContainerId.newContainerId(attemptId, 1), NodeId.newInstance("localhost", 8765), null, null, null)));
    return historyEvents;
}
Also used : AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) ArrayList(java.util.ArrayList) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Example 3 with AMStartedEvent

use of org.apache.tez.dag.history.events.AMStartedEvent in project tez by apache.

the class TestHistoryEventTimelineConversion method testConvertAMStartedEvent.

@Test(timeout = 5000)
public void testConvertAMStartedEvent() {
    long startTime = random.nextLong();
    AMStartedEvent event = new AMStartedEvent(applicationAttemptId, startTime, user);
    List<TimelineEntity> entities = HistoryEventTimelineConversion.convertToTimelineEntities(event);
    Assert.assertEquals(1, entities.size());
    TimelineEntity timelineEntity = entities.get(0);
    Assert.assertEquals("tez_" + applicationAttemptId.toString(), timelineEntity.getEntityId());
    Assert.assertEquals(EntityTypes.TEZ_APPLICATION_ATTEMPT.name(), timelineEntity.getEntityType());
    final Map<String, Set<String>> relatedEntities = timelineEntity.getRelatedEntities();
    Assert.assertEquals(0, relatedEntities.size());
    final Map<String, Set<Object>> primaryFilters = timelineEntity.getPrimaryFilters();
    Assert.assertEquals(2, primaryFilters.size());
    Assert.assertTrue(primaryFilters.get(ATSConstants.USER).contains(user));
    Assert.assertTrue(primaryFilters.get(ATSConstants.APPLICATION_ID).contains(applicationId.toString()));
    Assert.assertEquals(1, timelineEntity.getEvents().size());
    TimelineEvent evt = timelineEntity.getEvents().get(0);
    Assert.assertEquals(HistoryEventType.AM_STARTED.name(), evt.getEventType());
    Assert.assertEquals(startTime, evt.getTimestamp());
}
Also used : AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) TimelineEvent(org.apache.hadoop.yarn.api.records.timeline.TimelineEvent) Set(java.util.Set) TimelineEntity(org.apache.hadoop.yarn.api.records.timeline.TimelineEntity) Test(org.junit.Test)

Example 4 with AMStartedEvent

use of org.apache.tez.dag.history.events.AMStartedEvent in project tez by apache.

the class TestHistoryEventTimelineConversion method testHandlerExists.

@Test(timeout = 5000)
public void testHandlerExists() throws JSONException {
    for (HistoryEventType eventType : HistoryEventType.values()) {
        HistoryEvent event = null;
        switch(eventType) {
            case APP_LAUNCHED:
                event = new AppLaunchedEvent(applicationId, random.nextInt(), random.nextInt(), user, new Configuration(false), null);
                break;
            case AM_LAUNCHED:
                event = new AMLaunchedEvent(applicationAttemptId, random.nextInt(), random.nextInt(), user);
                break;
            case AM_STARTED:
                event = new AMStartedEvent(applicationAttemptId, random.nextInt(), user);
                break;
            case DAG_SUBMITTED:
                event = new DAGSubmittedEvent(tezDAGID, random.nextInt(), dagPlan, applicationAttemptId, null, user, null, containerLogs, null);
                break;
            case DAG_INITIALIZED:
                event = new DAGInitializedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName(), null);
                break;
            case DAG_STARTED:
                event = new DAGStartedEvent(tezDAGID, random.nextInt(), user, dagPlan.getName());
                break;
            case DAG_FINISHED:
                event = new DAGFinishedEvent(tezDAGID, random.nextInt(), random.nextInt(), DAGState.ERROR, null, null, user, dagPlan.getName(), null, applicationAttemptId, dagPlan);
                break;
            case VERTEX_INITIALIZED:
                event = new VertexInitializedEvent(tezVertexID, "v1", random.nextInt(), random.nextInt(), random.nextInt(), "proc", null, null, null);
                break;
            case VERTEX_STARTED:
                event = new VertexStartedEvent(tezVertexID, random.nextInt(), random.nextInt());
                break;
            case VERTEX_CONFIGURE_DONE:
                event = new VertexConfigurationDoneEvent(tezVertexID, 0L, 1, null, null, null, true);
                break;
            case VERTEX_FINISHED:
                event = new VertexFinishedEvent(tezVertexID, "v1", 1, random.nextInt(), random.nextInt(), random.nextInt(), random.nextInt(), random.nextInt(), VertexState.ERROR, null, null, null, null, null);
                break;
            case TASK_STARTED:
                event = new TaskStartedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt());
                break;
            case TASK_FINISHED:
                event = new TaskFinishedEvent(tezTaskID, "v1", random.nextInt(), random.nextInt(), tezTaskAttemptID, TaskState.FAILED, null, null, 0);
                break;
            case TASK_ATTEMPT_STARTED:
                event = new TaskAttemptStartedEvent(tezTaskAttemptID, "v1", random.nextInt(), containerId, nodeId, null, null, "nodeHttpAddress");
                break;
            case TASK_ATTEMPT_FINISHED:
                event = new TaskAttemptFinishedEvent(tezTaskAttemptID, "v1", random.nextInt(), random.nextInt(), TaskAttemptState.FAILED, TaskFailureType.NON_FATAL, TaskAttemptTerminationCause.OUTPUT_LOST, null, null, null, null, 0, null, 0, containerId, nodeId, null, null, "nodeHttpAddress");
                break;
            case CONTAINER_LAUNCHED:
                event = new ContainerLaunchedEvent(containerId, random.nextInt(), applicationAttemptId);
                break;
            case CONTAINER_STOPPED:
                event = new ContainerStoppedEvent(containerId, random.nextInt(), -1, applicationAttemptId);
                break;
            case DAG_COMMIT_STARTED:
                event = new DAGCommitStartedEvent();
                break;
            case VERTEX_COMMIT_STARTED:
                event = new VertexCommitStartedEvent();
                break;
            case VERTEX_GROUP_COMMIT_STARTED:
                event = new VertexGroupCommitStartedEvent();
                break;
            case VERTEX_GROUP_COMMIT_FINISHED:
                event = new VertexGroupCommitFinishedEvent();
                break;
            case DAG_RECOVERED:
                event = new DAGRecoveredEvent(applicationAttemptId, tezDAGID, dagPlan.getName(), user, random.nextLong(), containerLogs);
                break;
            case DAG_KILL_REQUEST:
                event = new DAGKillRequestEvent();
                break;
            default:
                Assert.fail("Unhandled event type " + eventType);
        }
        if (event == null || !event.isHistoryEvent()) {
            continue;
        }
        HistoryEventTimelineConversion.convertToTimelineEntities(event);
    }
}
Also used : DAGCommitStartedEvent(org.apache.tez.dag.history.events.DAGCommitStartedEvent) Configuration(org.apache.hadoop.conf.Configuration) VertexInitializedEvent(org.apache.tez.dag.history.events.VertexInitializedEvent) HistoryEventType(org.apache.tez.dag.history.HistoryEventType) DAGInitializedEvent(org.apache.tez.dag.history.events.DAGInitializedEvent) ContainerStoppedEvent(org.apache.tez.dag.history.events.ContainerStoppedEvent) DAGKillRequestEvent(org.apache.tez.dag.history.events.DAGKillRequestEvent) DAGStartedEvent(org.apache.tez.dag.history.events.DAGStartedEvent) VertexConfigurationDoneEvent(org.apache.tez.dag.history.events.VertexConfigurationDoneEvent) DAGRecoveredEvent(org.apache.tez.dag.history.events.DAGRecoveredEvent) TaskAttemptFinishedEvent(org.apache.tez.dag.history.events.TaskAttemptFinishedEvent) AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) VertexGroupCommitStartedEvent(org.apache.tez.dag.history.events.VertexGroupCommitStartedEvent) HistoryEvent(org.apache.tez.dag.history.HistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) AppLaunchedEvent(org.apache.tez.dag.history.events.AppLaunchedEvent) TaskFinishedEvent(org.apache.tez.dag.history.events.TaskFinishedEvent) VertexGroupCommitFinishedEvent(org.apache.tez.dag.history.events.VertexGroupCommitFinishedEvent) AMLaunchedEvent(org.apache.tez.dag.history.events.AMLaunchedEvent) ContainerLaunchedEvent(org.apache.tez.dag.history.events.ContainerLaunchedEvent) DAGFinishedEvent(org.apache.tez.dag.history.events.DAGFinishedEvent) VertexFinishedEvent(org.apache.tez.dag.history.events.VertexFinishedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent) VertexCommitStartedEvent(org.apache.tez.dag.history.events.VertexCommitStartedEvent) Test(org.junit.Test)

Example 5 with AMStartedEvent

use of org.apache.tez.dag.history.events.AMStartedEvent in project tez by apache.

the class TestATSV15HistoryLoggingService method makeHistoryEvents.

private List<DAGHistoryEvent> makeHistoryEvents(TezDAGID dagId, ATSV15HistoryLoggingService service) {
    List<DAGHistoryEvent> historyEvents = new ArrayList<>();
    long time = System.currentTimeMillis();
    Configuration conf = new Configuration(service.getConfig());
    historyEvents.add(new DAGHistoryEvent(null, new AMStartedEvent(attemptId, time, user)));
    historyEvents.add(new DAGHistoryEvent(dagId, new DAGSubmittedEvent(dagId, time, DAGPlan.getDefaultInstance(), attemptId, null, user, conf, null, "default")));
    TezVertexID vertexID = TezVertexID.getInstance(dagId, 1);
    historyEvents.add(new DAGHistoryEvent(dagId, new VertexStartedEvent(vertexID, time, time)));
    TezTaskID tezTaskID = TezTaskID.getInstance(vertexID, 1);
    historyEvents.add(new DAGHistoryEvent(dagId, new TaskStartedEvent(tezTaskID, "test", time, time)));
    historyEvents.add(new DAGHistoryEvent(dagId, new TaskAttemptStartedEvent(TezTaskAttemptID.getInstance(tezTaskID, 1), "test", time, ContainerId.newContainerId(attemptId, 1), NodeId.newInstance("localhost", 8765), null, null, null)));
    return historyEvents;
}
Also used : AMStartedEvent(org.apache.tez.dag.history.events.AMStartedEvent) VertexStartedEvent(org.apache.tez.dag.history.events.VertexStartedEvent) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) ArrayList(java.util.ArrayList) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TaskStartedEvent(org.apache.tez.dag.history.events.TaskStartedEvent) TezVertexID(org.apache.tez.dag.records.TezVertexID) TezTaskID(org.apache.tez.dag.records.TezTaskID) TaskAttemptStartedEvent(org.apache.tez.dag.history.events.TaskAttemptStartedEvent) DAGSubmittedEvent(org.apache.tez.dag.history.events.DAGSubmittedEvent)

Aggregations

AMStartedEvent (org.apache.tez.dag.history.events.AMStartedEvent)8 DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent)6 TaskAttemptStartedEvent (org.apache.tez.dag.history.events.TaskAttemptStartedEvent)6 TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent)6 VertexStartedEvent (org.apache.tez.dag.history.events.VertexStartedEvent)6 Configuration (org.apache.hadoop.conf.Configuration)5 ContainerLaunchedEvent (org.apache.tez.dag.history.events.ContainerLaunchedEvent)4 ContainerStoppedEvent (org.apache.tez.dag.history.events.ContainerStoppedEvent)4 DAGFinishedEvent (org.apache.tez.dag.history.events.DAGFinishedEvent)4 TaskAttemptFinishedEvent (org.apache.tez.dag.history.events.TaskAttemptFinishedEvent)4 ArrayList (java.util.ArrayList)3 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)3 DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent)3 HistoryEvent (org.apache.tez.dag.history.HistoryEvent)3 HistoryEventType (org.apache.tez.dag.history.HistoryEventType)3 AMLaunchedEvent (org.apache.tez.dag.history.events.AMLaunchedEvent)3 DAGCommitStartedEvent (org.apache.tez.dag.history.events.DAGCommitStartedEvent)3 DAGInitializedEvent (org.apache.tez.dag.history.events.DAGInitializedEvent)3 DAGKillRequestEvent (org.apache.tez.dag.history.events.DAGKillRequestEvent)3 DAGRecoveredEvent (org.apache.tez.dag.history.events.DAGRecoveredEvent)3