Search in sources :

Example 36 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

In class TestATSV15HistoryLoggingService, method testDAGGroupingDisabled.

/**
 * Checks how events are bucketed when the service is created with a group
 * size of 1 (the argument to {@code createService}).
 *
 * <p>Expected outcome, as asserted below: the entity log holds exactly two
 * groups - one keyed by the application id string with a single entity, and
 * one keyed by the DAG id string with five entities.
 *
 * @throws Exception if the service fails or the wait loop is interrupted
 */
@Test(timeout = 2000)
public void testDAGGroupingDisabled() throws Exception {
    ATSV15HistoryLoggingService service = createService(1);
    service.start();
    try {
        TezDAGID dagId1 = TezDAGID.getInstance(appId, 0);
        for (DAGHistoryEvent event : makeHistoryEvents(dagId1, service)) {
            service.handle(event);
        }
        // Events are consumed asynchronously; poll until the queue is empty.
        while (!service.eventQueue.isEmpty()) {
            Thread.sleep(100);
        }
        assertEquals(2, entityLog.size());
        List<TimelineEntity> amEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, appId.toString()));
        assertNotNull(amEvents);
        assertEquals(1, amEvents.size());
        List<TimelineEntity> nonGroupedDagEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, dagId1.toString()));
        assertNotNull(nonGroupedDagEvents);
        assertEquals(5, nonGroupedDagEvents.size());
    } finally {
        // Always stop the service, even when an assertion fails or the test
        // times out, so the started service is not left running.
        service.stop();
    }
}
Also used : TezDAGID(org.apache.tez.dag.records.TezDAGID) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TimelineEntity(org.apache.hadoop.yarn.api.records.timeline.TimelineEntity) Test(org.junit.Test)

Example 37 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

In class TestATSV15HistoryLoggingService, method testDAGGroupingGroupingEnabled.

/**
 * Checks how events are bucketed when the service is created with a group
 * size of 100.
 *
 * <p>Three DAGs are submitted with ids 1, 100 and 101. As asserted below,
 * the first two share a group id and the third gets a different one. The
 * entity log is expected to hold three groups: the application-level group
 * (three entities), the group shared by the first two DAGs (ten entities)
 * and the third DAG's group (five entities). No group keyed by a plain DAG
 * id string is expected.
 *
 * @throws Exception if the service fails or the wait loop is interrupted
 */
@Test(timeout = 2000)
public void testDAGGroupingGroupingEnabled() throws Exception {
    int numDagsPerGroup = 100;
    ATSV15HistoryLoggingService service = createService(numDagsPerGroup);
    service.start();
    try {
        TezDAGID dagId1 = TezDAGID.getInstance(appId, 1);
        for (DAGHistoryEvent event : makeHistoryEvents(dagId1, service)) {
            service.handle(event);
        }
        TezDAGID dagId2 = TezDAGID.getInstance(appId, numDagsPerGroup);
        for (DAGHistoryEvent event : makeHistoryEvents(dagId2, service)) {
            service.handle(event);
        }
        TezDAGID dagId3 = TezDAGID.getInstance(appId, numDagsPerGroup + 1);
        for (DAGHistoryEvent event : makeHistoryEvents(dagId3, service)) {
            service.handle(event);
        }
        // Events are consumed asynchronously; poll until the queue is empty.
        while (!service.eventQueue.isEmpty()) {
            Thread.sleep(100);
        }
        // Sanity-check the grouping itself before inspecting the log.
        assertEquals(dagId1.getGroupId(numDagsPerGroup), dagId2.getGroupId(numDagsPerGroup));
        assertNotEquals(dagId2.getGroupId(numDagsPerGroup), dagId3.getGroupId(numDagsPerGroup));
        assertEquals(3, entityLog.size());
        List<TimelineEntity> amEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, appId.toString()));
        assertNotNull(amEvents);
        assertEquals(3, amEvents.size());
        // With grouping on, nothing should be logged under the plain DAG id.
        List<TimelineEntity> nonGroupedDagEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, dagId1.toString()));
        assertNull(nonGroupedDagEvents);
        List<TimelineEntity> groupedDagEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, dagId1.getGroupId(numDagsPerGroup)));
        assertNotNull(groupedDagEvents);
        assertEquals(10, groupedDagEvents.size());
        nonGroupedDagEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, dagId3.toString()));
        assertNull(nonGroupedDagEvents);
        groupedDagEvents = entityLog.get(TimelineEntityGroupId.newInstance(appId, dagId3.getGroupId(numDagsPerGroup)));
        assertNotNull(groupedDagEvents);
        assertEquals(5, groupedDagEvents.size());
    } finally {
        // Always stop the service, even when an assertion fails or the test
        // times out, so the started service is not left running.
        service.stop();
    }
}
Also used : TezDAGID(org.apache.tez.dag.records.TezDAGID) DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) TimelineEntity(org.apache.hadoop.yarn.api.records.timeline.TimelineEntity) Test(org.junit.Test)

Example 38 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

In class SimpleHistoryLoggingService, method serviceStart.

/**
 * Opens the log file for writing (overwriting any existing file) and starts
 * the background thread that takes events off the queue and hands each one
 * to {@code handleEvent}.
 *
 * @throws Exception if the output stream cannot be created or the parent
 *         start-up fails
 */
@Override
protected void serviceStart() throws Exception {
    LOG.info("Starting SimpleHistoryLoggingService");
    outputStream = logFileFS.create(logFileLocation, true);

    Runnable eventLoop = new Runnable() {

        @Override
        public void run() {
            while (true) {
                // Leave the loop once the service is stopped or this thread
                // has been interrupted.
                if (stopped.get() || Thread.currentThread().isInterrupted()) {
                    return;
                }
                final DAGHistoryEvent next;
                try {
                    // Blocks until an event is available.
                    next = eventQueue.take();
                } catch (InterruptedException e) {
                    LOG.info("EventQueue take interrupted. Returning");
                    return;
                }
                handleEvent(next);
            }
        }
    };

    eventHandlingThread = new Thread(eventLoop, "HistoryEventHandlingThread");
    eventHandlingThread.start();
    super.serviceStart();
}
Also used : DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent)

Example 39 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

In class SimpleHistoryLoggingService, method serviceStop.

/**
 * Stops the logging service: marks it stopped, interrupts the event
 * handling thread, handles whatever is still queued on the calling thread,
 * then flushes and closes the output stream.
 *
 * <p>Stream failures are logged and do not prevent the parent
 * {@code serviceStop()} from running.
 *
 * @throws Exception if the parent shutdown fails
 */
@Override
protected void serviceStop() throws Exception {
    LOG.info("Stopping SimpleHistoryLoggingService" + ", eventQueueBacklog=" + eventQueue.size());
    stopped.set(true);
    if (eventHandlingThread != null) {
        eventHandlingThread.interrupt();
    }
    // Handle the backlog here; poll() returns null once the queue is empty.
    DAGHistoryEvent event;
    while ((event = eventQueue.poll()) != null) {
        handleEvent(event);
    }
    if (outputStream != null) {
        try {
            outputStream.hflush();
        } catch (IOException ioe) {
            LOG.warn("Failed to flush output stream", ioe);
        } finally {
            // Close in finally so a failed flush cannot leak the stream.
            try {
                outputStream.close();
            } catch (IOException ioe) {
                LOG.warn("Failed to close output stream", ioe);
            }
        }
    }
    super.serviceStop();
}
Also used : DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) IOException(java.io.IOException)

Example 40 with DAGHistoryEvent

use of org.apache.tez.dag.history.DAGHistoryEvent in project tez by apache.

In class RecoveryService, method serviceStart.

/**
 * Records the current clock time as the last flush time, then starts the
 * "RecoveryEventHandlingThread", which takes events off {@code eventQueue}
 * one at a time and passes them to {@code handleRecoveryEvent}. Finally
 * marks the service as started.
 *
 * <p>The handling thread exits when {@code stopped} is set, when it is
 * interrupted, or when {@code recoveryFatalErrorOccurred} is set (in which
 * case the queue is cleared first).
 */
@Override
public void serviceStart() {
    lastFlushTime = appContext.getClock().getTime();
    eventHandlingThread = new Thread(new Runnable() {

        @Override
        public void run() {
            TezUtilsInternal.setHadoopCallerContext(appContext.getHadoopShim(), appContext.getApplicationID());
            DAGHistoryEvent event;
            while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
                // Snapshot whether the queue is empty at the top of this iteration.
                drained = eventQueue.isEmpty();
                // Check the service state first so that we avoid synchronizing
                // and calling notify every time in the normal run of the loop.
                // NOTE(review): presumably the stop path waits on waitForDrained;
                // that code is not visible here - confirm.
                if (getServiceState() == STATE.STOPPED) {
                    synchronized (waitForDrained) {
                        if (drained) {
                            waitForDrained.notify();
                        }
                    }
                }
                // After a fatal recovery error, discard pending events and end the thread.
                if (recoveryFatalErrorOccurred.get()) {
                    LOG.error("Recovery failure occurred. Stopping recovery thread." + " Current eventQueueSize=" + eventQueue.size());
                    eventQueue.clear();
                    return;
                }
                // Log the size of the event-queue every so often.
                // eventCounter counts loop iterations; when it reaches a non-zero
                // multiple of 1000 the stats are logged and both counters reset.
                if (eventCounter != 0 && eventCounter % 1000 == 0) {
                    LOG.info("Event queue stats" + ", eventsProcessedSinceLastUpdate=" + eventsProcessed + ", eventQueueSize=" + eventQueue.size());
                    eventCounter = 0;
                    eventsProcessed = 0;
                } else {
                    ++eventCounter;
                }
                try {
                    // Blocks until an event is available.
                    event = eventQueue.take();
                } catch (InterruptedException e) {
                    LOG.info("EventQueue take interrupted. Returning");
                    return;
                }
                // Handle the event while holding `lock`.
                // NOTE(review): what else synchronizes on `lock` is not visible here.
                synchronized (lock) {
                    try {
                        ++eventsProcessed;
                        handleRecoveryEvent(event);
                    } catch (Exception e) {
                        // For now, ignore any such errors as these are non-critical
                        // All summary event related errors are handled as critical
                        LOG.warn("Error handling recovery event", e);
                    }
                }
            }
        }
    }, "RecoveryEventHandlingThread");
    eventHandlingThread.start();
    started.set(true);
}
Also used : DAGHistoryEvent(org.apache.tez.dag.history.DAGHistoryEvent) IOException(java.io.IOException)

Aggregations

DAGHistoryEvent (org.apache.tez.dag.history.DAGHistoryEvent): 81; TezDAGID (org.apache.tez.dag.records.TezDAGID): 38; Test (org.junit.Test): 33; DAGSubmittedEvent (org.apache.tez.dag.history.events.DAGSubmittedEvent): 21; IOException (java.io.IOException): 18; Configuration (org.apache.hadoop.conf.Configuration): 18; DAGPlan (org.apache.tez.dag.api.records.DAGProtos.DAGPlan): 18; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 17; Path (org.apache.hadoop.fs.Path): 15; SystemClock (org.apache.hadoop.yarn.util.SystemClock): 14; DAGRecoveryData (org.apache.tez.dag.app.RecoveryParser.DAGRecoveryData): 13; DAGStartedEvent (org.apache.tez.dag.history.events.DAGStartedEvent): 11; RecoveryService (org.apache.tez.dag.history.recovery.RecoveryService): 11; TezVertexID (org.apache.tez.dag.records.TezVertexID): 10; TaskStartedEvent (org.apache.tez.dag.history.events.TaskStartedEvent): 7; ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 6; TimelineEntity (org.apache.hadoop.yarn.api.records.timeline.TimelineEntity): 6; TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 6; DAGFinishedEvent (org.apache.tez.dag.history.events.DAGFinishedEvent): 6; VertexFinishedEvent (org.apache.tez.dag.history.events.VertexFinishedEvent): 6