use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class RecoveryService method handleRecoveryEvent.
/**
 * Writes one history event to the recovery stream of the DAG the event belongs to.
 *
 * <p>Events for DAGs already present in {@code completedDAGs} are dropped. When no stream is
 * registered for the DAG yet, a recovery file is created and its stream is cached in
 * {@code outputStreamMap}. If a file already exists at the recovery path,
 * {@code createFatalErrorFlagDir()} is called and the event is dropped.
 *
 * @param event the history event to persist
 * @throws IOException propagated from the file system or output stream operations
 */
@VisibleForTesting
protected void handleRecoveryEvent(DAGHistoryEvent event) throws IOException {
  final HistoryEventType eventType = event.getHistoryEvent().getEventType();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Handling recovery event of type " + eventType);
  }

  final TezDAGID dagId = event.getDagID();
  if (completedDAGs.contains(dagId)) {
    // Nothing is recorded for a DAG that has already completed.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Skipping Recovery Event as DAG completed, dagId=" + dagId
          + ", completed=" + completedDAGs.contains(dagId)
          + ", skipped=" + skippedDAGs.contains(dagId)
          + ", eventType=" + eventType);
    }
    return;
  }

  if (!outputStreamMap.containsKey(dagId)) {
    final Path recoveryFile =
        TezCommonUtils.getDAGRecoveryPath(recoveryPath, dagId.toString());
    if (recoveryDirFS.exists(recoveryFile)) {
      // No stream is tracked for this DAG, yet its recovery file is already on disk:
      // raise the fatal-error flag and drop the event.
      createFatalErrorFlagDir();
      return;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Opening DAG recovery file in create mode, filePath=" + recoveryFile);
    }
    outputStreamMap.put(dagId, recoveryDirFS.create(recoveryFile, false, bufferSize));
  }

  final FSDataOutputStream out = outputStreamMap.get(dagId);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Writing recovery event to output stream, dagId=" + dagId
        + ", eventType=" + eventType);
  }

  ++unflushedEventsCount;
  // Record layout: the event type's ordinal followed by the event's proto payload.
  out.writeInt(eventType.ordinal());
  event.getHistoryEvent().toProtoStream(out);

  // DAG_SUBMITTED and DAG_FINISHED do not go through maybeFlush here.
  final boolean dagBoundaryEvent = eventType == HistoryEventType.DAG_SUBMITTED
      || eventType == HistoryEventType.DAG_FINISHED;
  if (!dagBoundaryEvent) {
    maybeFlush(out);
  }
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class RecoveryService method handle.
/**
 * Entry point for history events that may have to be recorded for recovery.
 *
 * <p>Events are dropped when the service is stopped or a fatal recovery error has been
 * recorded, and queued when the service has not been started yet. A {@code DAG_SUBMITTED}
 * event whose DAG name starts with the pre-warm prefix adds the DAG to {@code skippedDAGs};
 * events without a DAG id or belonging to a skipped DAG are ignored. Summary events are
 * processed synchronously while holding {@code lock}; every other event is queued.
 *
 * @param event the history event to handle
 * @throws IOException if processing a {@code DAG_SUBMITTED} summary event fails; failures
 *     for other summary events are logged and flagged via
 *     {@code createFatalErrorFlagDir()} but not rethrown
 */
public void handle(DAGHistoryEvent event) throws IOException {
  if (stopped.get()) {
    LOG.warn("Ignoring event as service stopped, eventType="
        + event.getHistoryEvent().getEventType());
    return;
  }
  HistoryEventType eventType = event.getHistoryEvent().getEventType();
  if (recoveryFatalErrorOccurred.get()) {
    return;
  }
  if (!started.get()) {
    LOG.warn("Adding event of type " + eventType + " to queue as service not started");
    addToEventQueue(event);
    return;
  }

  TezDAGID dagId = event.getDagID();
  if (eventType.equals(HistoryEventType.DAG_SUBMITTED)) {
    DAGSubmittedEvent dagSubmittedEvent = (DAGSubmittedEvent) event.getHistoryEvent();
    String dagName = dagSubmittedEvent.getDAGName();
    if (dagName != null && dagName.startsWith(TezConstants.TEZ_PREWARM_DAG_NAME_PREFIX)) {
      // Skip recording pre-warm DAG events
      skippedDAGs.add(dagId);
      return;
    }
  }
  if (dagId == null || skippedDAGs.contains(dagId)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Skipping event for DAG, eventType=" + eventType
          + ", dagId=" + (dagId == null ? "null" : dagId.toString())
          + ", isSkippedDAG=" + (dagId == null ? "null" : skippedDAGs.contains(dagId)));
    }
    return;
  }

  if (!(event.getHistoryEvent() instanceof SummaryEvent)) {
    // All other events just get queued
    if (LOG.isDebugEnabled()) {
      LOG.debug("Queueing Non-Summary Recovery event of type " + eventType.name());
    }
    addToEventQueue(event);
    return;
  }

  synchronized (lock) {
    // Re-check under the lock: the service may have been stopped in the meantime.
    if (stopped.get()) {
      LOG.warn("Ignoring event as service stopped, eventType="
          + event.getHistoryEvent().getEventType());
      return;
    }
    try {
      SummaryEvent summaryEvent = (SummaryEvent) event.getHistoryEvent();
      handleSummaryEvent(dagId, eventType, summaryEvent);
      if (summaryEvent.writeToRecoveryImmediately()) {
        handleRecoveryEvent(event);
        // outputStream may already be closed and removed
        if (outputStreamMap.containsKey(dagId)) {
          doFlush(outputStreamMap.get(dagId), appContext.getClock().getTime());
        }
      } else {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Queueing Non-immediate Summary/Recovery event of type "
              + eventType.name());
        }
        addToEventQueue(event);
      }
      if (eventType.equals(HistoryEventType.DAG_FINISHED)) {
        LOG.info("DAG completed, dagId=" + dagId + ", queueSize=" + eventQueue.size());
        completedDAGs.add(dagId);
        if (outputStreamMap.containsKey(dagId)) {
          try {
            outputStreamMap.get(dagId).close();
            outputStreamMap.remove(dagId);
          } catch (IOException ioe) {
            // Best effort: a failed close is not treated as fatal, but keep the cause
            // in the log so the failure can be diagnosed.
            LOG.warn("Error when trying to flush/close recovery file for dag, dagId="
                + dagId, ioe);
          }
        }
      }
    } catch (IOException ioe) {
      LOG.error("Error handling summary event, eventType="
          + event.getHistoryEvent().getEventType(), ioe);
      createFatalErrorFlagDir();
      if (eventType.equals(HistoryEventType.DAG_SUBMITTED)) {
        // Throw error to tell client that dag submission failed
        throw ioe;
      }
    }
  }
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class HistoryEventHandler method shouldLogTaskAttemptEvents.
/**
 * Decides whether a task attempt start/finish event should be logged right away.
 *
 * <p>If the log level is set to TASK_ATTEMPT and filters are configured, the start event is
 * suppressed (parked in {@code suppressedEvents}) and only published when the
 * TaskAttemptFinishedEvent is received, after matching against the filter. A finished event
 * whose termination cause is in the filter set discards the parked start event and is itself
 * not logged.
 *
 * <p>Note: if the AM is killed before we get the TaskAttemptFinishedEvent, we'll lose the
 * suppressed start event.
 *
 * @param event the event under consideration
 * @param dagLogLevel the history log level in effect for the event's DAG
 * @return {@code false} if the event must not be logged now, {@code true} otherwise
 */
private boolean shouldLogTaskAttemptEvents(DAGHistoryEvent event, HistoryLogLevel dagLogLevel) {
  final HistoryEvent historyEvent = event.getHistoryEvent();
  final HistoryEventType eventType = historyEvent.getEventType();
  final boolean attemptStarted = eventType == HistoryEventType.TASK_ATTEMPT_STARTED;
  final boolean attemptFinished = eventType == HistoryEventType.TASK_ATTEMPT_FINISHED;

  // Filtering only applies to attempt start/finish events at TASK_ATTEMPT level.
  if (dagLogLevel != HistoryLogLevel.TASK_ATTEMPT || !(attemptStarted || attemptFinished)) {
    return true;
  }

  // Prefer the filters registered for the DAG; fall back to the AM-wide ones.
  final TezDAGID dagId = event.getDagID();
  Set<TaskAttemptTerminationCause> activeFilters =
      dagId == null ? null : dagIdToTaskAttemptFilters.get(dagId);
  if (activeFilters == null) {
    activeFilters = amTaskAttemptFilters;
  }
  if (activeFilters == null) {
    return true;
  }

  if (attemptStarted) {
    // Park the start event until the matching finished event arrives.
    final TaskAttemptStartedEvent startedEvent = (TaskAttemptStartedEvent) historyEvent;
    suppressedEvents.put(startedEvent.getTaskAttemptID(), event);
    return false;
  }

  // TaskAttemptFinishedEvent
  final TaskAttemptFinishedEvent finishedEvent = (TaskAttemptFinishedEvent) historyEvent;
  if (!activeFilters.contains(finishedEvent.getTaskAttemptError())) {
    return true;
  }
  suppressedEvents.remove(finishedEvent.getTaskAttemptID());
  return false;
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestATSV15HistoryLoggingService method testSessionDomainsDagFailed.
/**
 * Session mode with a failing DAG domain setup: the session domain is applied once, the
 * DAG domain is never applied, and exactly one entry ends up in {@code entityLog}.
 */
@Test
public void testSessionDomainsDagFailed() throws Exception {
  ATSV15HistoryLoggingService loggingService = createService(-1);
  when(appContext.isSession()).thenReturn(true);

  HistoryACLPolicyManager aclManager = mock(HistoryACLPolicyManager.class);
  loggingService.historyACLPolicyManager = aclManager;
  when(aclManager.setupSessionACLs((Configuration) any(), eq(appId)))
      .thenReturn(Collections.singletonMap(
          TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "session-id"));

  loggingService.start();

  // Starting the service must have requested the session domain exactly once.
  verify(aclManager, times(1)).setupSessionACLs((Configuration) any(), eq(appId));

  // Make the DAG-level domain setup fail.
  when(aclManager.setupSessionDAGACLs(
      (Configuration) any(), eq(appId), eq("0"), (DAGAccessControls) any()))
      .thenThrow(new IOException());

  // Push the events through the service and wait until its queue has drained.
  TezDAGID firstDagId = TezDAGID.getInstance(appId, 0);
  for (DAGHistoryEvent historyEvent : makeHistoryEvents(firstDagId, loggingService)) {
    loggingService.handle(historyEvent);
  }
  while (!loggingService.eventQueue.isEmpty()) {
    Thread.sleep(100);
  }

  // The DAG domain setup was attempted once.
  verify(aclManager, times(1)).setupSessionDAGACLs(
      (Configuration) any(), eq(appId), eq("0"), (DAGAccessControls) any());

  // AM events sent, dag events are not sent.
  verify(aclManager, times(1)).updateTimelineEntityDomain(any(), eq("session-id"));
  verify(aclManager, times(0)).updateTimelineEntityDomain(any(), eq("dag-id"));
  assertEquals(1, entityLog.size());

  loggingService.stop();
}
use of org.apache.tez.dag.records.TezDAGID in project tez by apache.
the class TestATSV15HistoryLoggingService method testSessionDomains.
/**
 * Session mode with a working DAG domain setup: the session domain is applied once and the
 * DAG domain is applied to five entities.
 */
@Test
public void testSessionDomains() throws Exception {
  ATSV15HistoryLoggingService loggingService = createService(-1);
  when(appContext.isSession()).thenReturn(true);

  HistoryACLPolicyManager aclManager = mock(HistoryACLPolicyManager.class);
  loggingService.historyACLPolicyManager = aclManager;
  when(aclManager.setupSessionACLs((Configuration) any(), eq(appId)))
      .thenReturn(Collections.singletonMap(
          TezConfiguration.YARN_ATS_ACL_SESSION_DOMAIN_ID, "session-id"));

  loggingService.start();

  // Starting the service must have requested the session domain exactly once.
  verify(aclManager, times(1)).setupSessionACLs((Configuration) any(), eq(appId));

  // Let the DAG-level domain setup succeed and hand back the "dag-id" domain.
  when(aclManager.setupSessionDAGACLs(
      (Configuration) any(), eq(appId), eq("0"), (DAGAccessControls) any()))
      .thenReturn(Collections.singletonMap(
          TezConfiguration.YARN_ATS_ACL_DAG_DOMAIN_ID, "dag-id"));

  // Push the events through the service and wait until its queue has drained.
  TezDAGID firstDagId = TezDAGID.getInstance(appId, 0);
  for (DAGHistoryEvent historyEvent : makeHistoryEvents(firstDagId, loggingService)) {
    loggingService.handle(historyEvent);
  }
  while (!loggingService.eventQueue.isEmpty()) {
    Thread.sleep(100);
  }

  // The DAG domain was set up exactly once.
  verify(aclManager, times(1)).setupSessionDAGACLs(
      (Configuration) any(), eq(appId), eq("0"), (DAGAccessControls) any());

  // Entities were tagged with the expected domain ids.
  verify(aclManager, times(1)).updateTimelineEntityDomain(any(), eq("session-id"));
  verify(aclManager, times(5)).updateTimelineEntityDomain(any(), eq("dag-id"));

  loggingService.stop();
}
Aggregations