Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent in project hadoop by apache.
The class TaskImpl, method internalError:
protected void internalError(TaskEventType type) {
  LOG.error("Invalid event " + type + " on Task " + this.taskId);
  eventHandler.handle(new JobDiagnosticsUpdateEvent(this.taskId.getJobId(),
      "Invalid event " + type + " on Task " + this.taskId));
  eventHandler.handle(new JobEvent(this.taskId.getJobId(),
      JobEventType.INTERNAL_ERROR));
}
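The pattern here is two-step: first attach a human-readable diagnostic to the job, then fire the coarse job-level event that drives the job state machine. A minimal sketch of that pattern, assuming the hadoop-mapreduce-client-app and hadoop-yarn-common artifacts are on the classpath; DiagnosticsReporter is a hypothetical wrapper, not a Hadoop class:

import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType;
import org.apache.hadoop.yarn.event.EventHandler;

final class DiagnosticsReporter {
  private final EventHandler<JobEvent> eventHandler;

  DiagnosticsReporter(EventHandler<JobEvent> eventHandler) {
    this.eventHandler = eventHandler;
  }

  // Attach a diagnostic message to the job, then signal the job-level event,
  // mirroring the order used by TaskImpl.internalError above.
  void reportInternalError(JobId jobId, String message) {
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, message));
    eventHandler.handle(new JobEvent(jobId, JobEventType.INTERNAL_ERROR));
  }
}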
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent in project hadoop by apache.
The class RMContainerAllocator, method handleMapContainerRequest:
@SuppressWarnings({ "unchecked" })
private void handleMapContainerRequest(ContainerRequestEvent reqEvent) {
  assert (reqEvent.getAttemptID().getTaskId().getTaskType().equals(TaskType.MAP));
  Resource supportedMaxContainerCapability = getMaxContainerCapability();
  JobId jobId = getJob().getID();
  if (mapResourceRequest.equals(Resources.none())) {
    mapResourceRequest = reqEvent.getCapability();
    eventHandler.handle(new JobHistoryEvent(jobId,
        new NormalizedResourceEvent(org.apache.hadoop.mapreduce.TaskType.MAP,
            mapResourceRequest.getMemorySize())));
    LOG.info("mapResourceRequest:" + mapResourceRequest);
  }
  boolean mapContainerRequestAccepted = true;
  if (mapResourceRequest.getMemorySize() > supportedMaxContainerCapability.getMemorySize()
      || mapResourceRequest.getVirtualCores() > supportedMaxContainerCapability.getVirtualCores()) {
    mapContainerRequestAccepted = false;
  }
  if (mapContainerRequestAccepted) {
    // set the resources
    reqEvent.getCapability().setMemorySize(mapResourceRequest.getMemorySize());
    reqEvent.getCapability().setVirtualCores(mapResourceRequest.getVirtualCores());
    // maps are immediately scheduled
    scheduledRequests.addMap(reqEvent);
  } else {
    String diagMsg = "The required MAP capability is more than the supported "
        + "max container capability in the cluster. Killing the Job."
        + " mapResourceRequest: " + mapResourceRequest
        + " maxContainerCapability:" + supportedMaxContainerCapability;
    LOG.info(diagMsg);
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
    eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  }
}
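The accept/reject decision above reduces to a two-dimensional fit test: the request is accepted only if both its memory and its vcores fit within the cluster's maximum container capability, and exceeding either dimension kills the job. A minimal sketch of that check as a standalone predicate over YARN Resource objects; CapabilityCheck is a hypothetical helper, not part of RMContainerAllocator:

import org.apache.hadoop.yarn.api.records.Resource;

final class CapabilityCheck {
  // A request fits only if both dimensions are within the cluster maximum;
  // exceeding either one is grounds for rejecting the request.
  static boolean fitsInCluster(Resource requested, Resource clusterMax) {
    return requested.getMemorySize() <= clusterMax.getMemorySize()
        && requested.getVirtualCores() <= clusterMax.getVirtualCores();
  }
}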
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent in project hadoop by apache.
The class RMContainerAllocator, method getResources:
@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
  applyConcurrentTaskLimits();
  // will be null the first time
  Resource headRoom = Resources.clone(getAvailableResources());
  AllocateResponse response;
  /*
   * If contact with the RM is lost, the AM will wait
   * MR_AM_TO_RM_WAIT_INTERVAL_MS milliseconds before aborting. During this
   * interval, the AM will still try to contact the RM.
   */
  try {
    response = makeRemoteRequest();
    // Reset the retry clock if no exception occurred.
    retrystartTime = System.currentTimeMillis();
  } catch (ApplicationAttemptNotFoundException e) {
    // This can happen if the RM has been restarted. If it is in that state,
    // this application must clean itself up.
    eventHandler.handle(new JobEvent(this.getJob().getID(),
        JobEventType.JOB_AM_REBOOT));
    throw new RMContainerAllocationException(
        "Resource Manager doesn't recognize AttemptId: "
            + this.getContext().getApplicationAttemptId(), e);
  } catch (ApplicationMasterNotRegisteredException e) {
    LOG.info("ApplicationMaster is out of sync with ResourceManager,"
        + " hence resync and send outstanding requests.");
    // The RM may have restarted; re-register with it.
    lastResponseID = 0;
    register();
    addOutstandingRequestOnResync();
    return null;
  } catch (InvalidLabelResourceRequestException e) {
    // An invalid-label exception means the requested label is not
    // accessible, so kill the job in this case.
    String diagMsg = "Requested node-label-expression is invalid: "
        + StringUtils.stringifyException(e);
    LOG.info(diagMsg);
    JobId jobId = this.getJob().getID();
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
    eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    throw e;
  } catch (Exception e) {
    // Keep re-trying until the retryInterval has expired.
    if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
      LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
      eventHandler.handle(new JobEvent(this.getJob().getID(),
          JobEventType.JOB_AM_REBOOT));
      throw new RMContainerAllocationException("Could not contact RM after "
          + retryInterval + " milliseconds.");
    }
    // Continue to attempt to contact the RM.
    throw e;
  }
  Resource newHeadRoom = getAvailableResources();
  List<Container> newContainers = response.getAllocatedContainers();
  // Cache the NMTokens.
  if (response.getNMTokens() != null) {
    for (NMToken nmToken : response.getNMTokens()) {
      NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
    }
  }
  // Refresh the AMRMToken if a new one was issued.
  if (response.getAMRMToken() != null) {
    updateAMRMToken(response.getAMRMToken());
  }
  List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
  // Propagate preemption requests.
  final PreemptionMessage preemptReq = response.getPreemptionMessage();
  if (preemptReq != null) {
    preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
  }
  if (newContainers.size() + finishedContainers.size() > 0
      || !headRoom.equals(newHeadRoom)) {
    // Something changed.
    recalculateReduceSchedule = true;
    if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
      LOG.debug("headroom=" + newHeadRoom);
    }
  }
  if (LOG.isDebugEnabled()) {
    for (Container cont : newContainers) {
      LOG.debug("Received new Container :" + cont);
    }
  }
  // Called on each allocation; picks up newly blacklisted/added hosts.
  computeIgnoreBlacklisting();
  handleUpdatedNodes(response);
  handleJobPriorityChange(response);
  // Handle receiving the timeline collector address for this app.
  String collectorAddr = response.getCollectorAddr();
  MRAppMaster.RunningAppContext appContext =
      (MRAppMaster.RunningAppContext) this.getContext();
  if (collectorAddr != null && !collectorAddr.isEmpty()
      && appContext.getTimelineV2Client() != null) {
    appContext.getTimelineV2Client().setTimelineServiceAddress(
        response.getCollectorAddr());
  }
  for (ContainerStatus cont : finishedContainers) {
    processFinishedContainer(cont);
  }
  return newContainers;
}
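The generic catch clause implements a retry window: ordinary failures are rethrown (and the heartbeat retried by the caller) until retryInterval milliseconds have passed since the last successful call, tracked by retrystartTime, at which point the AM gives up and reboots. A minimal self-contained sketch of that policy in plain Java, with hypothetical names (RetryWindow, onSuccess, expired):

final class RetryWindow {
  private final long retryIntervalMs;
  private long lastSuccessMs = System.currentTimeMillis();

  RetryWindow(long retryIntervalMs) {
    this.retryIntervalMs = retryIntervalMs;
  }

  // Record a successful contact, resetting the window (like retrystartTime above).
  void onSuccess() {
    lastSuccessMs = System.currentTimeMillis();
  }

  // True once failures have persisted longer than the allowed interval,
  // meaning the caller should abort instead of rethrowing for another retry.
  boolean expired() {
    return System.currentTimeMillis() - lastSuccessMs >= retryIntervalMs;
  }
}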
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent in project hadoop by apache.
The class RMContainerAllocator, method handleReduceContainerRequest:
@SuppressWarnings({ "unchecked" })
private void handleReduceContainerRequest(ContainerRequestEvent reqEvent) {
  assert (reqEvent.getAttemptID().getTaskId().getTaskType().equals(TaskType.REDUCE));
  Resource supportedMaxContainerCapability = getMaxContainerCapability();
  JobId jobId = getJob().getID();
  if (reduceResourceRequest.equals(Resources.none())) {
    reduceResourceRequest = reqEvent.getCapability();
    eventHandler.handle(new JobHistoryEvent(jobId,
        new NormalizedResourceEvent(org.apache.hadoop.mapreduce.TaskType.REDUCE,
            reduceResourceRequest.getMemorySize())));
    LOG.info("reduceResourceRequest:" + reduceResourceRequest);
  }
  boolean reduceContainerRequestAccepted = true;
  if (reduceResourceRequest.getMemorySize() > supportedMaxContainerCapability.getMemorySize()
      || reduceResourceRequest.getVirtualCores() > supportedMaxContainerCapability.getVirtualCores()) {
    reduceContainerRequestAccepted = false;
  }
  if (reduceContainerRequestAccepted) {
    // set the resources
    reqEvent.getCapability().setVirtualCores(reduceResourceRequest.getVirtualCores());
    reqEvent.getCapability().setMemorySize(reduceResourceRequest.getMemorySize());
    if (reqEvent.getEarlierAttemptFailed()) {
      // Previously failed reducers are added to the front, to fail fast.
      pendingReduces.addFirst(new ContainerRequest(reqEvent, PRIORITY_REDUCE,
          reduceNodeLabelExpression));
    } else {
      // Reduces are added to the pending queue and slowly ramped up.
      pendingReduces.add(new ContainerRequest(reqEvent, PRIORITY_REDUCE,
          reduceNodeLabelExpression));
    }
  } else {
    String diagMsg = "REDUCE capability required is more than the supported "
        + "max container capability in the cluster. Killing the Job."
        + " reduceResourceRequest: " + reduceResourceRequest
        + " maxContainerCapability:" + supportedMaxContainerCapability;
    LOG.info(diagMsg);
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
    eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  }
}
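Unlike maps, which are scheduled immediately, reduces wait in pendingReduces, and retried attempts jump to the head of the queue so a repeatedly failing reducer fails the job quickly. A minimal sketch of that ordering rule using a plain java.util deque; the PendingReduces class and its method names are hypothetical:

import java.util.ArrayDeque;
import java.util.Deque;

final class PendingReduces<R> {
  private final Deque<R> pending = new ArrayDeque<>();

  void enqueue(R request, boolean earlierAttemptFailed) {
    if (earlierAttemptFailed) {
      // Fail fast: reschedule retried reducers ahead of new work.
      pending.addFirst(request);
    } else {
      // Fresh reducers wait at the tail and are ramped up slowly.
      pending.addLast(request);
    }
  }

  // The scheduler drains from the head, so retries always go first.
  R next() {
    return pending.pollFirst();
  }
}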
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent in project hadoop by apache.
The class TestJobImpl, method testReportDiagnostics:
@Test
public void testReportDiagnostics() throws Exception {
  JobID jobID = JobID.forName("job_1234567890000_0001");
  JobId jobId = TypeConverter.toYarn(jobID);
  final String diagMsg = "some diagnostic message";
  final JobDiagnosticsUpdateEvent diagUpdateEvent =
      new JobDiagnosticsUpdateEvent(jobId, diagMsg);
  MRAppMetrics mrAppMetrics = MRAppMetrics.create();
  AppContext mockContext = mock(AppContext.class);
  when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true);
  JobImpl job = new JobImpl(jobId, Records.newRecord(ApplicationAttemptId.class),
      new Configuration(), mock(EventHandler.class), null,
      mock(JobTokenSecretManager.class), null, SystemClock.getInstance(), null,
      mrAppMetrics, null, true, null, 0, null, mockContext, null, null);
  job.handle(diagUpdateEvent);
  String diagnostics = job.getReport().getDiagnostics();
  Assert.assertNotNull(diagnostics);
  Assert.assertTrue(diagnostics.contains(diagMsg));

  // A diagnostics update should also be recorded on a job that is being killed.
  job = new JobImpl(jobId, Records.newRecord(ApplicationAttemptId.class),
      new Configuration(), mock(EventHandler.class), null,
      mock(JobTokenSecretManager.class), null, SystemClock.getInstance(), null,
      mrAppMetrics, null, true, null, 0, null, mockContext, null, null);
  job.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  job.handle(diagUpdateEvent);
  diagnostics = job.getReport().getDiagnostics();
  Assert.assertNotNull(diagnostics);
  Assert.assertTrue(diagnostics.contains(diagMsg));
}
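For reference, the event type itself is small: a JobId plus a diagnostic string. A minimal sketch of constructing one and reading the message back, mirroring the test's setup; TypeConverter, JobID, and JobDiagnosticsUpdateEvent are the real Hadoop types, and getDiagnosticUpdate() is the event's accessor:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent;

class JobDiagnosticsExample {
  public static void main(String[] args) {
    // Build the YARN JobId the same way the test does.
    JobId jobId = TypeConverter.toYarn(JobID.forName("job_1234567890000_0001"));
    JobDiagnosticsUpdateEvent event =
        new JobDiagnosticsUpdateEvent(jobId, "some diagnostic message");
    System.out.println(event.getType());             // the JobEventType carried by the event
    System.out.println(event.getDiagnosticUpdate()); // "some diagnostic message"
  }
}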