Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.
The class RMContainerAllocator, method getResources:
@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
  applyConcurrentTaskLimits();

  // will be null the first time
  Resource headRoom = Resources.clone(getAvailableResources());
  AllocateResponse response;
  /*
   * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
   * milliseconds before aborting. During this interval, AM will still try
   * to contact the RM.
   */
  try {
    response = makeRemoteRequest();
    // Reset retry count if no exception occurred.
    retrystartTime = System.currentTimeMillis();
  } catch (ApplicationAttemptNotFoundException e) {
    // This can happen if the RM has been restarted. If it is in that state,
    // this application must clean itself up.
    eventHandler.handle(new JobEvent(this.getJob().getID(),
        JobEventType.JOB_AM_REBOOT));
    throw new RMContainerAllocationException(
        "Resource Manager doesn't recognize AttemptId: "
            + this.getContext().getApplicationAttemptId(), e);
  } catch (ApplicationMasterNotRegisteredException e) {
    LOG.info("ApplicationMaster is out of sync with ResourceManager,"
        + " hence resync and send outstanding requests.");
    // RM may have restarted, re-register with RM.
    lastResponseID = 0;
    register();
    addOutstandingRequestOnResync();
    return null;
  } catch (InvalidLabelResourceRequestException e) {
    // An invalid label exception means the requested node label is not
    // accessible, so kill the job in this case.
    String diagMsg = "Requested node-label-expression is invalid: "
        + StringUtils.stringifyException(e);
    LOG.info(diagMsg);
    JobId jobId = this.getJob().getID();
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
    eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    throw e;
  } catch (Exception e) {
    // Keep retrying until the retryInterval has expired.
    if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
      LOG.error("Could not contact RM after " + retryInterval
          + " milliseconds.");
      eventHandler.handle(new JobEvent(this.getJob().getID(),
          JobEventType.JOB_AM_REBOOT));
      throw new RMContainerAllocationException("Could not contact RM after "
          + retryInterval + " milliseconds.");
    }
    // Continue to attempt to contact the RM.
    throw e;
  }

  Resource newHeadRoom = getAvailableResources();
  List<Container> newContainers = response.getAllocatedContainers();

  // Setting NMTokens
  if (response.getNMTokens() != null) {
    for (NMToken nmToken : response.getNMTokens()) {
      NMTokenCache.setNMToken(nmToken.getNodeId().toString(),
          nmToken.getToken());
    }
  }

  // Setting AMRMToken
  if (response.getAMRMToken() != null) {
    updateAMRMToken(response.getAMRMToken());
  }

  List<ContainerStatus> finishedContainers =
      response.getCompletedContainersStatuses();

  // Propagate preemption requests.
  final PreemptionMessage preemptReq = response.getPreemptionMessage();
  if (preemptReq != null) {
    preemptionPolicy.preempt(new PreemptionContext(assignedRequests),
        preemptReq);
  }

  if (newContainers.size() + finishedContainers.size() > 0
      || !headRoom.equals(newHeadRoom)) {
    // Something changed.
    recalculateReduceSchedule = true;
    if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
      LOG.debug("headroom=" + newHeadRoom);
    }
  }

  if (LOG.isDebugEnabled()) {
    for (Container cont : newContainers) {
      LOG.debug("Received new Container: " + cont);
    }
  }

  // Called on each allocation; picks up newly blacklisted/added hosts.
  computeIgnoreBlacklisting();
  handleUpdatedNodes(response);
  handleJobPriorityChange(response);

  // Handle receiving the timeline collector address for this app.
  String collectorAddr = response.getCollectorAddr();
  MRAppMaster.RunningAppContext appContext =
      (MRAppMaster.RunningAppContext) this.getContext();
  if (collectorAddr != null && !collectorAddr.isEmpty()
      && appContext.getTimelineV2Client() != null) {
    appContext.getTimelineV2Client()
        .setTimelineServiceAddress(response.getCollectorAddr());
  }

  for (ContainerStatus cont : finishedContainers) {
    processFinishedContainer(cont);
  }
  return newContainers;
}
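The catch-all branch above implements a simple retry window: the clock is reset on every successful heartbeat, and a generic failure only becomes fatal once retryInterval has elapsed since the last success. Below is a minimal standalone sketch of that pattern; the RetryWindow class and its method names are hypothetical, not part of Hadoop.

import java.util.concurrent.TimeUnit;

/** A minimal sketch of the retry window; class and method names are hypothetical. */
public class RetryWindow {

    private final long retryIntervalMs;
    private long retryStartTime = System.currentTimeMillis();

    public RetryWindow(long retryIntervalMs) {
        this.retryIntervalMs = retryIntervalMs;
    }

    /** Call after a successful remote request: the abort window resets. */
    public void onSuccess() {
        retryStartTime = System.currentTimeMillis();
    }

    /** Call after a failed request: true once the window has expired. */
    public boolean shouldAbort() {
        return System.currentTimeMillis() - retryStartTime >= retryIntervalMs;
    }

    public static void main(String[] args) throws InterruptedException {
        RetryWindow window = new RetryWindow(TimeUnit.SECONDS.toMillis(2));
        // Simulate repeated heartbeat failures; abort only after the interval elapses.
        while (!window.shouldAbort()) {
            System.out.println("RM unreachable, rethrowing and retrying...");
            Thread.sleep(500);
        }
        System.out.println("Retry window expired; time to reboot the AM.");
    }
}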
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.
The class RMContainerAllocator, method handleReduceContainerRequest:
@SuppressWarnings({ "unchecked" })
private void handleReduceContainerRequest(ContainerRequestEvent reqEvent) {
  assert (reqEvent.getAttemptID().getTaskId().getTaskType()
      .equals(TaskType.REDUCE));
  Resource supportedMaxContainerCapability = getMaxContainerCapability();
  JobId jobId = getJob().getID();
  if (reduceResourceRequest.equals(Resources.none())) {
    reduceResourceRequest = reqEvent.getCapability();
    eventHandler.handle(new JobHistoryEvent(jobId,
        new NormalizedResourceEvent(
            org.apache.hadoop.mapreduce.TaskType.REDUCE,
            reduceResourceRequest.getMemorySize())));
    LOG.info("reduceResourceRequest:" + reduceResourceRequest);
  }
  boolean reduceContainerRequestAccepted = true;
  if (reduceResourceRequest.getMemorySize()
      > supportedMaxContainerCapability.getMemorySize()
      || reduceResourceRequest.getVirtualCores()
          > supportedMaxContainerCapability.getVirtualCores()) {
    reduceContainerRequestAccepted = false;
  }
  if (reduceContainerRequestAccepted) {
    // Set the resources.
    reqEvent.getCapability().setVirtualCores(
        reduceResourceRequest.getVirtualCores());
    reqEvent.getCapability().setMemorySize(
        reduceResourceRequest.getMemorySize());
    if (reqEvent.getEarlierAttemptFailed()) {
      // Previously failed reducers are added to the front, to fail fast.
      pendingReduces.addFirst(new ContainerRequest(reqEvent, PRIORITY_REDUCE,
          reduceNodeLabelExpression));
    } else {
      // New reduces are added to the pending queue and slowly ramped up.
      pendingReduces.add(new ContainerRequest(reqEvent, PRIORITY_REDUCE,
          reduceNodeLabelExpression));
    }
  } else {
    String diagMsg = "REDUCE capability required is more than the "
        + "supported max container capability in the cluster. Killing"
        + " the Job. reduceResourceRequest: " + reduceResourceRequest
        + " maxContainerCapability:" + supportedMaxContainerCapability;
    LOG.info(diagMsg);
    eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
    eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
  }
}
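The accepted branch keeps a deque of pending reduce requests: retries of previously failed attempts jump to the front so a systemic failure surfaces quickly, while fresh requests queue at the back and are ramped up gradually. Here is a standalone sketch of that ordering policy using a plain ArrayDeque; the nested Request record is a simplified, hypothetical stand-in for Hadoop's ContainerRequest.

import java.util.ArrayDeque;
import java.util.Deque;

/** Standalone sketch of the reduce queueing policy; Request is a simplified stand-in. */
public class ReduceQueueSketch {

    record Request(String attemptId, boolean earlierAttemptFailed) {}

    public static void main(String[] args) {
        Deque<Request> pendingReduces = new ArrayDeque<>();
        Request fresh = new Request("attempt_r_000001", false);
        Request rerun = new Request("attempt_r_000000", true);

        // Fresh reduce requests go to the back and are ramped up slowly...
        pendingReduces.add(fresh);

        // ...while retries of failed attempts jump the queue to fail fast.
        if (rerun.earlierAttemptFailed()) {
            pendingReduces.addFirst(rerun);
        } else {
            pendingReduces.add(rerun);
        }

        // The retried attempt is scheduled first: prints attempt_r_000000.
        System.out.println(pendingReduces.peekFirst().attemptId());
    }
}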
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.
The class JobImpl, method handle:
/**
 * The only entry point to change the Job.
 */
@Override
public void handle(JobEvent event) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Processing " + event.getJobId() + " of type "
        + event.getType());
  }
  try {
    writeLock.lock();
    JobStateInternal oldState = getInternalState();
    try {
      getStateMachine().doTransition(event.getType(), event);
    } catch (InvalidStateTransitionException e) {
      LOG.error("Can't handle this event at current state", e);
      addDiagnostic("Invalid event " + event.getType() + " on Job "
          + this.jobId);
      eventHandler.handle(new JobEvent(this.jobId,
          JobEventType.INTERNAL_ERROR));
    }
    // Notify the event handler of the state change.
    if (oldState != getInternalState()) {
      LOG.info(jobId + " Job Transitioned from " + oldState + " to "
          + getInternalState());
      rememberLastNonFinalState(oldState);
    }
  } finally {
    writeLock.unlock();
  }
}
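handle() follows a common pattern: take the write lock, attempt the state-machine transition, and convert an invalid transition into an internal-error path rather than letting the exception escape the dispatcher thread. Below is a simplified standalone sketch of that pattern; the State and Event enums and the transition table are illustrative, not JobImpl's real state machine.

import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/** Simplified sketch of the lock-then-transition pattern; states and events are illustrative. */
public class TransitionSketch {

    enum State { NEW, RUNNING, ERROR }
    enum Event { START, INVALID }

    private final ReadWriteLock lock = new ReentrantReadWriteLock();
    private State state = State.NEW;

    public void handle(Event event) {
        lock.writeLock().lock();
        try {
            State oldState = state;
            try {
                state = transition(state, event); // may throw on an illegal move
            } catch (IllegalStateException e) {
                // Mirror JobImpl: record a diagnostic and route to an error state
                // instead of letting the exception escape the dispatcher thread.
                System.err.println(e.getMessage());
                state = State.ERROR;
            }
            if (oldState != state) {
                System.out.println("Transitioned from " + oldState + " to " + state);
            }
        } finally {
            lock.writeLock().unlock();
        }
    }

    private static State transition(State s, Event e) {
        if (s == State.NEW && e == Event.START) {
            return State.RUNNING;
        }
        throw new IllegalStateException("Invalid event " + e + " in state " + s);
    }

    public static void main(String[] args) {
        TransitionSketch job = new TransitionSketch();
        job.handle(Event.START);   // NEW -> RUNNING
        job.handle(Event.INVALID); // illegal: diagnosed, routed to ERROR
    }
}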
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.
The class MRAppMaster, method startJobs:
/**
* This can be overridden to instantiate multiple jobs and create a
* workflow.
*
* TODO: Rework the design to actually support this. Currently much of the
* job stuff has been moved to init() above to support uberization (MR-1220).
* In a typical workflow, one presumably would want to uberize only a subset
* of the jobs (the "small" ones), which is awkward with the current design.
*/
@SuppressWarnings("unchecked")
protected void startJobs() {
  // Create a job-start event to get this ball rolling.
  JobEvent startJobEvent = new JobStartEvent(job.getID(),
      recoveredJobStartTime);
  // Send the job-start event. This triggers the job execution.
  dispatcher.getEventHandler().handle(startJobEvent);
}
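The dispatcher here is built around a single-method event-handler interface, so startJobs() only constructs the start event and hands it off; the job state machine drives execution from there. A minimal sketch of that fire-an-event shape follows, with simplified, illustrative types rather than the actual org.apache.hadoop.yarn.event API.

/** Illustrative fire-an-event shape; these types are simplified, not the YARN API. */
interface EventHandler<E> {
    void handle(E event);
}

record StartEvent(String jobId, long startTime) {}

public class StartEventDemo {
    public static void main(String[] args) {
        EventHandler<StartEvent> handler =
            e -> System.out.println("starting " + e.jobId() + " at " + e.startTime());
        // As in startJobs(): build the start event, then hand it off; the
        // registered handler (the job state machine in MR) takes it from there.
        handler.handle(new StartEvent("job_0001", System.currentTimeMillis()));
    }
}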
Use of org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent in project hadoop by apache.
The class RMContainerAllocator, method serviceStart:
@Override
protected void serviceStart() throws Exception {
  this.eventHandlingThread = new Thread() {

    @SuppressWarnings("unchecked")
    @Override
    public void run() {
      ContainerAllocatorEvent event;
      while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
        try {
          event = RMContainerAllocator.this.eventQueue.take();
        } catch (InterruptedException e) {
          if (!stopped.get()) {
            LOG.error("Returning, interrupted: " + e);
          }
          return;
        }
        try {
          handleEvent(event);
        } catch (Throwable t) {
          LOG.error("Error in handling event type " + event.getType()
              + " to the ContainerAllocator", t);
          // Kill the AM.
          eventHandler.handle(new JobEvent(getJob().getID(),
              JobEventType.INTERNAL_ERROR));
          return;
        }
      }
    }
  };
  this.eventHandlingThread.start();
  super.serviceStart();
}
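serviceStart() spins up the standard single-consumer event loop: block on the queue, treat an interrupt during shutdown as normal, and treat any Throwable thrown by a handler as fatal. Here is a standalone sketch of that loop using plain java.util.concurrent types; Runnable stands in for ContainerAllocatorEvent and the names are illustrative.

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;

/** Standalone sketch of the drain-and-dispatch loop; names and types are illustrative. */
public class EventLoopSketch {

    private final BlockingQueue<Runnable> eventQueue = new LinkedBlockingQueue<>();
    private final AtomicBoolean stopped = new AtomicBoolean(false);
    private Thread eventHandlingThread;

    public void start() {
        eventHandlingThread = new Thread(() -> {
            while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
                Runnable event;
                try {
                    event = eventQueue.take(); // blocks until an event arrives
                } catch (InterruptedException e) {
                    // An interrupt during shutdown is expected; just exit the loop.
                    return;
                }
                try {
                    event.run();
                } catch (Throwable t) {
                    // Like the allocator: a handler failure is fatal for the loop
                    // (the real code also fires a JobEvent with INTERNAL_ERROR first).
                    return;
                }
            }
        });
        eventHandlingThread.start();
    }

    public void stop() {
        stopped.set(true);
        eventHandlingThread.interrupt();
    }

    public static void main(String[] args) throws InterruptedException {
        EventLoopSketch loop = new EventLoopSketch();
        loop.start();
        loop.eventQueue.add(() -> System.out.println("handled one event"));
        Thread.sleep(100);
        loop.stop();
    }
}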