use of org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent in project hadoop by apache.
the class MRAppMaster method serviceStart.
/**
 * Starts the application master: runs recovery, creates the job, publishes
 * AM-started history events for previous and current AM generations,
 * initializes the job synchronously, starts the remaining services and finally
 * either starts the job or delivers a JOB_INIT_FAILED event.
 *
 * <p>Statement order matters: the JOB_INIT event is handled through a direct
 * call on {@code jobEventDispatcher} before {@code super.serviceStart()} runs,
 * and the outcome of that initialization is only acted upon after all
 * components have been started.
 *
 * @throws Exception propagated from any of the steps performed below
 */
@SuppressWarnings("unchecked")
@Override
protected void serviceStart() throws Exception {
amInfos = new LinkedList<AMInfo>();
completedTasksFromPreviousRun = new HashMap<TaskId, TaskInfo>();
processRecovery();
// Create an AMInfo for the current AM generation.
AMInfo amInfo = MRBuilderUtils.newAMInfo(appAttemptID, startTime, containerID, nmHost, nmPort, nmHttpPort);
// Create the job itself.
job = createJob(getConfig(), forcedState, shutDownMessage);
// Send out an MR AM inited event for all previous AMs.
// NOTE(review): presumably amInfos was populated by processRecovery() above - confirm.
for (AMInfo info : amInfos) {
dispatcher.getEventHandler().handle(new JobHistoryEvent(job.getID(), new AMStartedEvent(info.getAppAttemptId(), info.getStartTime(), info.getContainerId(), info.getNodeManagerHost(), info.getNodeManagerPort(), info.getNodeManagerHttpPort(), appSubmitTime)));
}
// Send out an MR AM inited event for this AM. Unlike the events above, this
// one also carries the forced state (as a string, or null when none is set).
dispatcher.getEventHandler().handle(new JobHistoryEvent(job.getID(), new AMStartedEvent(amInfo.getAppAttemptId(), amInfo.getStartTime(), amInfo.getContainerId(), amInfo.getNodeManagerHost(), amInfo.getNodeManagerPort(), amInfo.getNodeManagerHttpPort(), this.forcedState == null ? null : this.forcedState.toString(), appSubmitTime)));
// Record the current AM only after the loop above so it is not reported twice.
amInfos.add(amInfo);
// metrics system init is really init & start.
// It's more test friendly to put it here.
DefaultMetricsSystem.initialize("MRAppMaster");
boolean initFailed = false;
if (!errorHappenedShutDown) {
// create a job event for job initialization
JobEvent initJobEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT);
// Send init to the job (this does NOT trigger job execution)
// This is a synchronous call, not an event through dispatcher. We want
// job-init to be done completely here.
jobEventDispatcher.handle(initJobEvent);
// If job is still not initialized, an error happened during
// initialization. Must complete starting all of the services so failure
// events can be processed.
initFailed = (((JobImpl) job).getInternalState() != JobStateInternal.INITED);
if (job.isUber()) {
// Uber jobs run without speculation.
speculatorEventDispatcher.disableSpeculation();
LOG.info("MRAppMaster uberizing job " + job.getID() + " in local container (\"uber-AM\") on node " + nmHost + ":" + nmPort + ".");
} else {
// send init to speculator only for non-uber jobs.
// This won't yet start as dispatcher isn't started yet.
dispatcher.getEventHandler().handle(new SpeculatorEvent(job.getID(), clock.getTime()));
LOG.info("MRAppMaster launching normal, non-uberized, multi-container " + "job " + job.getID() + ".");
}
// Start ClientService here, since it's not initialized if
// errorHappenedShutDown is true
clientService.start();
}
//start all the components
super.serviceStart();
// finally set the job classloader
// NOTE(review): presumably a no-op when no job classloader is configured - confirm in MRApps.
MRApps.setClassLoader(jobClassLoader, getConfig());
// initialize Limits from the configuration
Limits.init(getConfig());
if (initFailed) {
// Initialization failed earlier; report it now that the services are running.
JobEvent initFailedEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT_FAILED);
jobEventDispatcher.handle(initFailedEvent);
} else {
// All components have started, start the job.
startJobs();
}
}
use of org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent in project hadoop by apache.
the class TestRuntimeEstimators method coreTestEstimator.
/**
 * Runs a simulated job against {@code testedEstimator} until every task is
 * finished and asserts the number of successful speculations.
 *
 * <p>The method resets the shared test fixtures (clock, dispatcher, counters),
 * configures and verifies a {@link DefaultSpeculator}, then repeatedly:
 * starts NEW attempts for which enough slots are free, reports the status of
 * all other attempts to the speculator, waits for the speculator's event
 * queue to drain, and advances the controlled clock by one second.
 *
 * <p>If the speculator's event queue does not drain within the timeout the
 * test fails explicitly instead of returning silently, and the dispatcher and
 * speculator started here are always stopped before the method exits.
 *
 * @param testedEstimator the runtime estimator under test
 * @param expectedSpeculations the expected number of successful speculations
 */
private void coreTestEstimator(TaskRuntimeEstimator testedEstimator, int expectedSpeculations) {
  // Upper bound on how long a single drain of the speculator queue may take.
  final long drainTimeoutMs = 130000L;

  estimator = testedEstimator;
  clock = new ControlledClock();
  dispatcher = new AsyncDispatcher();
  myJob = null;
  slotsInUse.set(0);
  completedMaps.set(0);
  completedReduces.set(0);
  successfulSpeculations.set(0);
  taskTimeSavedBySpeculation.set(0);
  clock.tickMsec(1000);
  Configuration conf = new Configuration();
  myAppContext = new MyAppContext(MAP_TASKS, REDUCE_TASKS);
  myJob = myAppContext.getAllJobs().values().iterator().next();
  estimator.contextualize(conf, myAppContext);
  conf.setLong(MRJobConfig.SPECULATIVE_RETRY_AFTER_NO_SPECULATE, 500L);
  conf.setLong(MRJobConfig.SPECULATIVE_RETRY_AFTER_SPECULATE, 5000L);
  conf.setDouble(MRJobConfig.SPECULATIVECAP_RUNNING_TASKS, 0.1);
  conf.setDouble(MRJobConfig.SPECULATIVECAP_TOTAL_TASKS, 0.001);
  conf.setInt(MRJobConfig.SPECULATIVE_MINIMUM_ALLOWED_TASKS, 5);
  speculator = new DefaultSpeculator(conf, myAppContext, estimator, clock);
  // Verify that the speculator picked up the configured values
  // (JUnit argument order is: message, expected, actual[, delta]).
  Assert.assertEquals("wrong SPECULATIVE_RETRY_AFTER_NO_SPECULATE value", 500L, speculator.getSoonestRetryAfterNoSpeculate());
  Assert.assertEquals("wrong SPECULATIVE_RETRY_AFTER_SPECULATE value", 5000L, speculator.getSoonestRetryAfterSpeculate());
  Assert.assertEquals("wrong SPECULATIVECAP_RUNNING_TASKS value", 0.1, speculator.getProportionRunningTasksSpeculatable(), 0.00001);
  Assert.assertEquals("wrong SPECULATIVECAP_TOTAL_TASKS value", 0.001, speculator.getProportionTotalTasksSpeculatable(), 0.00001);
  Assert.assertEquals("wrong SPECULATIVE_MINIMUM_ALLOWED_TASKS value", 5, speculator.getMinimumAllowedSpeculativeTasks());
  dispatcher.register(Speculator.EventType.class, speculator);
  dispatcher.register(TaskEventType.class, new SpeculationRequestEventHandler());
  dispatcher.init(conf);
  dispatcher.start();
  speculator.init(conf);
  speculator.start();
  try {
    // Now that the plumbing is hooked up, we do the following:
    // do until all tasks are finished, ...
    // 1: If we have spare capacity, assign as many map tasks as we can, then
    // assign as many reduce tasks as we can. Note that an odd reduce
    // task might be started while there are still map tasks, because
    // map tasks take 3 slots and reduce tasks 2 slots.
    // 2: Send a speculation event for every task attempt that's running
    // note that new attempts might get started by the speculator
    // discover undone tasks
    int undoneMaps = MAP_TASKS;
    int undoneReduces = REDUCE_TASKS;
    // build a task sequence where all the maps precede any of the reduces
    List<Task> allTasksSequence = new LinkedList<Task>();
    allTasksSequence.addAll(myJob.getTasks(TaskType.MAP).values());
    allTasksSequence.addAll(myJob.getTasks(TaskType.REDUCE).values());
    while (undoneMaps + undoneReduces > 0) {
      // Recount the unfinished tasks on every pass.
      undoneMaps = 0;
      undoneReduces = 0;
      // start all attempts which are new but for which there is enough slots
      for (Task task : allTasksSequence) {
        if (!task.isFinished()) {
          if (task.getType() == TaskType.MAP) {
            ++undoneMaps;
          } else {
            ++undoneReduces;
          }
        }
        for (TaskAttempt attempt : task.getAttempts().values()) {
          if (attempt.getState() == TaskAttemptState.NEW && INITIAL_NUMBER_FREE_SLOTS - slotsInUse.get() >= taskTypeSlots(task.getType())) {
            MyTaskAttemptImpl attemptImpl = (MyTaskAttemptImpl) attempt;
            SpeculatorEvent event = new SpeculatorEvent(attempt.getID(), false, clock.getTime());
            speculator.handle(event);
            attemptImpl.startUp();
          } else {
            // If a task attempt is in progress we should send the news to
            // the Speculator.
            TaskAttemptStatus status = new TaskAttemptStatus();
            status.id = attempt.getID();
            status.progress = attempt.getProgress();
            status.stateString = attempt.getState().name();
            status.taskState = attempt.getState();
            SpeculatorEvent event = new SpeculatorEvent(status, clock.getTime());
            speculator.handle(event);
          }
        }
      }
      long drainStart = System.currentTimeMillis();
      // drain the speculator event queue
      while (!speculator.eventQueueEmpty()) {
        Thread.yield();
        if (System.currentTimeMillis() > drainStart + drainTimeoutMs) {
          // A bare return here would let the test pass without ever reaching
          // the final assertion, hiding a hung speculator.
          Assert.fail("Timed out after " + drainTimeoutMs + " ms waiting for the speculator event queue to drain.");
        }
      }
      clock.tickMsec(1000L);
      if (clock.getTime() % 10000L == 0L) {
        speculator.scanForSpeculations();
      }
    }
    Assert.assertEquals("We got the wrong number of successful speculations.", expectedSpeculations, successfulSpeculations.get());
  } finally {
    // Always release the threads started above, even when an assertion fails.
    try {
      speculator.stop();
    } finally {
      dispatcher.stop();
    }
  }
}
use of org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent in project hadoop by apache.
the class DefaultSpeculator method containerNeed.
/* ************************************************************* */
// This section contains the code that gets run for a SpeculatorEvent
/**
 * Returns the {@link AtomicInteger} registered for the job of {@code taskID},
 * registering a new one initialized to 0 if none exists yet.
 *
 * <p>The counter is looked up in {@code mapContainerNeeds} for MAP tasks and
 * in {@code reduceContainerNeeds} for every other task type. The result of
 * {@code putIfAbsent} is used directly, so the returned value is never null
 * and always the instance that won a concurrent insertion.
 *
 * @param taskID the task whose job and type select the counter
 * @return the counter shared by all tasks of that type in the same job
 */
private AtomicInteger containerNeed(TaskId taskID) {
  JobId jobID = taskID.getJobId();
  TaskType taskType = taskID.getTaskType();
  ConcurrentMap<JobId, AtomicInteger> relevantMap = taskType == TaskType.MAP ? mapContainerNeeds : reduceContainerNeeds;
  AtomicInteger result = relevantMap.get(jobID);
  if (result == null) {
    AtomicInteger fresh = new AtomicInteger(0);
    // putIfAbsent returns the previously mapped value, or null when ours was
    // installed; either way no further lookup is needed.
    AtomicInteger existing = relevantMap.putIfAbsent(jobID, fresh);
    result = existing == null ? fresh : existing;
  }
  return result;
}
Aggregations