Use of org.apache.hadoop.mapred.gridmix.Statistics.JobStats in project hadoop by apache.
The class StressJobFactory, method checkLoadAndGetSlotsToBackfill.
/**
 * Uses a light-weight mechanism to estimate the current cluster load.
 *
 * @throws java.io.IOException
 * @throws InterruptedException
 */
protected void checkLoadAndGetSlotsToBackfill()
    throws IOException, InterruptedException {
  if (loadStatus.getJobLoad() <= 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug(System.currentTimeMillis() + " [JobLoad] Overloaded is "
          + Boolean.TRUE.toString() + " NumJobsBackfill is "
          + loadStatus.getJobLoad());
    }
    // stop calculation because we know it is overloaded.
    return;
  }

  int mapCapacity = loadStatus.getMapCapacity();
  int reduceCapacity = loadStatus.getReduceCapacity();

  // return if the cluster status is not set
  if (mapCapacity < 0 || reduceCapacity < 0) {
    // a missing cluster status will result in blocking of job submission
    return;
  }

  // determine the maximum permissible map & reduce task load
  int maxMapLoad = (int) (overloadMapTaskMapSlotRatio * mapCapacity);
  int maxReduceLoad =
      (int) (overloadReduceTaskReduceSlotRatio * reduceCapacity);

  // compute the total number of map & reduce tasks submitted
  int totalMapTasks = ClusterStats.getSubmittedMapTasks();
  int totalReduceTasks = ClusterStats.getSubmittedReduceTasks();

  if (LOG.isDebugEnabled()) {
    LOG.debug("Total submitted map tasks: " + totalMapTasks);
    LOG.debug("Total submitted reduce tasks: " + totalReduceTasks);
    LOG.debug("Max map load: " + maxMapLoad);
    LOG.debug("Max reduce load: " + maxReduceLoad);
  }

  // generate a pessimistic bound on the max running+pending map tasks;
  // this check avoids the heavy-duty actual map load calculation
  int mapSlotsBackFill = maxMapLoad - totalMapTasks;

  // generate a pessimistic bound on the max running+pending reduce tasks;
  // this check avoids the heavy-duty actual reduce load calculation
  int reduceSlotsBackFill = maxReduceLoad - totalReduceTasks;

  // maintain a set of job ids seen in this cycle
  Set<JobID> seenJobIDs = new HashSet<JobID>();

  // check if the total number of submitted map & reduce tasks exceeds the
  // permissible limit
  if (totalMapTasks > maxMapLoad || totalReduceTasks > maxReduceLoad) {
    // if yes, calculate the real load
    // include pending & running map tasks
    float incompleteMapTasks = 0;
    // include pending & running reduce tasks
    float incompleteReduceTasks = 0;

    for (JobStats job : ClusterStats.getRunningJobStats()) {
      JobID id = job.getJob().getJobID();
      seenJobIDs.add(id);

      // skip blacklisted jobs; ideally ClusterStats.getRunningJobStats()
      // should be smart enough to take care of completed jobs.
      if (blacklistedJobs.contains(id)) {
        LOG.warn("Ignoring blacklisted job: " + id);
        continue;
      }

      int noOfMaps = job.getNoOfMaps();
      int noOfReduces = job.getNoOfReds();

      // consider only jobs that have at least one map or reduce task
      if (noOfMaps > 0 || noOfReduces > 0) {
        // get the job's status
        JobStatus status = job.getJobStatus();

        // blacklist completed jobs and continue
        if (status != null && status.isJobComplete()) {
          LOG.warn("Blacklisting completed job: " + id);
          blacklistedJobs.add(id);
          continue;
        }

        // get the map and reduce tasks' progress
        float mapProgress = 0f;
        float reduceProgress = 0f;

        // the status can be missing (this can happen for unpolled jobs)
        if (status != null) {
          mapProgress = status.getMapProgress();
          reduceProgress = status.getReduceProgress();
        }

        incompleteMapTasks +=
            calcEffectiveIncompleteMapTasks(mapCapacity, noOfMaps, mapProgress);

        // bail out early if the map load alone already exceeds the limit
        int currentMapSlotsBackFill = (int) (maxMapLoad - incompleteMapTasks);
        if (currentMapSlotsBackFill <= 0) {
          // reset the reduce task load since we are bailing out
          incompleteReduceTasks = totalReduceTasks;
          if (LOG.isDebugEnabled()) {
            LOG.debug("Terminating overload check due to high map load.");
          }
          break;
        }

        // compute the real reduce load
        if (noOfReduces > 0) {
          incompleteReduceTasks +=
              calcEffectiveIncompleteReduceTasks(reduceCapacity, noOfReduces,
                  reduceProgress);
        }

        // bail out early if the reduce load alone already exceeds the limit
        int currentReduceSlotsBackFill =
            (int) (maxReduceLoad - incompleteReduceTasks);
        if (currentReduceSlotsBackFill <= 0) {
          // reset the map task load since we are bailing out
          incompleteMapTasks = totalMapTasks;
          if (LOG.isDebugEnabled()) {
            LOG.debug("Terminating overload check due to high reduce load.");
          }
          break;
        }
      } else {
        LOG.warn("Blacklisting empty job: " + id);
        blacklistedJobs.add(id);
      }
    }

    // calculate the real map load on the cluster
    mapSlotsBackFill = (int) (maxMapLoad - incompleteMapTasks);
    // calculate the real reduce load on the cluster
    reduceSlotsBackFill = (int) (maxReduceLoad - incompleteReduceTasks);

    // clean up the blacklisted set to keep the memory footprint minimal;
    // retain only the jobs seen in this cycle
    blacklistedJobs.retainAll(seenJobIDs);
    if (LOG.isDebugEnabled() && blacklistedJobs.size() > 0) {
      LOG.debug("Blacklisted jobs count: " + blacklistedJobs.size());
    }
  }

  // update the load status with the computed backfill values
  loadStatus.updateMapLoad(mapSlotsBackFill);
  loadStatus.updateReduceLoad(reduceSlotsBackFill);

  if (loadStatus.getMapLoad() <= 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug(System.currentTimeMillis() + " [MAP-LOAD] Overloaded is "
          + Boolean.TRUE.toString() + " MapSlotsBackfill is "
          + loadStatus.getMapLoad());
    }
    // stop calculation because we know it is overloaded.
    return;
  }

  if (loadStatus.getReduceLoad() <= 0) {
    if (LOG.isDebugEnabled()) {
      LOG.debug(System.currentTimeMillis() + " [REDUCE-LOAD] Overloaded is "
          + Boolean.TRUE.toString() + " ReduceSlotsBackfill is "
          + loadStatus.getReduceLoad());
    }
    // stop calculation because we know it is overloaded.
    return;
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug(System.currentTimeMillis() + " [OVERALL] Overloaded is "
        + Boolean.FALSE.toString() + " Current load status is " + loadStatus);
  }
}
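
The helpers calcEffectiveIncompleteMapTasks and calcEffectiveIncompleteReduceTasks are not part of this snippet. A minimal sketch of the map-side variant, assuming the estimate is the job's map count weighted by its unfinished progress fraction and capped by a per-job slot share (the field maxMapSlotSharePerJob and the exact capping rule are assumptions for illustration, not necessarily the actual Gridmix implementation):

float calcEffectiveIncompleteMapTasks(int mapSlotCapacity, int numMaps,
    float mapProgress) {
  // cap the estimate: a single job is assumed to occupy at most a fixed
  // share of the cluster's map slots (maxMapSlotSharePerJob is assumed)
  float maxEffIncompleteMapTasks =
      Math.max(1.0f, mapSlotCapacity * maxMapSlotSharePerJob);
  // clamp the reported progress into [0, 1] to guard against bogus values
  float progress = Math.max(Math.min(mapProgress, 1.0f), 0.0f);
  // remaining work = total map tasks weighted by the unfinished fraction
  return Math.min(maxEffIncompleteMapTasks, numMaps * (1.0f - progress));
}

The reduce-side variant would be analogous, with the reduce capacity, reduce task count, and reduce progress substituted.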
Use of org.apache.hadoop.mapred.gridmix.Statistics.JobStats in project hadoop by apache.
The class TestGridmixSummary, method testExecutionSummarizer.
/**
 * Test {@link ExecutionSummarizer}.
 */
@Test
@SuppressWarnings({ "unchecked", "rawtypes" })
public void testExecutionSummarizer() throws IOException {
  Configuration conf = new Configuration();
  ExecutionSummarizer es = new ExecutionSummarizer();
  assertEquals("ExecutionSummarizer init failed", Summarizer.NA,
      es.getCommandLineArgsString());

  long startTime = System.currentTimeMillis();
  // test configuration parameters
  String[] initArgs = new String[] { "-Xmx20m", "-Dtest.args='test'" };
  es = new ExecutionSummarizer(initArgs);
  assertEquals("ExecutionSummarizer init failed", "-Xmx20m -Dtest.args='test'",
      es.getCommandLineArgsString());

  // test start time
  assertTrue("Start time mismatch", es.getStartTime() >= startTime);
  assertTrue("Start time mismatch",
      es.getStartTime() <= System.currentTimeMillis());

  // test start() of ExecutionSummarizer
  es.update(null);
  assertEquals("ExecutionSummarizer init failed", 0,
      es.getSimulationStartTime());
  testExecutionSummarizer(0, 0, 0, 0, 0, 0, 0, es);

  long simStartTime = System.currentTimeMillis();
  es.start(null);
  assertTrue("Simulation start time mismatch",
      es.getSimulationStartTime() >= simStartTime);
  assertTrue("Simulation start time mismatch",
      es.getSimulationStartTime() <= System.currentTimeMillis());

  // test with job stats
  JobStats stats = generateFakeJobStats(1, 10, true, false);
  es.update(stats);
  testExecutionSummarizer(1, 10, 0, 1, 1, 0, 0, es);

  // test with a failed job
  stats = generateFakeJobStats(5, 1, false, false);
  es.update(stats);
  testExecutionSummarizer(6, 11, 0, 2, 1, 1, 0, es);

  // test with a successful but lost job
  stats = generateFakeJobStats(1, 1, true, true);
  es.update(stats);
  testExecutionSummarizer(7, 12, 0, 3, 1, 1, 1, es);

  // test with a failed and lost job
  stats = generateFakeJobStats(2, 2, false, true);
  es.update(stats);
  testExecutionSummarizer(9, 14, 0, 4, 1, 1, 2, es);

  // test finalize
  // define a fake job factory
  JobFactory factory = new FakeJobFactory(conf);
  // fake the number of jobs in the trace
  factory.numJobsInTrace = 3;
  Path rootTempDir = new Path(System.getProperty("test.build.data", "/tmp"));
  Path testDir = new Path(rootTempDir, "testGridmixSummary");
  Path testTraceFile = new Path(testDir, "test-trace.json");
  FileSystem fs = FileSystem.getLocal(conf);
  fs.create(testTraceFile).close();

  // finalize the summarizer
  UserResolver resolver = new RoundRobinUserResolver();
  DataStatistics dataStats = new DataStatistics(100, 2, true);
  String policy = GridmixJobSubmissionPolicy.REPLAY.name();
  conf.set(GridmixJobSubmissionPolicy.JOB_SUBMISSION_POLICY, policy);
  es.finalize(factory, testTraceFile.toString(), 1024L, resolver, dataStats,
      conf);

  // test the number of jobs in the trace
  assertEquals("Mismatch in num jobs in trace", 3, es.getNumJobsInTrace());

  // test the trace signature
  String tid =
      ExecutionSummarizer.getTraceSignature(testTraceFile.toString());
  assertEquals("Mismatch in trace signature", tid,
      es.getInputTraceSignature());

  // test the trace location
  Path qPath = fs.makeQualified(testTraceFile);
  assertEquals("Mismatch in trace filename", qPath.toString(),
      es.getInputTraceLocation());

  // test the expected data size
  assertEquals("Mismatch in expected data size", "1 K",
      es.getExpectedDataSize());

  // test the input data statistics
  assertEquals("Mismatch in input data statistics",
      ExecutionSummarizer.stringifyDataStatistics(dataStats),
      es.getInputDataStatistics());

  // test the user resolver
  assertEquals("Mismatch in user resolver", resolver.getClass().getName(),
      es.getUserResolver());

  // test the policy
  assertEquals("Mismatch in policy", policy, es.getJobSubmissionPolicy());

  // test data stringification using large data
  es.finalize(factory, testTraceFile.toString(), 1024 * 1024 * 1024 * 10L,
      resolver, dataStats, conf);
  assertEquals("Mismatch in expected data size", "10 G",
      es.getExpectedDataSize());

  // test trace signature uniqueness
  // touch the trace file
  fs.delete(testTraceFile, false);
  // sleep for 1 sec
  try {
    Thread.sleep(1000);
  } catch (InterruptedException ie) {
    // ignore
  }
  fs.create(testTraceFile).close();
  es.finalize(factory, testTraceFile.toString(), 0L, resolver, dataStats,
      conf);

  // test the missing expected data size
  assertEquals("Mismatch in trace data size", Summarizer.NA,
      es.getExpectedDataSize());
  assertFalse("Mismatch in trace signature",
      tid.equals(es.getInputTraceSignature()));

  // get the new identifier
  tid = ExecutionSummarizer.getTraceSignature(testTraceFile.toString());
  assertEquals("Mismatch in trace signature", tid,
      es.getInputTraceSignature());

  testTraceFile = new Path(testDir, "test-trace2.json");
  fs.create(testTraceFile).close();
  es.finalize(factory, testTraceFile.toString(), 0L, resolver, dataStats,
      conf);
  assertFalse("Mismatch in trace signature",
      tid.equals(es.getInputTraceSignature()));

  // get the new identifier
  tid = ExecutionSummarizer.getTraceSignature(testTraceFile.toString());
  assertEquals("Mismatch in trace signature", tid,
      es.getInputTraceSignature());

  // finalize with a '-' (stdin) trace input
  es.finalize(factory, "-", 0L, resolver, dataStats, conf);
  assertEquals("Mismatch in trace signature", Summarizer.NA,
      es.getInputTraceSignature());
  assertEquals("Mismatch in trace file location", Summarizer.NA,
      es.getInputTraceLocation());
}
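
The generateFakeJobStats helper used above is omitted from this excerpt. A plausible sketch, assuming JobStats can be built from a map count, a reduce count, and a Job, and that a "lost" job is modeled as one whose isSuccessful() check throws (both assumptions are inferred from how the assertions consume the stats, not taken from the actual test source):

private static JobStats generateFakeJobStats(final int numMaps,
    final int numReduces, final boolean isSuccessful, final boolean lost)
    throws IOException {
  // a fake job whose success/lost behavior is controlled by the caller
  Job fakeJob = new Job() {

    @Override
    public int getNumReduceTasks() {
      return numReduces;
    }

    @Override
    public boolean isSuccessful() throws IOException {
      if (lost) {
        // a "lost" job is one whose status can no longer be fetched
        throw new IOException("Job status unavailable (test)");
      }
      return isSuccessful;
    }
  };
  // the JobStats(int, int, Job) constructor is an assumption
  return new JobStats(numMaps, numReduces, fakeJob);
}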