use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
the class StressJobFactory method checkLoadAndGetSlotsToBackfill.
/**
* We try to use some light-weight mechanism to determine cluster load.
*
* @throws java.io.IOException
*/
protected void checkLoadAndGetSlotsToBackfill() throws IOException, InterruptedException {
if (loadStatus.getJobLoad() <= 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(System.currentTimeMillis() + " [JobLoad] Overloaded is " + Boolean.TRUE.toString() + " NumJobsBackfill is " + loadStatus.getJobLoad());
}
// stop calculation because we know it is overloaded.
return;
}
int mapCapacity = loadStatus.getMapCapacity();
int reduceCapacity = loadStatus.getReduceCapacity();
// return if the cluster status is not set
if (mapCapacity < 0 || reduceCapacity < 0) {
// missing cluster status will result into blocking of job submission
return;
}
// Determine the max permissible map & reduce task load
int maxMapLoad = (int) (overloadMapTaskMapSlotRatio * mapCapacity);
int maxReduceLoad = (int) (overloadReduceTaskReduceSlotRatio * reduceCapacity);
// compute the total number of map & reduce tasks submitted
int totalMapTasks = ClusterStats.getSubmittedMapTasks();
int totalReduceTasks = ClusterStats.getSubmittedReduceTasks();
if (LOG.isDebugEnabled()) {
LOG.debug("Total submitted map tasks: " + totalMapTasks);
LOG.debug("Total submitted reduce tasks: " + totalReduceTasks);
LOG.debug("Max map load: " + maxMapLoad);
LOG.debug("Max reduce load: " + maxReduceLoad);
}
// generate a pessimistic bound on the max running+pending map tasks
// this check is to avoid the heavy-duty actual map load calculation
int mapSlotsBackFill = (int) (maxMapLoad - totalMapTasks);
// generate a pessimistic bound on the max running+pending reduce tasks
// this check is to avoid the heavy-duty actual reduce load calculation
int reduceSlotsBackFill = (int) (maxReduceLoad - totalReduceTasks);
// maintain a list of seen job ids
Set<JobID> seenJobIDs = new HashSet<JobID>();
// permissible limit
if (totalMapTasks > maxMapLoad || totalReduceTasks > maxReduceLoad) {
// if yes, calculate the real load
// include pending & running map tasks.
float incompleteMapTasks = 0;
// include pending & running reduce tasks
float incompleteReduceTasks = 0;
for (JobStats job : ClusterStats.getRunningJobStats()) {
JobID id = job.getJob().getJobID();
seenJobIDs.add(id);
// should be smart enough to take care of completed jobs.
if (blacklistedJobs.contains(id)) {
LOG.warn("Ignoring blacklisted job: " + id);
continue;
}
int noOfMaps = job.getNoOfMaps();
int noOfReduces = job.getNoOfReds();
// What otherwise?
if (noOfMaps > 0 || noOfReduces > 0) {
// get the job's status
JobStatus status = job.getJobStatus();
// blacklist completed jobs and continue
if (status != null && status.isJobComplete()) {
LOG.warn("Blacklisting completed job: " + id);
blacklistedJobs.add(id);
continue;
}
// get the map and reduce tasks' progress
float mapProgress = 0f;
float reduceProgress = 0f;
// check if the status is missing (this can happen for unpolled jobs)
if (status != null) {
mapProgress = status.getMapProgress();
reduceProgress = status.getReduceProgress();
}
incompleteMapTasks += calcEffectiveIncompleteMapTasks(mapCapacity, noOfMaps, mapProgress);
// bail out early
int currentMapSlotsBackFill = (int) (maxMapLoad - incompleteMapTasks);
if (currentMapSlotsBackFill <= 0) {
// reset the reduce task load since we are bailing out
incompleteReduceTasks = totalReduceTasks;
if (LOG.isDebugEnabled()) {
LOG.debug("Terminating overload check due to high map load.");
}
break;
}
// compute the real reduce load
if (noOfReduces > 0) {
incompleteReduceTasks += calcEffectiveIncompleteReduceTasks(reduceCapacity, noOfReduces, reduceProgress);
}
// bail out early
int currentReduceSlotsBackFill = (int) (maxReduceLoad - incompleteReduceTasks);
if (currentReduceSlotsBackFill <= 0) {
// reset the map task load since we are bailing out
incompleteMapTasks = totalMapTasks;
if (LOG.isDebugEnabled()) {
LOG.debug("Terminating overload check due to high reduce load.");
}
break;
}
} else {
LOG.warn("Blacklisting empty job: " + id);
blacklistedJobs.add(id);
}
}
// calculate the real map load on the cluster
mapSlotsBackFill = (int) (maxMapLoad - incompleteMapTasks);
// calculate the real reduce load on the cluster
reduceSlotsBackFill = (int) (maxReduceLoad - incompleteReduceTasks);
// clean up the backlisted set to keep the memory footprint minimal
// retain only the jobs that are seen in this cycle
blacklistedJobs.retainAll(seenJobIDs);
if (LOG.isDebugEnabled() && blacklistedJobs.size() > 0) {
LOG.debug("Blacklisted jobs count: " + blacklistedJobs.size());
}
}
// update
loadStatus.updateMapLoad(mapSlotsBackFill);
loadStatus.updateReduceLoad(reduceSlotsBackFill);
if (loadStatus.getMapLoad() <= 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(System.currentTimeMillis() + " [MAP-LOAD] Overloaded is " + Boolean.TRUE.toString() + " MapSlotsBackfill is " + loadStatus.getMapLoad());
}
// stop calculation because we know it is overloaded.
return;
}
if (loadStatus.getReduceLoad() <= 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(System.currentTimeMillis() + " [REDUCE-LOAD] Overloaded is " + Boolean.TRUE.toString() + " ReduceSlotsBackfill is " + loadStatus.getReduceLoad());
}
// stop calculation because we know it is overloaded.
return;
}
if (LOG.isDebugEnabled()) {
LOG.debug(System.currentTimeMillis() + " [OVERALL] Overloaded is " + Boolean.FALSE.toString() + "Current load Status is " + loadStatus);
}
}
use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
the class TestMerger method testInMemoryAndOnDiskMerger.
@Test
public void testInMemoryAndOnDiskMerger() throws Throwable {
JobID jobId = new JobID("a", 0);
TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);
LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
// write map outputs
Map<String, String> map1 = new TreeMap<String, String>();
map1.put("apple", "disgusting");
map1.put("carrot", "delicious");
Map<String, String> map2 = new TreeMap<String, String>();
map1.put("banana", "pretty good");
byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);
// create merger and run merge
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>();
mapOutputs1.add(mapOutput1);
mapOutputs1.add(mapOutput2);
inMemoryMerger.merge(mapOutputs1);
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
// write map outputs
Map<String, String> map3 = new TreeMap<String, String>();
map3.put("apple", "awesome");
map3.put("carrot", "amazing");
Map<String, String> map4 = new TreeMap<String, String>();
map4.put("banana", "bla");
byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);
// // create merger and run merge
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>();
mapOutputs2.add(mapOutput3);
mapOutputs2.add(mapOutput4);
inMemoryMerger2.merge(mapOutputs2);
Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
List<String> keys = new ArrayList<String>();
List<String> values = new ArrayList<String>();
while (iterator.hasNext()) {
CompressAwarePath next = iterator.next();
readOnDiskMapOutput(conf, fs, next, keys, values);
paths.add(next);
}
Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"));
Assert.assertEquals(values, Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"));
mergeManager.close();
mergeManager = new MergeManagerImpl<Text, Text>(reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
onDiskMerger.merge(paths);
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
keys = new ArrayList<String>();
values = new ArrayList<String>();
readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
Assert.assertEquals(keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"));
Assert.assertEquals(values, Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"));
mergeManager.close();
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
the class TestShuffleScheduler method TestSucceedAndFailedCopyMap.
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
JobConf job = new JobConf();
job.setNumMapTasks(2);
//mock creation
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
// needed for mock with generic
@SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
@SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
TaskStatus status = new TaskStatus() {
@Override
public boolean getIsMap() {
return false;
}
@Override
public void addFetchFailedMap(TaskAttemptID mapTaskId) {
}
};
Progress progress = new Progress();
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
MapHost host1 = new MapHost("host1", null);
TaskAttemptID failedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
TaskAttemptID succeedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
// handle output fetch failure for failedAttemptID, part I
scheduler.hostFailed(host1.getHostName());
// handle output fetch succeed for succeedAttemptID
long bytes = (long) 500 * 1024 * 1024;
scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
// handle output fetch failure for failedAttemptID, part II
// for MAPREDUCE-6361: verify no NPE exception get thrown out
scheduler.copyFailed(failedAttemptID, host1, true, false);
}
use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
the class TestCLI method testListAttemptIdsWithInvalidInputs.
@Test
public void testListAttemptIdsWithInvalidInputs() throws Exception {
JobID jobId = JobID.forName(jobIdStr);
Cluster mockCluster = mock(Cluster.class);
Job job = mock(Job.class);
CLI cli = spy(new CLI(new Configuration()));
doReturn(mockCluster).when(cli).createCluster();
when(mockCluster.getJob(jobId)).thenReturn(job);
int retCode_JOB_SETUP = cli.run(new String[] { "-list-attempt-ids", jobIdStr, "JOB_SETUP", "running" });
int retCode_JOB_CLEANUP = cli.run(new String[] { "-list-attempt-ids", jobIdStr, "JOB_CLEANUP", "running" });
int retCode_invalidTaskState = cli.run(new String[] { "-list-attempt-ids", jobIdStr, "REDUCE", "complete" });
String jobIdStr2 = "job_1015298225799_0016";
int retCode_invalidJobId = cli.run(new String[] { "-list-attempt-ids", jobIdStr2, "MAP", "running" });
assertEquals("JOB_SETUP is an invalid input,exit code should be -1", -1, retCode_JOB_SETUP);
assertEquals("JOB_CLEANUP is an invalid input,exit code should be -1", -1, retCode_JOB_CLEANUP);
assertEquals("complete is an invalid input,exit code should be -1", -1, retCode_invalidTaskState);
assertEquals("Non existing job id should be skippted with -1", -1, retCode_invalidJobId);
}
use of org.apache.hadoop.mapreduce.JobID in project hadoop by apache.
the class TestJobSplitWriter method testMaxBlockLocationsOldSplits.
@Test
public void testMaxBlockLocationsOldSplits() throws Exception {
TEST_DIR.mkdirs();
try {
Configuration conf = new Configuration();
conf.setInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY, 4);
Path submitDir = new Path(TEST_DIR.getAbsolutePath());
FileSystem fs = FileSystem.getLocal(conf);
org.apache.hadoop.mapred.FileSplit split = new org.apache.hadoop.mapred.FileSplit(new Path("/some/path"), 0, 1, new String[] { "loc1", "loc2", "loc3", "loc4", "loc5" });
JobSplitWriter.createSplitFiles(submitDir, conf, fs, new org.apache.hadoop.mapred.InputSplit[] { split });
JobSplit.TaskSplitMetaInfo[] infos = SplitMetaInfoReader.readSplitMetaInfo(new JobID(), fs, conf, submitDir);
assertEquals("unexpected number of splits", 1, infos.length);
assertEquals("unexpected number of split locations", 4, infos[0].getLocations().length);
} finally {
FileUtil.fullyDelete(TEST_DIR);
}
}
Aggregations