use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
the class TestMerger method testInMemoryAndOnDiskMerger.
@Test
public void testInMemoryAndOnDiskMerger() throws Throwable {
JobID jobId = new JobID("a", 0);
TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);
LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
    reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null,
    null, null, null, new Progress(), new MROutputFiles());
// write map outputs
Map<String, String> map1 = new TreeMap<String, String>();
map1.put("apple", "disgusting");
map1.put("carrot", "delicious");
Map<String, String> map2 = new TreeMap<String, String>();
map1.put("banana", "pretty good");
byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);
// create merger and run merge
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>();
mapOutputs1.add(mapOutput1);
mapOutputs1.add(mapOutput2);
inMemoryMerger.merge(mapOutputs1);
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
// write map outputs
Map<String, String> map3 = new TreeMap<String, String>();
map3.put("apple", "awesome");
map3.put("carrot", "amazing");
Map<String, String> map4 = new TreeMap<String, String>();
map4.put("banana", "bla");
byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);
// create second in-memory merger and run merge
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger();
List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>();
mapOutputs2.add(mapOutput3);
mapOutputs2.add(mapOutput4);
inMemoryMerger2.merge(mapOutputs2);
Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
List<String> keys = new ArrayList<String>();
List<String> values = new ArrayList<String>();
while (iterator.hasNext()) {
CompressAwarePath next = iterator.next();
readOnDiskMapOutput(conf, fs, next, keys, values);
paths.add(next);
}
Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"));
Assert.assertEquals(values, Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"));
mergeManager.close();
mergeManager = new MergeManagerImpl<Text, Text>(
    reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null,
    null, null, null, new Progress(), new MROutputFiles());
MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
onDiskMerger.merge(paths);
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
keys = new ArrayList<String>();
values = new ArrayList<String>();
readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
Assert.assertEquals(keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"));
Assert.assertEquals(values, Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"));
mergeManager.close();
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
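The LocalDirAllocator built at the start of this test is the only piece of the allocator API the merge path needs: MergeManagerImpl asks it for a writable location under the directories configured by mapreduce.cluster.local.dir whenever it spills a merged segment to disk. A minimal sketch of that allocation step, assuming hypothetical local directories (the key is the real MRConfig.LOCAL_DIR constant; the directory values and file name are illustrative only):

Configuration conf = new Configuration();
// Assumed directories for illustration; the test gets them from the default JobConf.
conf.set(MRConfig.LOCAL_DIR, "/tmp/mapred-local-1,/tmp/mapred-local-2");
LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
// Picks a configured directory with enough free space and returns a path beneath it.
Path onDiskSegment = lda.getLocalPathForWrite("output/segment0.out", 16 * 1024, conf);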
use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
the class TestShuffleScheduler method TestSucceedAndFailedCopyMap.
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
JobConf job = new JobConf();
job.setNumMapTasks(2);
// mock creation
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
// unchecked cast needed to mock a class with generic type parameters
@SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
@SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(
    mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator,
    mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector,
    mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter,
    mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
TaskStatus status = new TaskStatus() {
@Override
public boolean getIsMap() {
return false;
}
@Override
public void addFetchFailedMap(TaskAttemptID mapTaskId) {
}
};
Progress progress = new Progress();
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null,
    null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(),
    context.getFailedShuffleCounter());
MapHost host1 = new MapHost("host1", null);
TaskAttemptID failedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
TaskAttemptID succeedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
// handle output fetch failure for failedAttemptID, part I
scheduler.hostFailed(host1.getHostName());
// handle output fetch succeed for succeedAttemptID
long bytes = (long) 500 * 1024 * 1024;
scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
// handle output fetch failure for failedAttemptID, part II
// for MAPREDUCE-6361: verify that no NPE is thrown
scheduler.copyFailed(failedAttemptID, host1, true, false);
}
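In this test the allocator is only a Mockito mock handed to ShuffleConsumerPlugin.Context; the scheduler assertions never touch it. If a fetcher were driven through the same context, the mock could be stubbed roughly as follows (a hedged sketch reusing mockLocalDirAllocator from above; it assumes the usual Mockito static imports for when and the argument matchers, and the returned path is made up):

when(mockLocalDirAllocator.getLocalPathForWrite(anyString(), anyLong(), any(Configuration.class)))
    .thenReturn(new Path("/tmp/mapred-local/shuffle/map_1.out"));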
use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
the class S3AFileSystem method createTmpFileForWrite.
/**
* Demand create the directory allocator, then create a temporary file.
* {@link LocalDirAllocator#createTmpFileForWrite(String, long, Configuration)}.
* @param pathStr prefix for the temporary file
* @param size the size of the file that is going to be written
* @param conf the Configuration object
* @return a unique temporary file
* @throws IOException IO problems
*/
synchronized File createTmpFileForWrite(String pathStr, long size, Configuration conf) throws IOException {
if (directoryAllocator == null) {
String bufferDir = conf.get(BUFFER_DIR) != null ? BUFFER_DIR : "hadoop.tmp.dir";
directoryAllocator = new LocalDirAllocator(bufferDir);
}
return directoryAllocator.createTmpFileForWrite(pathStr, size, conf);
}
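Note that the ternary above selects a configuration key name, not a directory: if fs.s3a.buffer.dir (BUFFER_DIR) is set, its value supplies the buffer directories, otherwise hadoop.tmp.dir is used, and the allocator is created lazily the first time a buffer file is needed. A hedged caller-side sketch of the same pattern outside S3AFileSystem (the prefix and size are illustrative assumptions):

Configuration conf = new Configuration();
String bufferKey = conf.get("fs.s3a.buffer.dir") != null ? "fs.s3a.buffer.dir" : "hadoop.tmp.dir";
LocalDirAllocator allocator = new LocalDirAllocator(bufferKey);
// Creates a unique temporary file under a configured directory that has room for the expected size.
File blockFile = allocator.createTmpFileForWrite("s3ablock-", 5L * 1024 * 1024, conf);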
use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
the class TestFSDownload method testUniqueDestinationPath.
@Test(timeout = 10000)
public void testUniqueDestinationPath() throws Exception {
Configuration conf = new Configuration();
FileContext files = FileContext.getLocalFSFileContext(conf);
final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
files.mkdir(basedir, null, true);
conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
ExecutorService singleThreadedExec = HadoopExecutors.newSingleThreadExecutor();
LocalDirAllocator dirs = new LocalDirAllocator(TestFSDownload.class.getName());
Path destPath = dirs.getLocalPathForWrite(basedir.toString(), conf);
destPath = new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet()));
Path p = new Path(basedir, "dir" + 0 + ".jar");
LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE;
LocalResource rsrc = createJar(files, p, vis);
FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
Future<Path> rPath = singleThreadedExec.submit(fsd);
singleThreadedExec.shutdown();
while (!singleThreadedExec.awaitTermination(1000, TimeUnit.MILLISECONDS)) ;
Assert.assertTrue(rPath.isDone());
// Now FSDownload will not create a random directory to localize the
// resource. Therefore the final localizedPath for the resource should be
// destination directory (passed as an argument) + file name.
Assert.assertEquals(destPath, rPath.get().getParent());
}
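The allocator in this test is keyed by an arbitrary property name, TestFSDownload.class.getName(): any configuration key whose value is a list of directories can back a LocalDirAllocator, and getLocalPathForWrite then resolves a writable path under one of them. A minimal sketch of that pattern with an invented key and directories:

Configuration conf = new Configuration();
conf.setStrings("my.app.local.dirs", "/tmp/dl-a", "/tmp/dl-b"); // hypothetical key and dirs
LocalDirAllocator dirs = new LocalDirAllocator("my.app.local.dirs");
Path anyDest = dirs.getLocalPathForWrite("downloads", conf); // no size hint
Path sizedDest = dirs.getLocalPathForWrite("downloads", 512L, conf); // checks free space first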
use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
the class TestFSDownload method testDownloadBadPublic.
@Test(timeout = 10000)
public void testDownloadBadPublic() throws IOException, URISyntaxException, InterruptedException {
Configuration conf = new Configuration();
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
FileContext files = FileContext.getLocalFSFileContext(conf);
final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
files.mkdir(basedir, null, true);
conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
Map<LocalResource, LocalResourceVisibility> rsrcVis = new HashMap<LocalResource, LocalResourceVisibility>();
Random rand = new Random();
long sharedSeed = rand.nextLong();
rand.setSeed(sharedSeed);
System.out.println("SEED: " + sharedSeed);
Map<LocalResource, Future<Path>> pending = new HashMap<LocalResource, Future<Path>>();
ExecutorService exec = HadoopExecutors.newSingleThreadExecutor();
LocalDirAllocator dirs = new LocalDirAllocator(TestFSDownload.class.getName());
int size = 512;
LocalResourceVisibility vis = LocalResourceVisibility.PUBLIC;
Path path = new Path(basedir, "test-file");
LocalResource rsrc = createFile(files, path, size, rand, vis);
rsrcVis.put(rsrc, vis);
Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf);
destPath = new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
pending.put(rsrc, exec.submit(fsd));
exec.shutdown();
while (!exec.awaitTermination(1000, TimeUnit.MILLISECONDS)) ;
Assert.assertTrue(pending.get(rsrc).isDone());
try {
for (Map.Entry<LocalResource, Future<Path>> p : pending.entrySet()) {
p.getValue().get();
Assert.fail("We localized a file that is not public.");
}
} catch (ExecutionException e) {
Assert.assertTrue(e.getCause() instanceof IOException);
}
}
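Here the allocator only picks a destination before the download is rejected, but the same instance can also locate files it previously allocated. A hedged sketch reusing the dirs and conf variables declared above (the relative path is illustrative; these calls are not part of the test):

if (dirs.ifExists("downloaded/test-file", conf)) {
    // Resolves whichever configured directory actually holds the file, for reading it back.
    Path localized = dirs.getLocalPathToRead("downloaded/test-file", conf);
}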