Use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
From the class TestFSDownload, method testDirDownload.
@Test(timeout = 10000)
public void testDirDownload() throws IOException, InterruptedException {
Configuration conf = new Configuration();
FileContext files = FileContext.getLocalFSFileContext(conf);
final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
files.mkdir(basedir, null, true);
conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
Map<LocalResource, LocalResourceVisibility> rsrcVis = new HashMap<LocalResource, LocalResourceVisibility>();
Random rand = new Random();
long sharedSeed = rand.nextLong();
System.out.println("SEED: " + sharedSeed);
Map<LocalResource, Future<Path>> pending = new HashMap<LocalResource, Future<Path>>();
ExecutorService exec = HadoopExecutors.newSingleThreadExecutor();
LocalDirAllocator dirs = new LocalDirAllocator(TestFSDownload.class.getName());
for (int i = 0; i < 5; ++i) {
LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE;
if (i % 2 == 1) {
vis = LocalResourceVisibility.APPLICATION;
Path p = new Path(basedir, "dir" + i + ".jar");
LocalResource rsrc = createJar(files, p, vis);
rsrcVis.put(rsrc, vis);
Path destPath = dirs.getLocalPathForWrite(basedir.toString(), conf);
destPath = new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
pending.put(rsrc, exec.submit(fsd));
while (!exec.awaitTermination(1000, TimeUnit.MILLISECONDS)) ;
for (Future<Path> path : pending.values()) {
try {
for (Map.Entry<LocalResource, Future<Path>> p : pending.entrySet()) {
Path localized = p.getValue().get();
FileStatus status = files.getFileStatus(localized);
System.out.println("Testing path " + localized);
assert (status.isDirectory());
assert (rsrcVis.containsKey(p.getKey()));
verifyPermsRecursively(localized.getFileSystem(conf), files, localized, rsrcVis.get(p.getKey()));
} catch (ExecutionException e) {
throw new IOException("Failed exec", e);
Use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
From the class TestShufflePlugin, method testProviderApi.
public /**
* A testing method verifying availability and accessibility of API needed for
* AuxiliaryService(s) which are "Shuffle-Providers" (ShuffleHandler and 3rd party plugins)
void testProviderApi() {
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
JobConf mockJobConf = mock(JobConf.class);
try {
mockLocalDirAllocator.getLocalPathToRead("", mockJobConf);
} catch (Exception e) {
assertTrue("Threw exception:" + e, false);
Use of org.apache.hadoop.fs.LocalDirAllocator in project stocator by CODAIT.
From the class SwiftAPIClient, method createTmpFileForWrite.
synchronized File createTmpFileForWrite(String pathStr, long size) throws IOException {
LOG.trace("Create temp file for write {}. size {}", pathStr, size);
if (directoryAllocator == null) {
String bufferTargetDir = !bufferDir.isEmpty() ? BUFFER_DIR : "hadoop.tmp.dir";
LOG.trace("Local buffer directorykey is {}", bufferTargetDir);
directoryAllocator = new LocalDirAllocator(bufferTargetDir);
return directoryAllocator.createTmpFileForWrite(pathStr, size, conf);
Use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.
From the class TestValuesIterator, method createInMemStreams.
* create inmemory segments
* @return
* @throws IOException
public List<TezMerger.Segment> createInMemStreams() throws IOException {
int numberOfStreams = Math.max(2, rnd.nextInt(10));"No of streams : " + numberOfStreams);
SerializationFactory serializationFactory = new SerializationFactory(conf);
Serializer keySerializer = serializationFactory.getSerializer(keyClass);
Serializer valueSerializer = serializationFactory.getSerializer(valClass);
LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
InputContext context = createTezInputContext();
MergeManager mergeManager = new MergeManager(conf, fs, localDirAllocator, context, null, null, null, null, null, 1024 * 1024 * 10, null, false, -1);
DataOutputBuffer keyBuf = new DataOutputBuffer();
DataOutputBuffer valBuf = new DataOutputBuffer();
DataInputBuffer keyIn = new DataInputBuffer();
DataInputBuffer valIn = new DataInputBuffer();;;
List<TezMerger.Segment> segments = new LinkedList<TezMerger.Segment>();
for (int i = 0; i < numberOfStreams; i++) {
BoundedByteArrayOutputStream bout = new BoundedByteArrayOutputStream(1024 * 1024);
InMemoryWriter writer = new InMemoryWriter(bout);
Map<Writable, Writable> data = createData();
// write data
for (Map.Entry<Writable, Writable> entry : data.entrySet()) {
keyIn.reset(keyBuf.getData(), 0, keyBuf.getLength());
valIn.reset(valBuf.getData(), 0, valBuf.getLength());
writer.append(keyIn, valIn);
originalData.put(entry.getKey(), entry.getValue());
IFile.Reader reader = new InMemoryReader(mergeManager, null, bout.getBuffer(), 0, bout.getBuffer().length);
segments.add(new TezMerger.Segment(reader, null));
return segments;
Use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.
From the class TestMergeManager, method testIntermediateMemoryMerge.
@Test(timeout = 60000l)
public void testIntermediateMemoryMerge() throws Throwable {
Configuration conf = new TezConfiguration(defaultConf);
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 3);
Path localDir = new Path(workDir, "local");
Path srcDir = new Path(workDir, "srcData");
conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
FileSystem localFs = FileSystem.getLocal(conf);
LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);
MergeManager mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
assertEquals(0, mergeManager.getUsedMemory());
assertEquals(0, mergeManager.getCommitMemory());
* Test #1
* - Have 4 segments where all of them can fit into memory.
* - After 3 segment commits, it would trigger mem-to-mem merge.
* - All of them can be merged in memory.
InputAttemptIdentifier inputAttemptIdentifier1 = new InputAttemptIdentifier(0, 0);
InputAttemptIdentifier inputAttemptIdentifier2 = new InputAttemptIdentifier(1, 0);
InputAttemptIdentifier inputAttemptIdentifier3 = new InputAttemptIdentifier(2, 0);
InputAttemptIdentifier inputAttemptIdentifier4 = new InputAttemptIdentifier(3, 0);
byte[] data1 = generateDataBySize(conf, 10, inputAttemptIdentifier1);
byte[] data2 = generateDataBySize(conf, 20, inputAttemptIdentifier2);
byte[] data3 = generateDataBySize(conf, 200, inputAttemptIdentifier3);
byte[] data4 = generateDataBySize(conf, 20000, inputAttemptIdentifier4);
MapOutput mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
MapOutput mo2 = mergeManager.reserve(inputAttemptIdentifier1, data2.length, data2.length, 0);
MapOutput mo3 = mergeManager.reserve(inputAttemptIdentifier1, data3.length, data3.length, 0);
MapOutput mo4 = mergeManager.reserve(inputAttemptIdentifier1, data4.length, data4.length, 0);
assertEquals(MapOutput.Type.MEMORY, mo1.getType());
assertEquals(MapOutput.Type.MEMORY, mo2.getType());
assertEquals(MapOutput.Type.MEMORY, mo3.getType());
assertEquals(MapOutput.Type.MEMORY, mo4.getType());
assertEquals(0, mergeManager.getCommitMemory());
// size should be ~20230.
assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
// Committing 3 segments should trigger mem-to-mem merge
// Wait for mem-to-mem to complete
assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size());
assertEquals(1, mergeManager.inMemoryMapOutputs.size());
* Test #2
* - Have 4 segments where all of them can fit into memory, but one of
* them would be big enough that it can not be fit in memory during
* mem-to-mem merging.
* - After 3 segment commits, it would trigger mem-to-mem merge.
* - Smaller segments which can be fit in additional memory allocated gets
* merged.
mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
// Single shuffle limit is 25% of 2000000
data1 = generateDataBySize(conf, 10, inputAttemptIdentifier1);
data2 = generateDataBySize(conf, 400000, inputAttemptIdentifier2);
data3 = generateDataBySize(conf, 400000, inputAttemptIdentifier3);
data4 = generateDataBySize(conf, 400000, inputAttemptIdentifier4);
mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
assertEquals(MapOutput.Type.MEMORY, mo1.getType());
assertEquals(MapOutput.Type.MEMORY, mo2.getType());
assertEquals(MapOutput.Type.MEMORY, mo3.getType());
assertEquals(MapOutput.Type.MEMORY, mo4.getType());
assertEquals(0, mergeManager.getCommitMemory());
assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
// Committing 3 segments should trigger mem-to-mem merge
// Wait for mem-to-mem to complete
* Already all segments are in memory which is around 120000. It
* would not be able to allocate more than 800000 for mem-to-mem. So it
* would pick up only 2 small segments which can be accomodated within
* 800000.
assertEquals(1, mergeManager.inMemoryMergedMapOutputs.size());
assertEquals(2, mergeManager.inMemoryMapOutputs.size());
* Test #3
* - Set number of segments for merging to 4.
* - Have 4 in-memory segments of size 400000 each
* - Committing 4 segments would trigger mem-to-mem
* - But none of them can be merged as there is no enough head room for
* merging in memory.
mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
// Single shuffle limit is 25% of 2000000
data1 = generateDataBySize(conf, 400000, inputAttemptIdentifier1);
data2 = generateDataBySize(conf, 400000, inputAttemptIdentifier2);
data3 = generateDataBySize(conf, 400000, inputAttemptIdentifier3);
data4 = generateDataBySize(conf, 400000, inputAttemptIdentifier4);
mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
assertEquals(MapOutput.Type.MEMORY, mo1.getType());
assertEquals(MapOutput.Type.MEMORY, mo2.getType());
assertEquals(MapOutput.Type.MEMORY, mo3.getType());
assertEquals(MapOutput.Type.MEMORY, mo4.getType());
assertEquals(0, mergeManager.getCommitMemory());
assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
// Committing 3 segments should trigger mem-to-mem merge
// Wait for mem-to-mem to complete
// None of them can be merged as new mem needed for mem-to-mem can't
// accomodate any segements
assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
assertEquals(4, mergeManager.inMemoryMapOutputs.size());
* Test #4
* - Set number of segments for merging to 4.
* - Have 4 in-memory segments of size {490000,490000,490000,230000}
* - Committing 4 segments would trigger mem-to-mem
* - But only 300000 can fit into memory. This should not be
* merged as there is no point in merging single segment. It should be
* added back to the inMemorySegments
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4);
mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
// Single shuffle limit is 25% of 2000000
data1 = generateDataBySize(conf, 490000, inputAttemptIdentifier1);
data2 = generateDataBySize(conf, 490000, inputAttemptIdentifier2);
data3 = generateDataBySize(conf, 490000, inputAttemptIdentifier3);
data4 = generateDataBySize(conf, 230000, inputAttemptIdentifier4);
mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000));
assertEquals(MapOutput.Type.MEMORY, mo1.getType());
assertEquals(MapOutput.Type.MEMORY, mo2.getType());
assertEquals(MapOutput.Type.MEMORY, mo3.getType());
assertEquals(MapOutput.Type.MEMORY, mo4.getType());
assertEquals(0, mergeManager.getCommitMemory());
assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
// Committing 4 segments should trigger mem-to-mem merge
// 4 segments were there originally in inMemoryMapOutput.
int numberOfMapOutputs = 4;
// Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit
// into memory, it should return early
// Check if inMemorySegment has got the MapOutput back for merging later
assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size());
* Test #5
* - Same to #4, but calling mergeManager.close(false) and confirm that final merge doesn't occur.
conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 4);
mergeManager = new MergeManager(conf, localFs, localDirAllocator, inputContext, null, null, null, null, exceptionReporter, 2000000, null, false, -1);
// Single shuffle limit is 25% of 2000000
data1 = generateDataBySize(conf, 490000, inputAttemptIdentifier1);
data2 = generateDataBySize(conf, 490000, inputAttemptIdentifier2);
data3 = generateDataBySize(conf, 490000, inputAttemptIdentifier3);
data4 = generateDataBySize(conf, 230000, inputAttemptIdentifier4);
mo1 = mergeManager.reserve(inputAttemptIdentifier1, data1.length, data1.length, 0);
mo2 = mergeManager.reserve(inputAttemptIdentifier2, data2.length, data2.length, 0);
mo3 = mergeManager.reserve(inputAttemptIdentifier3, data3.length, data3.length, 0);
mo4 = mergeManager.reserve(inputAttemptIdentifier4, data4.length, data4.length, 0);
assertTrue(mergeManager.getUsedMemory() >= (490000 + 490000 + 490000 + 23000));
assertEquals(MapOutput.Type.MEMORY, mo1.getType());
assertEquals(MapOutput.Type.MEMORY, mo2.getType());
assertEquals(MapOutput.Type.MEMORY, mo3.getType());
assertEquals(MapOutput.Type.MEMORY, mo4.getType());
assertEquals(0, mergeManager.getCommitMemory());
assertEquals(data1.length + data2.length + data3.length + data4.length, mergeManager.getUsedMemory());
System.arraycopy(data1, 0, mo1.getMemory(), 0, data1.length);
System.arraycopy(data2, 0, mo2.getMemory(), 0, data2.length);
System.arraycopy(data3, 0, mo3.getMemory(), 0, data3.length);
System.arraycopy(data4, 0, mo4.getMemory(), 0, data4.length);
// Committing 4 segments should trigger mem-to-mem merge
// 4 segments were there originally in inMemoryMapOutput.
numberOfMapOutputs = 4;
// Wait for mem-to-mem to complete. Since only 1 segment (230000) can fit
// into memory, it should return early
// Check if inMemorySegment has got the MapOutput back for merging later
assertEquals(numberOfMapOutputs, mergeManager.inMemoryMapOutputs.size());