Usage of org.apache.hadoop.fs.LocalDirAllocator in the Apache Tez project.
From class TestMergeManager, method testIntermediateMemoryMergeAccounting.
/**
 * Checks the used/committed memory counters reported by a {@code MergeManager}
 * while two in-memory map outputs are reserved, filled and committed with
 * TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM switched on and
 * TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS set to 2.
 *
 * <p>Expected counter values, in order:
 * <ul>
 *   <li>after start: used = 0, committed = 0;</li>
 *   <li>after both reservations: used = len1 + len2, committed = 0;</li>
 *   <li>after committing the second output: used = len1 + len2, committed = len2;</li>
 *   <li>after committing the first output and waiting for the mem-to-mem merge:
 *       used = committed = len1 + len2.</li>
 * </ul>
 *
 * @throws Exception if any step of the set-up or the merge fails
 */
@Test(timeout = 20000)
public void testIntermediateMemoryMergeAccounting() throws Exception {
  final String intWritableName = IntWritable.class.getName();

  Configuration conf = new TezConfiguration(defaultConf);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, intWritableName);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, intWritableName);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
  conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 2);

  Path localDir = new Path(workDir, "local");
  Path srcDir = new Path(workDir, "srcData");
  // localFs here is the declaration from outside this method (not visible in
  // this snippet); the file system derived from conf is only obtained below.
  localFs.mkdirs(localDir);
  localFs.mkdirs(srcDir);
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

  FileSystem confLocalFs = FileSystem.getLocal(conf);
  LocalDirAllocator dirAllocator =
      new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
  ExceptionReporter reporter = mock(ExceptionReporter.class);

  MergeManager manager = new MergeManager(conf, confLocalFs, dirAllocator, inputContext,
      null, null, null, null, reporter, 2000000, null, false, -1);
  manager.configureAndStart();

  // Nothing reserved or committed yet.
  assertEquals(0, manager.getUsedMemory());
  assertEquals(0, manager.getCommitMemory());

  byte[] payloadA = generateData(conf, 10, null);
  byte[] payloadB = generateData(conf, 20, null);
  final int combinedLength = payloadA.length + payloadB.length;

  MapOutput outputA = manager.reserve(null, payloadA.length, payloadA.length, 0);
  MapOutput outputB = manager.reserve(null, payloadB.length, payloadB.length, 0);
  assertEquals(MapOutput.Type.MEMORY, outputA.getType());
  assertEquals(MapOutput.Type.MEMORY, outputB.getType());

  // Reserved but not committed.
  assertEquals(0, manager.getCommitMemory());
  assertEquals(combinedLength, manager.getUsedMemory());

  System.arraycopy(payloadA, 0, outputA.getMemory(), 0, payloadA.length);
  System.arraycopy(payloadB, 0, outputB.getMemory(), 0, payloadB.length);

  // Commit the second output first: only its bytes count as committed.
  outputB.commit();
  assertEquals(payloadB.length, manager.getCommitMemory());
  assertEquals(combinedLength, manager.getUsedMemory());

  // Commit the first output, then wait for the mem-to-mem merge to finish.
  outputA.commit();
  manager.waitForMemToMemMerge();
  assertEquals(combinedLength, manager.getCommitMemory());
  assertEquals(combinedLength, manager.getUsedMemory());
}
Usage of org.apache.hadoop.fs.LocalDirAllocator in the Apache Tez project.
From class TestMergeManager, method testLocalDiskMergeMultipleTasks.
/**
 * Runs on-disk merges for two simulated tasks (t0 and t1) that read different
 * partitions of the same two source files through one shared
 * {@code LocalDirAllocator}.
 *
 * <p>When {@code interruptInMiddle} is {@code false}, both tasks merge
 * synchronously; the test asserts that each manager ends up with exactly one
 * on-disk output, that the two output paths differ, and that each path
 * contains the unique identifier of its own input context.
 *
 * <p>When {@code interruptInMiddle} is {@code true}, t0's
 * {@code closeOnDiskFile} is overridden to sleep for two seconds, an
 * {@code InterruptingThread} is started against t0's on-disk merger, and the
 * merge is started asynchronously; the test asserts that t0 does not end up
 * with exactly one on-disk output. The t1 half is skipped in this mode.
 *
 * @param interruptInMiddle whether to exercise the interrupted-merge path
 * @throws IOException if file-system set-up or a merge fails
 * @throws InterruptedException if the calling thread is interrupted while
 *         sleeping or waiting for the merge
 */
void testLocalDiskMergeMultipleTasks(final boolean interruptInMiddle) throws IOException, InterruptedException {
  Configuration conf = new TezConfiguration(defaultConf);
  conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());

  Path localDir = new Path(workDir, "local");
  Path srcDir = new Path(workDir, "srcData");
  // These two calls use the localFs declared outside this method; the local
  // variable of the same name only comes into scope at its declaration below.
  localFs.mkdirs(localDir);
  localFs.mkdirs(srcDir);
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

  FileSystem localFs = FileSystem.getLocal(conf);
  LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString());
  InputContext t1inputContext = createMockInputContext(UUID.randomUUID().toString());
  ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
  ExceptionReporter t1exceptionReporter = mock(ExceptionReporter.class);

  MergeManager t0mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, 2000000, null, false, -1) {
    // override for interruptInMiddle testing
    @Override
    public synchronized void closeOnDiskFile(FileChunk file) {
      if (interruptInMiddle) {
        try {
          // Delay so the interrupting thread can hit the merge while it is in progress.
          Thread.sleep(2000);
        } catch (InterruptedException e) {
          // Restore the interrupt flag and skip registering the file.
          Thread.currentThread().interrupt();
          return;
        }
      }
      super.closeOnDiskFile(file);
    }
  };
  MergeManager t0mergeManager = spy(t0mergeManagerReal);
  t0mergeManager.configureAndStart();

  MergeManager t1mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t1inputContext, null, null, null, null, t1exceptionReporter, 2000000, null, false, -1);
  MergeManager t1mergeManager = spy(t1mergeManagerReal);

  // Partition 0 Keys 0-2, Partition 1 Keys 3-5
  SrcFileInfo src1Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 0);
  // Partition 0 Keys 6-8, Partition 1 Keys 9-11
  SrcFileInfo src2Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 6);

  // Simulating Task 0 fetches partition 0. (targetIndex = 0,1)
  // Simulating Task 1 fetches partition 1. (targetIndex = 0,1)
  InputAttemptIdentifier t0Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName());
  InputAttemptIdentifier t0Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName());
  InputAttemptIdentifier t1Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName());
  InputAttemptIdentifier t1Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName());

  MapOutput t0MapOutput0 = getMapOutputForDirectDiskFetch(t0Identifier0, src1Info.path, src1Info.indexedRecords[0], t0mergeManager);
  MapOutput t0MapOutput1 = getMapOutputForDirectDiskFetch(t0Identifier1, src2Info.path, src2Info.indexedRecords[0], t0mergeManager);
  MapOutput t1MapOutput0 = getMapOutputForDirectDiskFetch(t1Identifier0, src1Info.path, src1Info.indexedRecords[1], t1mergeManager);
  MapOutput t1MapOutput1 = getMapOutputForDirectDiskFetch(t1Identifier1, src2Info.path, src2Info.indexedRecords[1], t1mergeManager);

  t0MapOutput0.commit();
  t0MapOutput1.commit();
  verify(t0mergeManager).closeOnDiskFile(t0MapOutput0.getOutputPath());
  verify(t0mergeManager).closeOnDiskFile(t0MapOutput1.getOutputPath());

  // Run the OnDiskMerge via MergeManager
  // Simulate the thread invocation - remove files, and invoke merge
  List<FileChunk> t0MergeFiles = new LinkedList<FileChunk>();
  t0MergeFiles.addAll(t0mergeManager.onDiskMapOutputs);
  t0mergeManager.onDiskMapOutputs.clear();

  if (!interruptInMiddle) {
    t0mergeManager.onDiskMerger.merge(t0MergeFiles);
    Assert.assertEquals(1, t0mergeManager.onDiskMapOutputs.size());
  } else {
    // Start Interrupting thread
    Thread interruptingThread = new Thread(new InterruptingThread(t0mergeManager.onDiskMerger));
    interruptingThread.start();
    // An interrupt of this (test) thread is propagated to the caller instead of
    // being printed and discarded; the method already declares
    // InterruptedException, so a stray interrupt now fails the test visibly.
    Thread.sleep(1000);
    // Will be interrupted in the middle by interruptingThread.
    t0mergeManager.onDiskMerger.startMerge(Sets.newHashSet(t0MergeFiles));
    t0mergeManager.onDiskMerger.waitForMerge();
    Assert.assertNotEquals(1, t0mergeManager.onDiskMapOutputs.size());
  }

  if (!interruptInMiddle) {
    t1MapOutput0.commit();
    t1MapOutput1.commit();
    verify(t1mergeManager).closeOnDiskFile(t1MapOutput0.getOutputPath());
    verify(t1mergeManager).closeOnDiskFile(t1MapOutput1.getOutputPath());

    // Run the OnDiskMerge via MergeManager
    // Simulate the thread invocation - remove files, and invoke merge
    List<FileChunk> t1MergeFiles = new LinkedList<FileChunk>();
    t1MergeFiles.addAll(t1mergeManager.onDiskMapOutputs);
    t1mergeManager.onDiskMapOutputs.clear();
    t1mergeManager.onDiskMerger.merge(t1MergeFiles);
    Assert.assertEquals(1, t1mergeManager.onDiskMapOutputs.size());

    // The two tasks must not have written their merged output to the same
    // path, and each path must carry its own task's unique identifier.
    Assert.assertNotEquals(t0mergeManager.onDiskMapOutputs.iterator().next().getPath(), t1mergeManager.onDiskMapOutputs.iterator().next().getPath());
    Assert.assertTrue(t0mergeManager.onDiskMapOutputs.iterator().next().getPath().toString().contains(t0inputContext.getUniqueIdentifier()));
    Assert.assertTrue(t1mergeManager.onDiskMapOutputs.iterator().next().getPath().toString().contains(t1inputContext.getUniqueIdentifier()));
  }
}
Usage of org.apache.hadoop.fs.LocalDirAllocator in the Apache Tez project.
From class TestDefaultSorter, method setup.
/**
 * Per-test fixture: creates a fresh {@code Configuration}, obtains the local
 * file system, computes the qualified working directory for this test class
 * and registers it as the only entry of
 * {@code TezRuntimeFrameworkConfigs.LOCAL_DIRS}.
 *
 * <p>The working directory is {@code <test.build.data>/<fully qualified name
 * of TestDefaultSorter>}, where the {@code test.build.data} system property
 * falls back to {@code /tmp}.
 *
 * @throws IOException if the local file system cannot be obtained
 */
@Before
public void setup() throws IOException {
  conf = new Configuration();
  conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
  // DefaultSorter
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, SorterImpl.LEGACY.name());
  conf.set("fs.defaultFS", "file:///");
  localFs = FileSystem.getLocal(conf);

  // Build the working directory step by step, then qualify it against the
  // local file system's URI and working directory.
  Path testDataRoot = new Path(System.getProperty("test.build.data", "/tmp"));
  Path classScopedDir = new Path(testDataRoot, TestDefaultSorter.class.getName());
  workingDir = classScopedDir.makeQualified(localFs.getUri(), localFs.getWorkingDirectory());

  final String textClassName = Text.class.getName();
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, textClassName);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, textClassName);
  conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, HashPartitioner.class.getName());
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workingDir.toString());

  dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
}
Usage of org.apache.hadoop.fs.LocalDirAllocator in the Apache Tez project.
From class TestMapProcessor, method getMapOutputFile.
/**
 * Resolves the readable local path of the output file for the given output
 * context.
 *
 * <p>The relative path looked up is
 * {@code TEZ_RUNTIME_TASK_OUTPUT_DIR/<unique identifier>/TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING};
 * it is resolved through a {@code LocalDirAllocator} bound to
 * {@code TezRuntimeFrameworkConfigs.LOCAL_DIRS}.
 *
 * @param jobConf configuration passed to the allocator for the lookup
 * @param outputContext supplies the unique identifier used as the directory name
 * @return the path returned by the allocator's read lookup
 * @throws IOException if the allocator lookup fails
 */
private Path getMapOutputFile(Configuration jobConf, OutputContext outputContext) throws IOException {
  LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

  Path attemptDir =
      new Path(Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR, outputContext.getUniqueIdentifier());
  Path relativeOutput = new Path(attemptDir, Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);

  return allocator.getLocalPathToRead(relativeOutput.toString(), jobConf);
}
Usage of org.apache.hadoop.fs.LocalDirAllocator in the Apache Tez project.
From class MapUtils, method configureLocalDirs.
/**
 * Points the given configuration at a single local directory and makes sure a
 * {@code "work"} directory is available beneath it.
 *
 * <p>Sets {@code TezRuntimeFrameworkConfigs.LOCAL_DIRS} and
 * {@code MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR} to {@code localDir}, logs
 * both values, then looks up {@code "work"} through a
 * {@code LocalDirAllocator}. If the lookup finds nothing, the directory is
 * created on the raw local file system. The resulting path is stored under
 * {@code MRFrameworkConfigs.JOB_LOCAL_DIR}.
 *
 * @param conf configuration to update
 * @param localDir the local directory to register
 * @throws IOException if the work directory cannot be created, or if an
 *         allocator lookup other than the initial read probe fails
 */
public static void configureLocalDirs(Configuration conf, String localDir) throws IOException {
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, new String[] { localDir });
  conf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, localDir);
  LOG.info(TezRuntimeFrameworkConfigs.LOCAL_DIRS + " for child: " + conf.get(TezRuntimeFrameworkConfigs.LOCAL_DIRS));
  LOG.info(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR + " for child: " + conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR));

  LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

  // First, try to find the JOB_LOCAL_DIR on this host.
  Path jobWorkDir;
  try {
    jobWorkDir = allocator.getLocalPathToRead("work", conf);
  } catch (DiskErrorException notFound) {
    // DiskErrorException means dir not found. If not found, it will
    // be created below.
    jobWorkDir = null;
  }

  if (jobWorkDir == null) {
    // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
    jobWorkDir = allocator.getLocalPathForWrite("work", conf);
    FileSystem rawLocalFs = FileSystem.getLocal(conf).getRaw();

    boolean dirAvailable;
    try {
      dirAvailable = rawLocalFs.mkdirs(jobWorkDir);
    } catch (FileAlreadyExistsException lostRace) {
      // Since all tasks will be running in their own JVM, the race condition
      // exists where multiple tasks could be trying to create this directory
      // at the same time. If this task loses the race, it's okay because
      // the directory already exists.
      dirAvailable = true;
      jobWorkDir = allocator.getLocalPathToRead("work", conf);
    }

    if (!dirAvailable) {
      throw new IOException("Mkdirs failed to create " + jobWorkDir.toString());
    }
  }

  conf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, jobWorkDir.toString());
}
Aggregations