Search in sources :

Example 11 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class TestMergeManager method testIntermediateMemoryMergeAccounting.

/**
 * Checks the used-memory and commit-memory counters reported by {@code MergeManager} while two
 * in-memory map outputs are reserved, committed one after the other, and the mem-to-mem merge
 * is awaited.
 *
 * @throws Exception if any file system or merge operation fails
 */
@Test(timeout = 20000)
public void testIntermediateMemoryMergeAccounting() throws Exception {
    // Uncompressed IntWritable key/value records; mem-to-mem merging enabled with 2 segments.
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, true);
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 2);

    // Directories are created through the enclosing test's localFs before the merge
    // manager gets its own local file system handle below.
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());

    FileSystem mergeFs = FileSystem.getLocal(conf);
    LocalDirAllocator dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext inputContext = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter exceptionReporter = mock(ExceptionReporter.class);
    MergeManager mergeManager = new MergeManager(conf, mergeFs, dirAllocator, inputContext,
        null, null, null, null, exceptionReporter, 2000000, null, false, -1);
    mergeManager.configureAndStart();

    // Nothing reserved or committed yet.
    assertEquals(0, mergeManager.getUsedMemory());
    assertEquals(0, mergeManager.getCommitMemory());

    byte[] firstPayload = generateData(conf, 10, null);
    byte[] secondPayload = generateData(conf, 20, null);
    final int firstSize = firstPayload.length;
    final int secondSize = secondPayload.length;
    final int combinedSize = firstSize + secondSize;

    MapOutput firstMapOutput = mergeManager.reserve(null, firstSize, firstSize, 0);
    MapOutput secondMapOutput = mergeManager.reserve(null, secondSize, secondSize, 0);
    assertEquals(MapOutput.Type.MEMORY, firstMapOutput.getType());
    assertEquals(MapOutput.Type.MEMORY, secondMapOutput.getType());

    // Reserved but uncommitted: used memory grows, commit memory stays at zero.
    assertEquals(0, mergeManager.getCommitMemory());
    assertEquals(combinedSize, mergeManager.getUsedMemory());

    System.arraycopy(firstPayload, 0, firstMapOutput.getMemory(), 0, firstSize);
    System.arraycopy(secondPayload, 0, secondMapOutput.getMemory(), 0, secondSize);

    // Committing only the second output moves just its bytes into commit memory.
    secondMapOutput.commit();
    assertEquals(secondSize, mergeManager.getCommitMemory());
    assertEquals(combinedSize, mergeManager.getUsedMemory());

    // After the first output is committed and the mem-to-mem merge has been awaited,
    // both counters are expected to equal the combined size.
    firstMapOutput.commit();
    mergeManager.waitForMemToMemMerge();
    assertEquals(combinedSize, mergeManager.getCommitMemory());
    assertEquals(combinedSize, mergeManager.getUsedMemory());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) InputContext(org.apache.tez.runtime.api.InputContext) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) Test(org.junit.Test)

Example 12 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class TestMergeManager method testLocalDiskMergeMultipleTasks.

/**
 * Exercises the on-disk merger of {@code MergeManager} for two simulated tasks that each fetch
 * a different partition of the same two source files.
 *
 * <p>Without interruption, both tasks' outputs are committed and merged, and the test asserts
 * that each manager ends with exactly one on-disk output, that the two outputs have different
 * paths, and that each path contains the unique identifier of its own input context.
 *
 * <p>With interruption, only task 0 is merged: an {@code InterruptingThread} is started against
 * task 0's on-disk merger and the test asserts that the merge did not leave exactly one on-disk
 * output.
 *
 * @param interruptInMiddle whether task 0's merge should be interrupted while it is running
 * @throws IOException if a file system or merge operation fails
 * @throws InterruptedException if the calling thread is interrupted while sleeping or while
 *         waiting for the merge
 */
void testLocalDiskMergeMultipleTasks(final boolean interruptInMiddle) throws IOException, InterruptedException {
    Configuration conf = new TezConfiguration(defaultConf);
    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_COMPRESS, false);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, IntWritable.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, IntWritable.class.getName());
    Path localDir = new Path(workDir, "local");
    Path srcDir = new Path(workDir, "srcData");
    localFs.mkdirs(localDir);
    localFs.mkdirs(srcDir);
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir.toString());
    FileSystem localFs = FileSystem.getLocal(conf);
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    InputContext t0inputContext = createMockInputContext(UUID.randomUUID().toString());
    InputContext t1inputContext = createMockInputContext(UUID.randomUUID().toString());
    ExceptionReporter t0exceptionReporter = mock(ExceptionReporter.class);
    ExceptionReporter t1exceptionReporter = mock(ExceptionReporter.class);
    MergeManager t0mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t0inputContext, null, null, null, null, t0exceptionReporter, 2000000, null, false, -1) {

        // override for interruptInMiddle testing: delay the close by two seconds and, if
        // interrupted during the delay, restore the interrupt flag and skip the close.
        @Override
        public synchronized void closeOnDiskFile(FileChunk file) {
            if (interruptInMiddle) {
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
            super.closeOnDiskFile(file);
        }
    };
    MergeManager t0mergeManager = spy(t0mergeManagerReal);
    t0mergeManager.configureAndStart();
    MergeManager t1mergeManagerReal = new MergeManager(conf, localFs, localDirAllocator, t1inputContext, null, null, null, null, t1exceptionReporter, 2000000, null, false, -1);
    MergeManager t1mergeManager = spy(t1mergeManagerReal);
    // Partition 0 Keys 0-2, Partition 1 Keys 3-5
    SrcFileInfo src1Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src1.out"), 2, 3, 0);
    // Partition 0 Keys 6-8, Partition 1 Keys 9-11
    SrcFileInfo src2Info = createFile(conf, localFs, new Path(srcDir, InputAttemptIdentifier.PATH_PREFIX + "src2.out"), 2, 3, 6);
    // Simulating Task 0 fetches partition 0. (targetIndex = 0,1)
    // Simulating Task 1 fetches partition 1. (targetIndex = 0,1)
    InputAttemptIdentifier t0Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName());
    InputAttemptIdentifier t0Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName());
    InputAttemptIdentifier t1Identifier0 = new InputAttemptIdentifier(0, 0, src1Info.path.getName());
    InputAttemptIdentifier t1Identifier1 = new InputAttemptIdentifier(1, 0, src2Info.path.getName());
    MapOutput t0MapOutput0 = getMapOutputForDirectDiskFetch(t0Identifier0, src1Info.path, src1Info.indexedRecords[0], t0mergeManager);
    MapOutput t0MapOutput1 = getMapOutputForDirectDiskFetch(t0Identifier1, src2Info.path, src2Info.indexedRecords[0], t0mergeManager);
    MapOutput t1MapOutput0 = getMapOutputForDirectDiskFetch(t1Identifier0, src1Info.path, src1Info.indexedRecords[1], t1mergeManager);
    MapOutput t1MapOutput1 = getMapOutputForDirectDiskFetch(t1Identifier1, src2Info.path, src2Info.indexedRecords[1], t1mergeManager);
    t0MapOutput0.commit();
    t0MapOutput1.commit();
    verify(t0mergeManager).closeOnDiskFile(t0MapOutput0.getOutputPath());
    verify(t0mergeManager).closeOnDiskFile(t0MapOutput1.getOutputPath());
    // Run the OnDiskMerge via MergeManager
    // Simulate the thread invocation - remove files, and invoke merge
    List<FileChunk> t0MergeFiles = new LinkedList<FileChunk>(t0mergeManager.onDiskMapOutputs);
    t0mergeManager.onDiskMapOutputs.clear();
    if (interruptInMiddle) {
        // Start Interrupting thread
        Thread interruptingThread = new Thread(new InterruptingThread(t0mergeManager.onDiskMerger));
        interruptingThread.start();
        // An interrupt of the test thread itself is not part of the scenario; it is
        // propagated to the caller instead of being printed and ignored.
        Thread.sleep(1000);
        // Will be interrupted in the middle by interruptingThread.
        t0mergeManager.onDiskMerger.startMerge(Sets.newHashSet(t0MergeFiles));
        t0mergeManager.onDiskMerger.waitForMerge();
        Assert.assertNotEquals(1, t0mergeManager.onDiskMapOutputs.size());
    } else {
        t0mergeManager.onDiskMerger.merge(t0MergeFiles);
        Assert.assertEquals(1, t0mergeManager.onDiskMapOutputs.size());

        t1MapOutput0.commit();
        t1MapOutput1.commit();
        verify(t1mergeManager).closeOnDiskFile(t1MapOutput0.getOutputPath());
        verify(t1mergeManager).closeOnDiskFile(t1MapOutput1.getOutputPath());
        // Run the OnDiskMerge via MergeManager
        // Simulate the thread invocation - remove files, and invoke merge
        List<FileChunk> t1MergeFiles = new LinkedList<FileChunk>(t1mergeManager.onDiskMapOutputs);
        t1mergeManager.onDiskMapOutputs.clear();
        t1mergeManager.onDiskMerger.merge(t1MergeFiles);
        Assert.assertEquals(1, t1mergeManager.onDiskMapOutputs.size());
        // The two tasks must not have merged into the same file, and each merged file's
        // path must carry the unique identifier of its own task's input context.
        Assert.assertNotEquals(t0mergeManager.onDiskMapOutputs.iterator().next().getPath(), t1mergeManager.onDiskMapOutputs.iterator().next().getPath());
        Assert.assertTrue(t0mergeManager.onDiskMapOutputs.iterator().next().getPath().toString().contains(t0inputContext.getUniqueIdentifier()));
        Assert.assertTrue(t1mergeManager.onDiskMapOutputs.iterator().next().getPath().toString().contains(t1inputContext.getUniqueIdentifier()));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) InputContext(org.apache.tez.runtime.api.InputContext) InputAttemptIdentifier(org.apache.tez.runtime.library.common.InputAttemptIdentifier) LinkedList(java.util.LinkedList) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) FileChunk(org.apache.hadoop.io.FileChunk) IntWritable(org.apache.hadoop.io.IntWritable) TezConfiguration(org.apache.tez.dag.api.TezConfiguration)

Example 13 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class TestDefaultSorter method setup.

/**
 * Prepares the shared test fixtures: a fresh configuration, the local file system, a qualified
 * working directory named after this test class, and a {@code LocalDirAllocator} keyed on the
 * runtime local-dirs property.
 *
 * @throws IOException if the local file system cannot be obtained
 */
@Before
public void setup() throws IOException {
    conf = new Configuration();
    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
    // DefaultSorter
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_SORTER_CLASS, SorterImpl.LEGACY.name());
    conf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(conf);

    // Working directory: <test.build.data or /tmp>/<test class name>, qualified against localFs.
    Path buildDataRoot = new Path(System.getProperty("test.build.data", "/tmp"));
    Path classScopedDir = new Path(buildDataRoot, TestDefaultSorter.class.getName());
    workingDir = classScopedDir.makeQualified(localFs.getUri(), localFs.getWorkingDirectory());

    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, Text.class.getName());
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, HashPartitioner.class.getName());

    // The working directory is the only configured local dir.
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workingDir.toString());
    dirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TezConfiguration(org.apache.tez.dag.api.TezConfiguration) TezRuntimeConfiguration(org.apache.tez.runtime.library.api.TezRuntimeConfiguration) HashPartitioner(org.apache.tez.runtime.library.partitioner.HashPartitioner) Text(org.apache.hadoop.io.Text) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) ByteString(com.google.protobuf.ByteString) Before(org.junit.Before)

Example 14 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class TestMapProcessor method getMapOutputFile.

/**
 * Resolves the readable local path of the task output file for the given output context.
 *
 * @param jobConf configuration passed to the allocator for the local-dirs lookup
 * @param outputContext context whose unique identifier names the per-attempt output directory
 * @return the path returned by the allocator's read lookup for the attempt's output file
 * @throws IOException if the allocator's lookup fails
 */
private Path getMapOutputFile(Configuration jobConf, OutputContext outputContext) throws IOException {
    LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    // Relative location: <task output dir>/<unique identifier>/<output file name>.
    Path attemptDir = new Path(Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR, outputContext.getUniqueIdentifier());
    Path attemptOutput = new Path(attemptDir, Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);
    return allocator.getLocalPathToRead(attemptOutput.toString(), jobConf);
}
Also used : Path(org.apache.hadoop.fs.Path) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator)

Example 15 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class MapUtils method configureLocalDirs.

/**
 * Points the runtime local-dirs and task local-resource-dir settings at {@code localDir}, then
 * locates (or creates) a {@code "work"} directory through a {@code LocalDirAllocator} and
 * records it as the job-local directory.
 *
 * @param conf configuration to update in place
 * @param localDir directory to use as the single local dir
 * @throws IOException if the work directory cannot be resolved or created
 */
public static void configureLocalDirs(Configuration conf, String localDir) throws IOException {
    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localDir);
    conf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, localDir);
    LOG.info(TezRuntimeFrameworkConfigs.LOCAL_DIRS + " for child: " + conf.get(TezRuntimeFrameworkConfigs.LOCAL_DIRS));
    LOG.info(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR + " for child: " + conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR));

    LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // First, try to find the JOB_LOCAL_DIR on this host.
    Path jobWorkDir;
    try {
        jobWorkDir = allocator.getLocalPathToRead("work", conf);
    } catch (DiskErrorException notFound) {
        // DiskErrorException means dir not found. If not found, it will
        // be created below.
        jobWorkDir = null;
    }

    if (jobWorkDir == null) {
        // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
        jobWorkDir = allocator.getLocalPathForWrite("work", conf);
        FileSystem rawLocalFs = FileSystem.getLocal(conf).getRaw();
        boolean created;
        try {
            created = rawLocalFs.mkdirs(jobWorkDir);
        } catch (FileAlreadyExistsException lostRace) {
            // Since all tasks will be running in their own JVM, the race condition
            // exists where multiple tasks could be trying to create this directory
            // at the same time. If this task loses the race, it's okay because
            // the directory already exists.
            created = true;
            jobWorkDir = allocator.getLocalPathToRead("work", conf);
        }
        if (!created) {
            throw new IOException("Mkdirs failed to create " + jobWorkDir.toString());
        }
    }

    conf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, jobWorkDir.toString());
}
Also used : Path(org.apache.hadoop.fs.Path) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) IOException(java.io.IOException)

Aggregations

LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 28, Path (org.apache.hadoop.fs.Path): 16, Test (org.junit.Test): 13, Configuration (org.apache.hadoop.conf.Configuration): 12, FileSystem (org.apache.hadoop.fs.FileSystem): 12, IOException (java.io.IOException): 8, ExecutorService (java.util.concurrent.ExecutorService): 6, FileContext (org.apache.hadoop.fs.FileContext): 6, LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 6, TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 6, InputContext (org.apache.tez.runtime.api.InputContext): 6, TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 6, HashMap (java.util.HashMap): 5, ExecutionException (java.util.concurrent.ExecutionException): 5, Future (java.util.concurrent.Future): 5, LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility): 5, Map (java.util.Map): 4, Random (java.util.Random): 4, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 4, IntWritable (org.apache.hadoop.io.IntWritable): 4