Search in sources :

Example 1 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class TestMerger method testInMemoryAndOnDiskMerger.

@Test
public void testInMemoryAndOnDiskMerger() throws Throwable {
    JobID jobId = new JobID("a", 0);
    TaskAttemptID reduceId1 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
    TaskAttemptID mapId1 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 1), 0);
    TaskAttemptID mapId2 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 2), 0);
    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    // write map outputs
    Map<String, String> map1 = new TreeMap<String, String>();
    map1.put("apple", "disgusting");
    map1.put("carrot", "delicious");
    Map<String, String> map2 = new TreeMap<String, String>();
    map1.put("banana", "pretty good");
    byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
    byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
    System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0, mapOutputBytes1.length);
    System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0, mapOutputBytes2.length);
    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger = mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs1 = new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs1.add(mapOutput1);
    mapOutputs1.add(mapOutput2);
    inMemoryMerger.merge(mapOutputs1);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    TaskAttemptID reduceId2 = new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 3), 0);
    TaskAttemptID mapId3 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 4), 0);
    TaskAttemptID mapId4 = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 5), 0);
    // write map outputs
    Map<String, String> map3 = new TreeMap<String, String>();
    map3.put("apple", "awesome");
    map3.put("carrot", "amazing");
    Map<String, String> map4 = new TreeMap<String, String>();
    map4.put("banana", "bla");
    byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
    byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
    InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
    InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
    System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0, mapOutputBytes3.length);
    System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0, mapOutputBytes4.length);
    //    // create merger and run merge
    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 = mergeManager.createInMemoryMerger();
    List<InMemoryMapOutput<Text, Text>> mapOutputs2 = new ArrayList<InMemoryMapOutput<Text, Text>>();
    mapOutputs2.add(mapOutput3);
    mapOutputs2.add(mapOutput4);
    inMemoryMerger2.merge(mapOutputs2);
    Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
    List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
    Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
    List<String> keys = new ArrayList<String>();
    List<String> values = new ArrayList<String>();
    while (iterator.hasNext()) {
        CompressAwarePath next = iterator.next();
        readOnDiskMapOutput(conf, fs, next, keys, values);
        paths.add(next);
    }
    Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"));
    Assert.assertEquals(values, Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"));
    mergeManager.close();
    mergeManager = new MergeManagerImpl<Text, Text>(reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null, null, null, new Progress(), new MROutputFiles());
    MergeThread<CompressAwarePath, Text, Text> onDiskMerger = mergeManager.createOnDiskMerger();
    onDiskMerger.merge(paths);
    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
    keys = new ArrayList<String>();
    values = new ArrayList<String>();
    readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
    Assert.assertEquals(keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"));
    Assert.assertEquals(values, Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"));
    mergeManager.close();
    Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
    Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
    Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
}
Also used : MROutputFiles(org.apache.hadoop.mapred.MROutputFiles) Progress(org.apache.hadoop.util.Progress) TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) TreeMap(java.util.TreeMap) CompressAwarePath(org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)

Example 2 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class TestShuffleScheduler method TestSucceedAndFailedCopyMap.

@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestSucceedAndFailedCopyMap() throws Exception {
    JobConf job = new JobConf();
    job.setNumMapTasks(2);
    //mock creation
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
    // needed for mock with generic
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    @SuppressWarnings("unchecked") MapOutput<K, V> output = mock(MapOutput.class);
    ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    TaskStatus status = new TaskStatus() {

        @Override
        public boolean getIsMap() {
            return false;
        }

        @Override
        public void addFetchFailedMap(TaskAttemptID mapTaskId) {
        }
    };
    Progress progress = new Progress();
    ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null, null, progress, context.getShuffledMapsCounter(), context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
    MapHost host1 = new MapHost("host1", null);
    TaskAttemptID failedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 0), 0);
    TaskAttemptID succeedAttemptID = new TaskAttemptID(new org.apache.hadoop.mapred.TaskID(new JobID("test", 0), TaskType.MAP, 1), 1);
    // handle output fetch failure for failedAttemptID, part I
    scheduler.hostFailed(host1.getHostName());
    // handle output fetch succeed for succeedAttemptID
    long bytes = (long) 500 * 1024 * 1024;
    scheduler.copySucceeded(succeedAttemptID, host1, bytes, 0, 500000, output);
    // handle output fetch failure for failedAttemptID, part II
    // for MAPREDUCE-6361: verify no NPE exception get thrown out
    scheduler.copyFailed(failedAttemptID, host1, true, false);
}
Also used : Task(org.apache.hadoop.mapred.Task) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) ShuffleConsumerPlugin(org.apache.hadoop.mapred.ShuffleConsumerPlugin) Counter(org.apache.hadoop.mapred.Counters.Counter) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) MapOutputFile(org.apache.hadoop.mapred.MapOutputFile) Progress(org.apache.hadoop.util.Progress) Reporter(org.apache.hadoop.mapred.Reporter) TaskStatus(org.apache.hadoop.mapred.TaskStatus) CombineOutputCollector(org.apache.hadoop.mapred.Task.CombineOutputCollector) TaskUmbilicalProtocol(org.apache.hadoop.mapred.TaskUmbilicalProtocol) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) JobID(org.apache.hadoop.mapreduce.JobID) Test(org.junit.Test)

Example 3 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class S3AFileSystem method createTmpFileForWrite.

/**
   * Demand create the directory allocator, then create a temporary file.
   * {@link LocalDirAllocator#createTmpFileForWrite(String, long, Configuration)}.
   *  @param pathStr prefix for the temporary file
   *  @param size the size of the file that is going to be written
   *  @param conf the Configuration object
   *  @return a unique temporary file
   *  @throws IOException IO problems
   */
synchronized File createTmpFileForWrite(String pathStr, long size, Configuration conf) throws IOException {
    if (directoryAllocator == null) {
        String bufferDir = conf.get(BUFFER_DIR) != null ? BUFFER_DIR : "hadoop.tmp.dir";
        directoryAllocator = new LocalDirAllocator(bufferDir);
    }
    return directoryAllocator.createTmpFileForWrite(pathStr, size, conf);
}
Also used : LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator)

Example 4 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class TestFSDownload method testUniqueDestinationPath.

@Test(timeout = 10000)
public void testUniqueDestinationPath() throws Exception {
    Configuration conf = new Configuration();
    FileContext files = FileContext.getLocalFSFileContext(conf);
    final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
    files.mkdir(basedir, null, true);
    conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
    ExecutorService singleThreadedExec = HadoopExecutors.newSingleThreadExecutor();
    LocalDirAllocator dirs = new LocalDirAllocator(TestFSDownload.class.getName());
    Path destPath = dirs.getLocalPathForWrite(basedir.toString(), conf);
    destPath = new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet()));
    Path p = new Path(basedir, "dir" + 0 + ".jar");
    LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE;
    LocalResource rsrc = createJar(files, p, vis);
    FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
    Future<Path> rPath = singleThreadedExec.submit(fsd);
    singleThreadedExec.shutdown();
    while (!singleThreadedExec.awaitTermination(1000, TimeUnit.MILLISECONDS)) ;
    Assert.assertTrue(rPath.isDone());
    // Now FSDownload will not create a random directory to localize the
    // resource. Therefore the final localizedPath for the resource should be
    // destination directory (passed as an argument) + file name.
    Assert.assertEquals(destPath, rPath.get().getParent());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ExecutorService(java.util.concurrent.ExecutorService) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) FileContext(org.apache.hadoop.fs.FileContext) LocalResourceVisibility(org.apache.hadoop.yarn.api.records.LocalResourceVisibility) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) Test(org.junit.Test)

Example 5 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class TestFSDownload method testDownloadBadPublic.

@Test(timeout = 10000)
public void testDownloadBadPublic() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
    FileContext files = FileContext.getLocalFSFileContext(conf);
    final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
    files.mkdir(basedir, null, true);
    conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
    Map<LocalResource, LocalResourceVisibility> rsrcVis = new HashMap<LocalResource, LocalResourceVisibility>();
    Random rand = new Random();
    long sharedSeed = rand.nextLong();
    rand.setSeed(sharedSeed);
    System.out.println("SEED: " + sharedSeed);
    Map<LocalResource, Future<Path>> pending = new HashMap<LocalResource, Future<Path>>();
    ExecutorService exec = HadoopExecutors.newSingleThreadExecutor();
    LocalDirAllocator dirs = new LocalDirAllocator(TestFSDownload.class.getName());
    int size = 512;
    LocalResourceVisibility vis = LocalResourceVisibility.PUBLIC;
    Path path = new Path(basedir, "test-file");
    LocalResource rsrc = createFile(files, path, size, rand, vis);
    rsrcVis.put(rsrc, vis);
    Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf);
    destPath = new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet()));
    FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
    pending.put(rsrc, exec.submit(fsd));
    exec.shutdown();
    while (!exec.awaitTermination(1000, TimeUnit.MILLISECONDS)) ;
    Assert.assertTrue(pending.get(rsrc).isDone());
    try {
        for (Map.Entry<LocalResource, Future<Path>> p : pending.entrySet()) {
            p.getValue().get();
            Assert.fail("We localized a file that is not public.");
        }
    } catch (ExecutionException e) {
        Assert.assertTrue(e.getCause() instanceof IOException);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IOException(java.io.IOException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) LocalResourceVisibility(org.apache.hadoop.yarn.api.records.LocalResourceVisibility) Random(java.util.Random) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) ExecutionException(java.util.concurrent.ExecutionException) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) FileContext(org.apache.hadoop.fs.FileContext) Test(org.junit.Test)

Aggregations

LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator)28 Path (org.apache.hadoop.fs.Path)16 Test (org.junit.Test)13 Configuration (org.apache.hadoop.conf.Configuration)12 FileSystem (org.apache.hadoop.fs.FileSystem)12 IOException (java.io.IOException)8 ExecutorService (java.util.concurrent.ExecutorService)6 FileContext (org.apache.hadoop.fs.FileContext)6 LocalResource (org.apache.hadoop.yarn.api.records.LocalResource)6 TezConfiguration (org.apache.tez.dag.api.TezConfiguration)6 InputContext (org.apache.tez.runtime.api.InputContext)6 TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration)6 HashMap (java.util.HashMap)5 ExecutionException (java.util.concurrent.ExecutionException)5 Future (java.util.concurrent.Future)5 LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility)5 Map (java.util.Map)4 Random (java.util.Random)4 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)4 IntWritable (org.apache.hadoop.io.IntWritable)4