Search in sources :

Example 16 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class MRTask method configureLocalDirs.

/**
 * Copies the processor's work directories into the job configuration and
 * records the job-local "work" directory, creating it when no existing one
 * can be found through the local-dir allocator.
 *
 * @throws IOException if the "work" directory cannot be allocated or created
 */
private void configureLocalDirs() throws IOException {
    // TODO NEWTEZ Is most of this functionality required ?
    jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs());
    if (null == jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR)) {
        // Fall back to the process working directory when nothing was configured.
        jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name()));
    }
    jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs());

    final LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Look for an already existing JOB_LOCAL_DIR on this host first.
    Path jobWorkDir;
    try {
        jobWorkDir = allocator.getLocalPathToRead("work", jobConf);
    } catch (DiskErrorException notFound) {
        // The allocator reports a missing directory this way; create it below.
        jobWorkDir = null;
    }

    if (jobWorkDir == null) {
        // No JOB_LOCAL_DIR on this host yet -- allocate and create one.
        jobWorkDir = allocator.getLocalPathForWrite("work", jobConf);
        final FileSystem rawLocalFs = FileSystem.getLocal(jobConf).getRaw();
        boolean created;
        try {
            created = rawLocalFs.mkdirs(jobWorkDir);
        } catch (FileAlreadyExistsException lostRace) {
            // Each task runs in its own JVM, so several of them may try to
            // create this directory concurrently. Losing that race is fine:
            // the directory exists, so just look it up again.
            created = true;
            jobWorkDir = allocator.getLocalPathToRead("work", jobConf);
        }
        if (!created) {
            throw new IOException("Mkdirs failed to create " + jobWorkDir.toString());
        }
    }

    // TODO NEWTEZ Is this required ?
    final String jobWorkDirName = jobWorkDir.toString();
    jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, jobWorkDirName);
    jobConf.set(MRJobConfig.JOB_LOCAL_DIR, jobWorkDirName);
}
Also used : Path(org.apache.hadoop.fs.Path) FileAlreadyExistsException(org.apache.hadoop.mapred.FileAlreadyExistsException) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) IOException(java.io.IOException)

Example 17 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.

the class FetcherOrderedGrouped method getShuffleInputFileName.

/**
 * Resolves the readable local path of a shuffle input file.
 *
 * The relative name is built from the task output directory constant, the
 * given path component and the task output file name constant followed by
 * the optional suffix, and is then mapped through getPathForLocalDir before
 * being looked up in the configured local directories.
 *
 * @param pathComponent directory component placed under the task output dir
 * @param suffix text appended to the output file name; null is treated as ""
 * @return the path returned by the local-dir allocator for reading
 * @throws IOException if the allocator cannot resolve the path
 */
// TODO: Refactor following to make use of methods from TezTaskOutputFiles to be consistent.
@VisibleForTesting
protected Path getShuffleInputFileName(String pathComponent, String suffix) throws IOException {
    final LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    if (suffix == null) {
        suffix = "";
    }
    final String taskOutputDir = Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + Path.SEPARATOR + pathComponent;
    final String outputFileName = Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING + suffix;
    final String relativeOutputPath = taskOutputDir + Path.SEPARATOR + outputFileName;
    final String localDirRelativePath = getPathForLocalDir(relativeOutputPath);
    return allocator.getLocalPathToRead(localDirRelativePath.toString(), conf);
}
Also used : LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 18 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class TestFSDownload method downloadWithFileType.

/**
 * Builds a single archive resource of the requested type under the test
 * base directory, downloads it with FSDownload on a single-thread executor,
 * and then scans the sub-directories of the base directory for leftovers.
 *
 * The scan fails the test when a ZIP resource was left as a plain file with
 * the ".ZIP" name, or when any entry whose name starts with "tmp" remains.
 *
 * @param fileType kind of archive to create and download
 * @throws IOException if setup fails, or wrapping any exception raised by
 *         the download or the directory scan
 * @throws URISyntaxException declared for the resource-creation helpers
 * @throws InterruptedException if interrupted while waiting for the executor
 */
private void downloadWithFileType(TEST_FILE_TYPE fileType) throws IOException, URISyntaxException, InterruptedException {
    final Configuration conf = new Configuration();
    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
    final FileContext files = FileContext.getLocalFSFileContext(conf);
    final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
    files.mkdir(basedir, null, true);
    conf.setStrings(TestFSDownload.class.getName(), basedir.toString());

    // Print the seed so a failing run can be reproduced.
    final Random rand = new Random();
    final long sharedSeed = rand.nextLong();
    rand.setSeed(sharedSeed);
    System.out.println("SEED: " + sharedSeed);

    final Map<LocalResource, Future<Path>> pendingDownloads = new HashMap<LocalResource, Future<Path>>();
    final ExecutorService executor = HadoopExecutors.newSingleThreadExecutor();
    final LocalDirAllocator allocator = new LocalDirAllocator(TestFSDownload.class.getName());
    final int size = rand.nextInt(512) + 512;
    final LocalResourceVisibility visibility = LocalResourceVisibility.PRIVATE;
    final Path resourcePath = new Path(basedir, "1");

    // Only the ZIP case records a file name to look for after the download.
    String zipFileName = "";
    LocalResource resource = null;
    switch (fileType) {
        case TAR:
            resource = createTarFile(files, resourcePath, size, rand, visibility);
            break;
        case JAR:
            resource = createJarFile(files, resourcePath, size, rand, visibility);
            resource.setType(LocalResourceType.PATTERN);
            break;
        case ZIP:
            resource = createZipFile(files, resourcePath, size, rand, visibility);
            zipFileName = resourcePath.getName() + ".ZIP";
            break;
        case TGZ:
            resource = createTgzFile(files, resourcePath, size, rand, visibility);
            break;
    }

    final Path allocated = allocator.getLocalPathForWrite(basedir.toString(), size, conf);
    final Path destPath = new Path(allocated, Long.toString(uniqueNumberGenerator.incrementAndGet()));
    final FSDownload download = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, resource);
    pendingDownloads.put(resource, executor.submit(download));
    executor.shutdown();

    // Block until the single download task has finished.
    boolean terminated;
    do {
        terminated = executor.awaitTermination(1000, TimeUnit.MILLISECONDS);
    } while (!terminated);

    try {
        // see if there was an Exception during download
        pendingDownloads.get(resource).get();

        final boolean zipExpected = zipFileName.endsWith(".ZIP");
        for (FileStatus entry : files.getDefaultFileSystem().listStatus(basedir)) {
            if (!entry.isDirectory()) {
                continue;
            }
            for (FileStatus child : files.getDefaultFileSystem().listStatus(entry.getPath())) {
                final String childName = child.getPath().getName();
                if (zipExpected && childName.equals(zipFileName) && !child.isDirectory()) {
                    Assert.fail("Failure...After unzip, there should have been a directory formed with zip file name but found a file. " + child.getPath());
                }
                if (childName.startsWith("tmp")) {
                    Assert.fail("Tmp File should not have been there " + child.getPath());
                }
            }
        }
    } catch (Exception e) {
        throw new IOException("Failed exec", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) LocalResourceVisibility(org.apache.hadoop.yarn.api.records.LocalResourceVisibility) Random(java.util.Random) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) FileContext(org.apache.hadoop.fs.FileContext)

Example 19 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class YarnChild method configureLocalDirs.

/**
 * Configure mapred-local dirs from the LOCAL_DIRS environment variable and
 * record the job-local "work" directory in the job configuration, creating
 * that directory when it cannot be found.
 *
 * @param task the task being set up (not read by this method)
 * @param job job configuration that receives the local-dir settings
 * @throws IOException if the "work" directory cannot be allocated or created
 */
private static void configureLocalDirs(Task task, JobConf job) throws IOException {
    final String localDirsEnv = System.getenv(Environment.LOCAL_DIRS.name());
    job.setStrings(MRConfig.LOCAL_DIR, StringUtils.getTrimmedStrings(localDirsEnv));
    LOG.info(MRConfig.LOCAL_DIR + " for child: " + job.get(MRConfig.LOCAL_DIR));

    final LocalDirAllocator allocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);

    // Look for an already existing JOB_LOCAL_DIR on this host first.
    Path jobWorkDir;
    try {
        jobWorkDir = allocator.getLocalPathToRead("work", job);
    } catch (DiskErrorException notFound) {
        // The allocator reports a missing directory this way; create it below.
        jobWorkDir = null;
    }

    if (jobWorkDir == null) {
        // No JOB_LOCAL_DIR on this host yet -- allocate and create one.
        jobWorkDir = allocator.getLocalPathForWrite("work", job);
        final FileSystem rawLocalFs = FileSystem.getLocal(job).getRaw();
        boolean created;
        try {
            created = rawLocalFs.mkdirs(jobWorkDir);
        } catch (FileAlreadyExistsException lostRace) {
            // Each task runs in its own JVM, so several of them may try to
            // create this directory concurrently. Losing that race is fine:
            // the directory exists, so just look it up again.
            created = true;
            jobWorkDir = allocator.getLocalPathToRead("work", job);
        }
        if (!created) {
            throw new IOException("Mkdirs failed to create " + jobWorkDir.toString());
        }
    }

    job.set(MRJobConfig.JOB_LOCAL_DIR, jobWorkDir.toString());
}
Also used : Path(org.apache.hadoop.fs.Path) DiskErrorException(org.apache.hadoop.util.DiskChecker.DiskErrorException) FileSystem(org.apache.hadoop.fs.FileSystem) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) IOException(java.io.IOException)

Example 20 with LocalDirAllocator

use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.

the class LocalDistributedCacheManager method setup.

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 *
 * Every resource is downloaded through an FSDownload task, symlinked into
 * the current working directory under its link name, and recorded as a
 * localized archive or file. Resources whose source path is listed on the
 * archive/file classpath are also added to the local classpath list.
 *
 * @param conf job configuration describing the distributed cache; updated
 *        with the localized archive and file lists when they are non-empty
 * @throws IOException if a download is interrupted or fails, or a resource
 *         URL cannot be converted to a path
 * @throws IllegalArgumentException if a resource has type PATTERN, which is
 *         not supported here
 */
public void setup(JobConf conf) throws IOException {
    File workDir = new File(System.getProperty("user.dir"));
    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);
    // Generating unique numbers for FSDownload.
    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());
    // Find which resources are to be put on the local classpath
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }
    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder().setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = HadoopExecutors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        // Submit every download first so they can run concurrently; each one
        // gets its own numbered sub-directory below destPath.
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        // Wait for each download in turn and register its result.
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);
            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException("Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = resource.getResource().toPath();
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.containsKey(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        // The array must be sized from localFiles itself: an array longer than
        // the collection is returned padded with null entries, which would
        // then end up in the joined configuration value.
        conf.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;
}
Also used : ThreadFactory(java.util.concurrent.ThreadFactory) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) URISyntaxException(java.net.URISyntaxException) LinkedHashMap(java.util.LinkedHashMap) FSDownload(org.apache.hadoop.yarn.util.FSDownload) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) AtomicLong(java.util.concurrent.atomic.AtomicLong) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) File(java.io.File) FileContext(org.apache.hadoop.fs.FileContext)

Aggregations

LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 28, Path (org.apache.hadoop.fs.Path): 16, Test (org.junit.Test): 13, Configuration (org.apache.hadoop.conf.Configuration): 12, FileSystem (org.apache.hadoop.fs.FileSystem): 12, IOException (java.io.IOException): 8, ExecutorService (java.util.concurrent.ExecutorService): 6, FileContext (org.apache.hadoop.fs.FileContext): 6, LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 6, TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 6, InputContext (org.apache.tez.runtime.api.InputContext): 6, TezRuntimeConfiguration (org.apache.tez.runtime.library.api.TezRuntimeConfiguration): 6, HashMap (java.util.HashMap): 5, ExecutionException (java.util.concurrent.ExecutionException): 5, Future (java.util.concurrent.Future): 5, LocalResourceVisibility (org.apache.hadoop.yarn.api.records.LocalResourceVisibility): 5, Map (java.util.Map): 4, Random (java.util.Random): 4, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 4, IntWritable (org.apache.hadoop.io.IntWritable): 4