Use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.
The class MRTask, method configureLocalDirs.
private void configureLocalDirs() throws IOException {
  // TODO NEWTEZ Is most of this functionality required ?
  jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs());
  if (jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR) == null) {
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name()));
  }
  jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs());
  LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  Path workDir = null;
  // First, try to find the JOB_LOCAL_DIR on this host.
  try {
    workDir = lDirAlloc.getLocalPathToRead("work", jobConf);
  } catch (DiskErrorException e) {
    // DiskErrorException means dir not found. If not found, it will
    // be created below.
  }
  if (workDir == null) {
    // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
    workDir = lDirAlloc.getLocalPathForWrite("work", jobConf);
    FileSystem lfs = FileSystem.getLocal(jobConf).getRaw();
    boolean madeDir = false;
    try {
      madeDir = lfs.mkdirs(workDir);
    } catch (FileAlreadyExistsException e) {
      // Since all tasks will be running in their own JVM, the race condition
      // exists where multiple tasks could be trying to create this directory
      // at the same time. If this task loses the race, it's okay because
      // the directory already exists.
      madeDir = true;
      workDir = lDirAlloc.getLocalPathToRead("work", jobConf);
    }
    if (!madeDir) {
      throw new IOException("Mkdirs failed to create " + workDir.toString());
    }
  }
  // TODO NEWTEZ Is this required ?
  jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString());
  jobConf.set(MRJobConfig.JOB_LOCAL_DIR, workDir.toString());
}
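To see the pattern above in isolation, here is a minimal, self-contained sketch of the same read-then-create fallback with LocalDirAllocator. The configuration key local.dirs.demo and the directory values are made up for illustration; real code keys the allocator off TezRuntimeFrameworkConfigs.LOCAL_DIRS or MRConfig.LOCAL_DIR as in these snippets.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

public class LocalDirAllocatorDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Hypothetical config key holding the list of local dirs.
    conf.setStrings("local.dirs.demo", "/tmp/demo/dir1", "/tmp/demo/dir2");
    LocalDirAllocator alloc = new LocalDirAllocator("local.dirs.demo");

    Path workDir = null;
    try {
      // Succeeds only if "work" already exists under one of the configured dirs.
      workDir = alloc.getLocalPathToRead("work", conf);
    } catch (DiskErrorException e) {
      // Not found on any local dir; fall through and create it below.
    }
    if (workDir == null) {
      // Choose one of the configured local dirs for a new path.
      workDir = alloc.getLocalPathForWrite("work", conf);
      FileSystem lfs = FileSystem.getLocal(conf).getRaw();
      if (!lfs.mkdirs(workDir)) {
        throw new IOException("Mkdirs failed to create " + workDir);
      }
    }
    System.out.println("work dir: " + workDir);
  }
}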
Use of org.apache.hadoop.fs.LocalDirAllocator in project tez by apache.
The class FetcherOrderedGrouped, method getShuffleInputFileName.
@VisibleForTesting
// TODO: Refactor following to make use of methods from TezTaskOutputFiles to be consistent.
protected Path getShuffleInputFileName(String pathComponent, String suffix) throws IOException {
  LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
  suffix = suffix != null ? suffix : "";
  String outputPath = Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + Path.SEPARATOR
      + pathComponent + Path.SEPARATOR
      + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING + suffix;
  String pathFromLocalDir = getPathForLocalDir(outputPath);
  return localDirAllocator.getLocalPathToRead(pathFromLocalDir, conf);
}
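A rough sketch of what the lookup resolves: the method builds a relative path of the form output-dir/pathComponent/output-file-name plus suffix, then asks LocalDirAllocator which configured local dir actually holds it. The key, path component, and literal directory and file names below are hypothetical; ifExists() and getLocalPathToRead() are real LocalDirAllocator methods.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;

public class ShuffleInputLookupDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.setStrings("local.dirs.demo", "/tmp/demo/dir1", "/tmp/demo/dir2");
    LocalDirAllocator alloc = new LocalDirAllocator("local.dirs.demo");

    // Hypothetical relative path mirroring the outputPath built above.
    String relative = "output/attempt_0000_0_m_000001_0/file.out";
    if (alloc.ifExists(relative, conf)) {
      // Returns the absolute path on whichever configured dir contains it.
      Path mapOutput = alloc.getLocalPathToRead(relative, conf);
      System.out.println("shuffle input: " + mapOutput);
    } else {
      // Without the existence check, getLocalPathToRead would throw
      // DiskErrorException here, as handled in the configureLocalDirs snippets.
      System.out.println("no such map output on any local dir");
    }
  }
}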
Use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
The class TestFSDownload, method downloadWithFileType.
private void downloadWithFileType(TEST_FILE_TYPE fileType)
    throws IOException, URISyntaxException, InterruptedException {
  Configuration conf = new Configuration();
  conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
  FileContext files = FileContext.getLocalFSFileContext(conf);
  final Path basedir = files.makeQualified(new Path("target", TestFSDownload.class.getSimpleName()));
  files.mkdir(basedir, null, true);
  conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
  Random rand = new Random();
  long sharedSeed = rand.nextLong();
  rand.setSeed(sharedSeed);
  System.out.println("SEED: " + sharedSeed);
  Map<LocalResource, Future<Path>> pending = new HashMap<LocalResource, Future<Path>>();
  ExecutorService exec = HadoopExecutors.newSingleThreadExecutor();
  LocalDirAllocator dirs = new LocalDirAllocator(TestFSDownload.class.getName());
  int size = rand.nextInt(512) + 512;
  LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE;
  Path p = new Path(basedir, "" + 1);
  String strFileName = "";
  LocalResource rsrc = null;
  switch (fileType) {
    case TAR:
      rsrc = createTarFile(files, p, size, rand, vis);
      break;
    case JAR:
      rsrc = createJarFile(files, p, size, rand, vis);
      rsrc.setType(LocalResourceType.PATTERN);
      break;
    case ZIP:
      rsrc = createZipFile(files, p, size, rand, vis);
      strFileName = p.getName() + ".ZIP";
      break;
    case TGZ:
      rsrc = createTgzFile(files, p, size, rand, vis);
      break;
  }
  Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf);
  destPath = new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet()));
  FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
  pending.put(rsrc, exec.submit(fsd));
  exec.shutdown();
  while (!exec.awaitTermination(1000, TimeUnit.MILLISECONDS));
  try {
    // see if there was an Exception during download
    pending.get(rsrc).get();
    FileStatus[] filesstatus = files.getDefaultFileSystem().listStatus(basedir);
    for (FileStatus filestatus : filesstatus) {
      if (filestatus.isDirectory()) {
        FileStatus[] childFiles = files.getDefaultFileSystem().listStatus(filestatus.getPath());
        for (FileStatus childfile : childFiles) {
          if (strFileName.endsWith(".ZIP") && childfile.getPath().getName().equals(strFileName)
              && !childfile.isDirectory()) {
            Assert.fail("Failure...After unzip, there should have been a"
                + " directory formed with zip file name but found a file. " + childfile.getPath());
          }
          if (childfile.getPath().getName().startsWith("tmp")) {
            Assert.fail("Tmp File should not have been there " + childfile.getPath());
          }
        }
      }
    }
  } catch (Exception e) {
    throw new IOException("Failed exec", e);
  }
}
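The test keys the allocator off the test class name and uses the size-aware overload of getLocalPathForWrite, which skips configured dirs without enough free space for the expected size. A minimal sketch of that overload on its own (the key, path, and size are hypothetical):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;

public class SizeHintDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.setStrings("local.dirs.demo", "/tmp/demo/dir1", "/tmp/demo/dir2");
    LocalDirAllocator alloc = new LocalDirAllocator("local.dirs.demo");

    long expectedSize = 1024;  // bytes the caller expects to write
    // Only configured dirs with at least expectedSize bytes free are considered.
    Path dest = alloc.getLocalPathForWrite("downloads/resource-1", expectedSize, conf);
    System.out.println("download target: " + dest);
  }
}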
Use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
The class YarnChild, method configureLocalDirs.
/**
 * Configure mapred-local dirs. This config is used by the task for finding
 * out an output directory.
 * @throws IOException
 */
private static void configureLocalDirs(Task task, JobConf job) throws IOException {
  String[] localSysDirs = StringUtils.getTrimmedStrings(System.getenv(Environment.LOCAL_DIRS.name()));
  job.setStrings(MRConfig.LOCAL_DIR, localSysDirs);
  LOG.info(MRConfig.LOCAL_DIR + " for child: " + job.get(MRConfig.LOCAL_DIR));
  LocalDirAllocator lDirAlloc = new LocalDirAllocator(MRConfig.LOCAL_DIR);
  Path workDir = null;
  // First, try to find the JOB_LOCAL_DIR on this host.
  try {
    workDir = lDirAlloc.getLocalPathToRead("work", job);
  } catch (DiskErrorException e) {
    // DiskErrorException means dir not found. If not found, it will
    // be created below.
  }
  if (workDir == null) {
    // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
    workDir = lDirAlloc.getLocalPathForWrite("work", job);
    FileSystem lfs = FileSystem.getLocal(job).getRaw();
    boolean madeDir = false;
    try {
      madeDir = lfs.mkdirs(workDir);
    } catch (FileAlreadyExistsException e) {
      // Since all tasks will be running in their own JVM, the race condition
      // exists where multiple tasks could be trying to create this directory
      // at the same time. If this task loses the race, it's okay because
      // the directory already exists.
      madeDir = true;
      workDir = lDirAlloc.getLocalPathToRead("work", job);
    }
    if (!madeDir) {
      throw new IOException("Mkdirs failed to create " + workDir.toString());
    }
  }
  job.set(MRJobConfig.JOB_LOCAL_DIR, workDir.toString());
}
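Once configureLocalDirs has run, task-side code can recover the directory from the job configuration. A minimal sketch (not from the Hadoop source) of that lookup:

import java.io.File;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class JobLocalDirLookup {
  // Returns the per-job work dir set by configureLocalDirs, or null if unset.
  public static File jobLocalDir(JobConf job) {
    String dir = job.get(MRJobConfig.JOB_LOCAL_DIR);
    return dir == null ? null : new File(dir);
  }
}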
Use of org.apache.hadoop.fs.LocalDirAllocator in project hadoop by apache.
The class LocalDistributedCacheManager, method setup.
/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 * @param conf
 * @throws IOException
 */
public void setup(JobConf conf) throws IOException {
  File workDir = new File(System.getProperty("user.dir"));
  // Generate YARN local resources objects corresponding to the distributed
  // cache configuration
  Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
  MRApps.setupDistributedCache(conf, localResources);
  // Generating unique numbers for FSDownload.
  AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());
  // Find which resources are to be put on the local classpath
  Map<String, Path> classpaths = new HashMap<String, Path>();
  Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
  if (archiveClassPaths != null) {
    for (Path p : archiveClassPaths) {
      classpaths.put(p.toUri().getPath().toString(), p);
    }
  }
  Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
  if (fileClassPaths != null) {
    for (Path p : fileClassPaths) {
      classpaths.put(p.toUri().getPath().toString(), p);
    }
  }
  // Localize the resources
  LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
  FileContext localFSFileContext = FileContext.getLocalFSFileContext();
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  ExecutorService exec = null;
  try {
    ThreadFactory tf = new ThreadFactoryBuilder()
        .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
    exec = HadoopExecutors.newCachedThreadPool(tf);
    Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
    Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
    for (LocalResource resource : localResources.values()) {
      Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
          new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
      Future<Path> future = exec.submit(download);
      resourcesToPaths.put(resource, future);
    }
    for (Entry<String, LocalResource> entry : localResources.entrySet()) {
      LocalResource resource = entry.getValue();
      Path path;
      try {
        path = resourcesToPaths.get(resource).get();
      } catch (InterruptedException e) {
        throw new IOException(e);
      } catch (ExecutionException e) {
        throw new IOException(e);
      }
      String pathString = path.toUri().toString();
      String link = entry.getKey();
      String target = new File(path.toUri()).getPath();
      symlink(workDir, target, link);
      if (resource.getType() == LocalResourceType.ARCHIVE) {
        localArchives.add(pathString);
      } else if (resource.getType() == LocalResourceType.FILE) {
        localFiles.add(pathString);
      } else if (resource.getType() == LocalResourceType.PATTERN) {
        // PATTERN is not currently used in local mode
        throw new IllegalArgumentException("Resource type PATTERN is not "
            + "implemented yet. " + resource.getResource());
      }
      Path resourcePath;
      try {
        resourcePath = resource.getResource().toPath();
      } catch (URISyntaxException e) {
        throw new IOException(e);
      }
      LOG.info(String.format("Localized %s as %s", resourcePath, path));
      String cp = resourcePath.toUri().getPath();
      if (classpaths.keySet().contains(cp)) {
        localClasspaths.add(path.toUri().getPath().toString());
      }
    }
  } finally {
    if (exec != null) {
      exec.shutdown();
    }
  }
  // Update the configuration object with localized data.
  if (!localArchives.isEmpty()) {
    conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
        StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
  }
  if (!localFiles.isEmpty()) {
    conf.set(MRJobConfig.CACHE_LOCALFILES,
        StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
  }
  setupCalled = true;
}
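After setup, the localized paths live in the job configuration under MRJobConfig.CACHE_LOCALARCHIVES and MRJobConfig.CACHE_LOCALFILES. A minimal sketch (not from the Hadoop source) of reading them back: StringUtils.arrayToString above writes a comma-separated list, so getStrings() recovers the individual local paths.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class LocalizedPathsDump {
  public static void dump(JobConf conf) {
    String[] archives = conf.getStrings(MRJobConfig.CACHE_LOCALARCHIVES, new String[0]);
    String[] files = conf.getStrings(MRJobConfig.CACHE_LOCALFILES, new String[0]);
    for (String a : archives) {
      System.out.println("localized archive: " + a);
    }
    for (String f : files) {
      System.out.println("localized file: " + f);
    }
  }
}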