Use of org.apache.hadoop.mapreduce.Cluster in project hadoop by apache.
The class DistCp, method createMetaFolderPath.
/**
 * Create a default working folder for the job, under the
 * job staging directory
 *
 * @return the working folder path
 * @throws Exception if any error occurs
 */
private Path createMetaFolderPath() throws Exception {
  Configuration configuration = getConf();
  Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(configuration), configuration);
  Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
  if (LOG.isDebugEnabled())
    LOG.debug("Meta folder location: " + metaFolderPath);
  configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());
  return metaFolderPath;
}
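The pattern shown here, constructing a Cluster from the job Configuration and handing it to JobSubmissionFiles.getStagingDir to locate the per-user staging directory, can be exercised on its own. A minimal sketch, assuming a Hadoop client configuration on the classpath (the class name StagingDirExample and the printed message are illustrative, not part of DistCp):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;

public class StagingDirExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Connects to the cluster described by the configuration
    // (local runner or YARN, depending on mapreduce.framework.name).
    Cluster cluster = new Cluster(conf);
    try {
      // Resolve (and create if necessary) the per-user job staging directory.
      Path stagingDir = JobSubmissionFiles.getStagingDir(cluster, conf);
      System.out.println("Job staging directory: " + stagingDir);
    } finally {
      cluster.close();
    }
  }
}

DistCp then creates its own working folder underneath that staging directory, as the snippet above shows.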
Use of org.apache.hadoop.mapreduce.Cluster in project hadoop by apache.
The class HadoopArchives, method archive.
/**
 * Archive the given source paths into the destination.
 * @param parentPath the parent path of all the source paths
 * @param srcPaths the src paths to be archived
 * @param archiveName the name of the archive to be created
 * @param dest the destination directory that will contain the archive
 * @throws IOException if the archive job cannot be set up or fails
 */
void archive(Path parentPath, List<Path> srcPaths, String archiveName, Path dest) throws IOException {
  checkPaths(conf, srcPaths);
  int numFiles = 0;
  long totalSize = 0;
  FileSystem fs = parentPath.getFileSystem(conf);
  this.blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  this.partSize = conf.getLong(HAR_PARTSIZE_LABEL, partSize);
  conf.setLong(HAR_BLOCKSIZE_LABEL, blockSize);
  conf.setLong(HAR_PARTSIZE_LABEL, partSize);
  conf.set(DST_HAR_LABEL, archiveName);
  conf.set(SRC_PARENT_LABEL, parentPath.makeQualified(fs).toString());
  conf.setInt(HAR_REPLICATION_LABEL, repl);
  Path outputPath = new Path(dest, archiveName);
  FileOutputFormat.setOutputPath(conf, outputPath);
  FileSystem outFs = outputPath.getFileSystem(conf);
  if (outFs.exists(outputPath)) {
    throw new IOException("Archive path: " + outputPath.toString() + " already exists");
  }
  if (outFs.isFile(dest)) {
    throw new IOException("Destination " + dest.toString() + " should be a directory but is a file");
  }
  conf.set(DST_DIR_LABEL, outputPath.toString());
  Path stagingArea;
  try {
    stagingArea = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
  Path jobDirectory = new Path(stagingArea, NAME + "_" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE), 36));
  FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
  FileSystem.mkdirs(jobDirectory.getFileSystem(conf), jobDirectory, mapredSysPerms);
  conf.set(JOB_DIR_LABEL, jobDirectory.toString());
  //get a tmp directory for input splits
  FileSystem jobfs = jobDirectory.getFileSystem(conf);
  Path srcFiles = new Path(jobDirectory, "_har_src_files");
  conf.set(SRC_LIST_LABEL, srcFiles.toString());
  SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf, srcFiles, LongWritable.class, HarEntry.class, SequenceFile.CompressionType.NONE);
  // create single list of files and dirs
  try {
    // write the top level dirs in first
    writeTopLevelDirs(srcWriter, srcPaths, parentPath);
    srcWriter.sync();
    // one at a time
    for (Path src : srcPaths) {
      ArrayList<FileStatusDir> allFiles = new ArrayList<FileStatusDir>();
      FileStatus fstatus = fs.getFileStatus(src);
      FileStatusDir fdir = new FileStatusDir(fstatus, null);
      recursivels(fs, fdir, allFiles);
      for (FileStatusDir statDir : allFiles) {
        FileStatus stat = statDir.getFileStatus();
        long len = stat.isDirectory() ? 0 : stat.getLen();
        final Path path = relPathToRoot(stat.getPath(), parentPath);
        final String[] children;
        if (stat.isDirectory()) {
          //get the children
          FileStatus[] list = statDir.getChildren();
          children = new String[list.length];
          for (int i = 0; i < list.length; i++) {
            children[i] = list[i].getPath().getName();
          }
        } else {
          children = null;
        }
        append(srcWriter, len, path.toString(), children);
        srcWriter.sync();
        numFiles++;
        totalSize += len;
      }
    }
  } finally {
    srcWriter.close();
  }
  conf.setInt(SRC_COUNT_LABEL, numFiles);
  conf.setLong(TOTAL_SIZE_LABEL, totalSize);
  int numMaps = (int) (totalSize / partSize);
  //run at least one map.
  conf.setNumMapTasks(numMaps == 0 ? 1 : numMaps);
  conf.setNumReduceTasks(1);
  conf.setInputFormat(HArchiveInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(HArchivesMapper.class);
  conf.setReducerClass(HArchivesReducer.class);
  conf.setMapOutputKeyClass(IntWritable.class);
  conf.setMapOutputValueClass(Text.class);
  FileInputFormat.addInputPath(conf, jobDirectory);
  //make sure no speculative execution is done
  conf.setSpeculativeExecution(false);
  JobClient.runJob(conf);
  //delete the tmp job directory
  try {
    jobfs.delete(jobDirectory, true);
  } catch (IOException ie) {
    LOG.info("Unable to clean tmp directory " + jobDirectory);
  }
}
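The archive(...) method above is normally reached through the hadoop archive command line. A rough sketch of driving it programmatically via ToolRunner, assuming a configured Hadoop client; the archive name and all paths are illustrative placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class ArchiveDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Equivalent to: hadoop archive -archiveName logs.har -p /user/alice logs /user/alice/archives
    // HadoopArchives parses these arguments and then calls archive(parentPath, srcPaths, archiveName, dest).
    String[] harArgs = new String[] {
      "-archiveName", "logs.har",
      "-p", "/user/alice", "logs",
      "/user/alice/archives"
    };
    int exitCode = ToolRunner.run(conf, new HadoopArchives(conf), harArgs);
    System.exit(exitCode);
  }
}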