Use of org.apache.hadoop.mapred.JobConf in project hadoop by Apache.
The class CompressionEmulationUtil, method getPossiblyCompressedOutputStream.
/**
 * Returns an {@link OutputStream} for a file that might need
* compression.
*/
static OutputStream getPossiblyCompressedOutputStream(Path file, Configuration conf) throws IOException {
  FileSystem fs = file.getFileSystem(conf);
  JobConf jConf = new JobConf(conf);
  if (org.apache.hadoop.mapred.FileOutputFormat.getCompressOutput(jConf)) {
    // get the codec class
    Class<? extends CompressionCodec> codecClass = org.apache.hadoop.mapred.FileOutputFormat.getOutputCompressorClass(jConf, GzipCodec.class);
    // get the codec implementation
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
    // add the appropriate extension
    file = file.suffix(codec.getDefaultExtension());
    if (isCompressionEmulationEnabled(conf)) {
      FSDataOutputStream fileOut = fs.create(file, false);
      return new DataOutputStream(codec.createOutputStream(fileOut));
    }
  }
  return fs.create(file, false);
}
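A minimal sketch of the configuration that drives this code path, assuming a caller outside the original Gridmix code; the class name CompressedOutputConfigExample is made up for illustration. The compression flag and codec class read above are the standard old-API output compression settings on the JobConf:

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressedOutputConfigExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // These are the values FileOutputFormat.getCompressOutput() and
    // getOutputCompressorClass() read inside the utility above.
    FileOutputFormat.setCompressOutput(conf, true);
    FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    // With these set (and compression emulation enabled), the utility would
    // append ".gz" to the file name and wrap the created stream in a
    // gzip-compressing stream.
    System.out.println("compress output? " + FileOutputFormat.getCompressOutput(conf));
  }
}

Note that with compression emulation disabled, the method above still appends the codec extension but falls through and returns an uncompressed stream.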
Use of org.apache.hadoop.mapred.JobConf in project hadoop by Apache.
The class DistributedCacheEmulator, method updateHDFSDistCacheFilesList.
/**
 * For the job to be simulated, identify the needed distributed cache files by
 * mapping the original cluster's distributed cache file paths to the simulated
 * cluster's paths and adding these paths to the map {@code distCacheFiles}.
*<br>
* JobStory should contain distributed cache related properties like
* <li> {@link MRJobConfig#CACHE_FILES}
* <li> {@link MRJobConfig#CACHE_FILE_VISIBILITIES}
* <li> {@link MRJobConfig#CACHE_FILES_SIZES}
* <li> {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}
* <li> {@link MRJobConfig#CLASSPATH_FILES}
*
* <li> {@link MRJobConfig#CACHE_ARCHIVES}
* <li> {@link MRJobConfig#CACHE_ARCHIVES_VISIBILITIES}
* <li> {@link MRJobConfig#CACHE_ARCHIVES_SIZES}
* <li> {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}
* <li> {@link MRJobConfig#CLASSPATH_ARCHIVES}
*
* <li> {@link MRJobConfig#CACHE_SYMLINK}
*
* @param jobdesc JobStory of original job obtained from trace
* @throws IOException
*/
void updateHDFSDistCacheFilesList(JobStory jobdesc) throws IOException {
  // Map original job's distributed cache file paths to simulated cluster's
  // paths, to be used by this simulated job.
  JobConf jobConf = jobdesc.getJobConf();
  String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
  if (files != null) {
    String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
    String[] visibilities = jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
    String[] timeStamps = jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
    FileSystem fs = FileSystem.get(conf);
    String user = jobConf.getUser();
    for (int i = 0; i < files.length; i++) {
      // Check if visibilities are available because older hadoop versions
      // didn't have public, private Distributed Caches separately.
      boolean visibility = (visibilities == null) || Boolean.parseBoolean(visibilities[i]);
      if (isLocalDistCacheFile(files[i], user, visibility)) {
        // Local file system based distributed cache file; it is created on
        // the fly on the pseudo local file system (i.e. when the
        // simulated job is submitted).
        continue;
      }
      // distributed cache file on hdfs
      String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i], visibility, user);
      // Skip the path if it is already in the list or if the file already
      // exists, because a single file path with a single timestamp
      // should correspond to a single file size.
      if (distCacheFiles.containsKey(mappedPath) || fs.exists(new Path(mappedPath))) {
        continue;
      }
      distCacheFiles.put(mappedPath, Long.valueOf(fileSizes[i]));
    }
  }
}
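For orientation, a hedged sketch of the distributed cache properties this method reads from the trace's JobConf; the file path, size, visibility, and timestamp values are invented purely for illustration:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class DistCachePropertiesExample {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    // One entry per cache file; the arrays are walked in parallel, so each
    // path must line up with one size, one visibility and one timestamp.
    jobConf.setStrings(MRJobConfig.CACHE_FILES, "hdfs://namenode:8020/user/alice/lookup.dat");
    jobConf.setStrings(MRJobConfig.CACHE_FILES_SIZES, "1048576");
    jobConf.setStrings(MRJobConfig.CACHE_FILE_VISIBILITIES, "true");
    jobConf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, "1500000000000");

    String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
    String[] sizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
    System.out.println(files[0] + " -> " + sizes[0] + " bytes");
  }
}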
Use of org.apache.hadoop.mapred.JobConf in project hadoop by Apache.
The class JobSubmitter, method writeSplits.
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job, Path jobSubmitDir) throws IOException, InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf) job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
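The branch above is controlled by the old-versus-new API flag on the JobConf; a small sketch of how that flag reads and flips (the class name is illustrative):

import org.apache.hadoop.mapred.JobConf;

public class NewMapperFlagExample {
  public static void main(String[] args) {
    JobConf jConf = new JobConf();
    // false by default, so writeSplits() would take the writeOldSplits() path.
    System.out.println("use new mapper? " + jConf.getUseNewMapper());

    // Opt into the new org.apache.hadoop.mapreduce API; writeSplits() would
    // then call writeNewSplits() instead.
    jConf.setUseNewMapper(true);
    System.out.println("use new mapper? " + jConf.getUseNewMapper());
  }
}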
Use of org.apache.hadoop.mapred.JobConf in project hadoop by Apache.
The class JobSubmitter, method checkSpecs.
private void checkSpecs(Job job) throws ClassNotFoundException, InterruptedException, IOException {
  JobConf jConf = (JobConf) job.getConfiguration();
  // Check the output specification
  if (jConf.getNumReduceTasks() == 0 ? jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
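A sketch of the new-API side of that branch, assuming a typical Job setup; the output path is made up, and the commented lines only indicate where checkSpecs() runs during submission:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class CheckSpecsExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // New-API output format: checkSpecs() instantiates it reflectively and
    // calls checkOutputSpecs(job), which typically fails if the output
    // directory is unset or already exists.
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/example-output"));
    // Submitting the job would trigger JobSubmitter.checkSpecs(job); an
    // old-API job configured via JobConf.setOutputFormat(...) instead goes
    // through OutputFormat.checkOutputSpecs(FileSystem, JobConf).
  }
}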
Use of org.apache.hadoop.mapred.JobConf in project hadoop by Apache.
The class Limits, method init.
public static synchronized void init(Configuration conf) {
  if (!isInited) {
    if (conf == null) {
      conf = new JobConf();
    }
    GROUP_NAME_MAX = conf.getInt(COUNTER_GROUP_NAME_MAX_KEY, COUNTER_GROUP_NAME_MAX_DEFAULT);
    COUNTER_NAME_MAX = conf.getInt(COUNTER_NAME_MAX_KEY, COUNTER_NAME_MAX_DEFAULT);
    GROUPS_MAX = conf.getInt(COUNTER_GROUPS_MAX_KEY, COUNTER_GROUPS_MAX_DEFAULT);
    COUNTERS_MAX = conf.getInt(COUNTERS_MAX_KEY, COUNTERS_MAX_DEFAULT);
  }
  isInited = true;
}
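The keys read here come from MRJobConfig. A brief sketch of overriding one of them before anything initializes the limits, assuming the org.apache.hadoop.mapreduce.counters.Limits location from the Hadoop source tree; because of the isInited guard above, only the first call to init takes effect:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.counters.Limits;

public class CounterLimitsExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // Raise the per-job counter cap (otherwise COUNTERS_MAX_DEFAULT applies)
    // before the first Limits.init() call.
    conf.setInt(MRJobConfig.COUNTERS_MAX_KEY, 500);
    Limits.init(conf);
  }
}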