Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class HadoopArchives, method main.
/** The main function. **/
public static void main(String[] args) {
  JobConf job = new JobConf(HadoopArchives.class);
  HadoopArchives harchives = new HadoopArchives(job);
  int ret = 0;
  try {
    ret = ToolRunner.run(harchives, args);
  } catch (Exception e) {
    // On failure, print a short diagnostic and exit non-zero.
    LOG.debug("Exception in archives ", e);
    System.err.println(e.getClass().getSimpleName() + " in archives");
    final String s = e.getLocalizedMessage();
    if (s != null) {
      System.err.println(s);
    } else {
      e.printStackTrace(System.err);
    }
    System.exit(1);
  }
  System.exit(ret);
}
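For context, a minimal sketch of driving this entry point through ToolRunner, mirroring the construction shown above; the archive name and paths are hypothetical, following the documented hadoop archive -archiveName NAME -p PARENT SRC* DEST usage:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class HarDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HadoopArchives.class);
    // Hypothetical arguments: archive dir1 under /user/alice into
    // /user/alice/archives/foo.har.
    int rc = ToolRunner.run(new HadoopArchives(job),
        new String[] { "-archiveName", "foo.har", "-p", "/user/alice",
                       "dir1", "/user/alice/archives" });
    System.exit(rc);
  }
}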
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class HadoopArchiveLogs, method main.
public static void main(String[] args) {
  JobConf job = new JobConf(HadoopArchiveLogs.class);
  HadoopArchiveLogs hal = new HadoopArchiveLogs(job);
  int ret = 0;
  try {
    ret = ToolRunner.run(hal, args);
  } catch (Exception e) {
    LOG.debug("Exception", e);
    System.err.println(e.getClass().getSimpleName());
    final String s = e.getLocalizedMessage();
    if (s != null) {
      System.err.println(s);
    } else {
      e.printStackTrace(System.err);
    }
    System.exit(1);
  }
  System.exit(ret);
}
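The same Tool pattern applies here; a hedged sketch of a programmatic invocation follows. The option names mirror the archive-logs tool's documented flags, but treat them and the values as assumptions:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.tools.HadoopArchiveLogs;
import org.apache.hadoop.util.ToolRunner;

public class ArchiveLogsDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HadoopArchiveLogs.class);
    // Hypothetical options: archive at most 20 applications per run and
    // give each container 2048 MB.
    int rc = ToolRunner.run(new HadoopArchiveLogs(job),
        new String[] { "-maxEligibleApps", "20", "-memory", "2048" });
    System.exit(rc);
  }
}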
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class DataJoinJob, method createDataJoinJob.
public static JobConf createDataJoinJob(String[] args) throws IOException {
  String inputDir = args[0];
  String outputDir = args[1];
  // args[2] selects the input format: "text" means TextInputFormat,
  // anything else falls back to SequenceFileInputFormat.
  Class inputFormat = SequenceFileInputFormat.class;
  if (args[2].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileInputFormat: " + args[2]);
  } else {
    System.out.println("Using TextInputFormat: " + args[2]);
    inputFormat = TextInputFormat.class;
  }
  int numOfReducers = Integer.parseInt(args[3]);
  Class mapper = getClassByName(args[4]);
  Class reducer = getClassByName(args[5]);
  Class mapoutputValueClass = getClassByName(args[6]);
  // args[7] selects the output format: "text" means TextOutputFormat with
  // Text values; any other value is taken as the value class name for a
  // SequenceFileOutputFormat.
  Class outputFormat = TextOutputFormat.class;
  Class outputValueClass = Text.class;
  if (args[7].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileOutputFormat: " + args[7]);
    outputFormat = SequenceFileOutputFormat.class;
    outputValueClass = getClassByName(args[7]);
  } else {
    System.out.println("Using TextOutputFormat: " + args[7]);
  }
  long maxNumOfValuesPerGroup = 100;
  String jobName = "";
  if (args.length > 8) {
    maxNumOfValuesPerGroup = Long.parseLong(args[8]);
  }
  if (args.length > 9) {
    jobName = args[9];
  }
  Configuration defaults = new Configuration();
  JobConf job = new JobConf(defaults, DataJoinJob.class);
  job.setJobName("DataJoinJob: " + jobName);
  // Clear any previous output before configuring the job.
  FileSystem fs = FileSystem.get(defaults);
  fs.delete(new Path(outputDir), true);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormat(inputFormat);
  job.setMapperClass(mapper);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));
  job.setOutputFormat(outputFormat);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(mapoutputValueClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(outputValueClass);
  job.setReducerClass(reducer);
  job.setNumMapTasks(1);
  job.setNumReduceTasks(numOfReducers);
  job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
  return job;
}
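A hedged sketch of feeding this factory method and submitting the result with the classic JobClient API. The paths and the example.* class names are hypothetical placeholders; in the datajoin framework the map output value class is typically a TaggedMapOutput subclass:

import org.apache.hadoop.contrib.utils.join.DataJoinJob;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class DataJoinDriver {
  public static void main(String[] args) throws Exception {
    String[] jobArgs = {
        "/data/join/in",              // args[0]: input directory
        "/data/join/out",             // args[1]: output directory
        "text",                       // args[2]: use TextInputFormat
        "2",                          // args[3]: number of reducers
        "example.SampleTaggedMapper", // args[4]: mapper class (hypothetical)
        "example.SampleReducer",      // args[5]: reducer class (hypothetical)
        "example.SampleTaggedOutput", // args[6]: map output value class (hypothetical)
        "text",                       // args[7]: use TextOutputFormat
        "100",                        // args[8]: max values per group (optional)
        "sample join"                 // args[9]: job name (optional)
    };
    JobConf job = DataJoinJob.createDataJoinJob(jobArgs);
    JobClient.runJob(job); // submit and block until the job completes
  }
}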
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class Submitter, method run.
@Override
public int run(String[] args) throws Exception {
  CommandLineParser cli = new CommandLineParser();
  if (args.length == 0) {
    cli.printUsage();
    return 1;
  }
  cli.addOption("input", false, "input path to the maps", "path");
  cli.addOption("output", false, "output path from the reduces", "path");
  cli.addOption("jar", false, "job jar file", "path");
  cli.addOption("inputformat", false, "java classname of InputFormat", "class");
  //cli.addArgument("javareader", false, "is the RecordReader in Java");
  cli.addOption("map", false, "java classname of Mapper", "class");
  cli.addOption("partitioner", false, "java classname of Partitioner", "class");
  cli.addOption("reduce", false, "java classname of Reducer", "class");
  cli.addOption("writer", false, "java classname of OutputFormat", "class");
  cli.addOption("program", false, "URI to application executable", "class");
  cli.addOption("reduces", false, "number of reduces", "num");
  cli.addOption("jobconf", false, "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
  cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
  Parser parser = cli.createParser();
  try {
    GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
    CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());
    JobConf job = new JobConf(getConf());
    if (results.hasOption("input")) {
      FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
    }
    if (results.hasOption("output")) {
      FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
    }
    if (results.hasOption("jar")) {
      job.setJar(results.getOptionValue("jar"));
    }
    // Supplying a Java class for any of these stages flips the matching
    // "is Java" switch so the pipes runtime does not expect that stage
    // from the C++ application.
    if (results.hasOption("inputformat")) {
      setIsJavaRecordReader(job, true);
      job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
    }
    if (results.hasOption("javareader")) {
      setIsJavaRecordReader(job, true);
    }
    if (results.hasOption("map")) {
      setIsJavaMapper(job, true);
      job.setMapperClass(getClass(results, "map", job, Mapper.class));
    }
    if (results.hasOption("partitioner")) {
      job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
    }
    if (results.hasOption("reduce")) {
      setIsJavaReducer(job, true);
      job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
    }
    if (results.hasOption("reduces")) {
      job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
    }
    if (results.hasOption("writer")) {
      setIsJavaRecordWriter(job, true);
      job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
    }
    if (results.hasOption("lazyOutput")) {
      if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
        LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
      }
    }
    if (results.hasOption("program")) {
      setExecutable(job, results.getOptionValue("program"));
    }
    if (results.hasOption("jobconf")) {
      LOG.warn("-jobconf option is deprecated, please use -D instead.");
      String options = results.getOptionValue("jobconf");
      StringTokenizer tokenizer = new StringTokenizer(options, ",");
      while (tokenizer.hasMoreTokens()) {
        // Note: assumes neither keys nor values contain '=' or ','.
        String keyVal = tokenizer.nextToken().trim();
        String[] keyValSplit = keyVal.split("=");
        job.set(keyValSplit[0], keyValSplit[1]);
      }
    }
    // if they gave us a jar file, include it into the class path
    String jarFile = job.getJar();
    if (jarFile != null) {
      final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
      //FindBugs complains that creating a URLClassLoader should be
      //in a doPrivileged() block.
      ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
        public ClassLoader run() {
          return new URLClassLoader(urls);
        }
      });
      job.setClassLoader(loader);
    }
    runJob(job);
    return 0;
  } catch (ParseException pe) {
    LOG.info("Error : " + pe);
    cli.printUsage();
    return 1;
  }
}
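For reference, a minimal sketch of exercising this Tool directly, assuming Submitter's no-argument constructor; the input and output paths and the executable URI are hypothetical, mirroring the usual hadoop pipes -input ... -output ... -program ... command line:

import org.apache.hadoop.mapred.pipes.Submitter;
import org.apache.hadoop.util.ToolRunner;

public class PipesDriver {
  public static void main(String[] args) throws Exception {
    // Hypothetical arguments; -program points at a C++ binary on HDFS.
    int rc = ToolRunner.run(new Submitter(),
        new String[] { "-input", "/pipes/in", "-output", "/pipes/out",
                       "-program", "hdfs:///bin/wordcount-pipes" });
    System.exit(rc);
  }
}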
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class Cluster, method getJob.
/**
 * Get the job corresponding to the given job id.
 *
 * @param jobId the id of the job
 * @return object of {@link Job}
 * @throws IOException
 * @throws InterruptedException
 */
public Job getJob(JobID jobId) throws IOException, InterruptedException {
  JobStatus status = client.getJobStatus(jobId);
  if (status != null) {
    // Rebuild the job's configuration from the job file stored on the
    // cluster; fall back to an empty JobConf if the file is gone.
    final JobConf conf = new JobConf();
    final Path jobPath = new Path(client.getFilesystemName(), status.getJobFile());
    final FileSystem fs = FileSystem.get(jobPath.toUri(), getConf());
    try {
      conf.addResource(fs.open(jobPath), jobPath.toString());
    } catch (FileNotFoundException fnf) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Job conf missing on cluster", fnf);
      }
    }
    return Job.getInstance(this, status, conf);
  }
  return null;
}
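A minimal sketch of calling this method, assuming a reachable cluster; the job id below is a hypothetical placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobLookup {
  public static void main(String[] args) throws Exception {
    Cluster cluster = new Cluster(new Configuration());
    // Look up a job by its id; getJob returns null if the id is unknown.
    Job job = cluster.getJob(JobID.forName("job_1468002835949_0001"));
    if (job == null) {
      System.out.println("Job not found");
    } else {
      System.out.println(job.getJobName() + ": " + job.getJobState());
    }
    cluster.close();
  }
}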