Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class HadoopArchives, method main.
/** The main function. **/
public static void main(String[] args) {
  JobConf job = new JobConf(HadoopArchives.class);
  HadoopArchives harchives = new HadoopArchives(job);
  int ret = 0;
  try {
    ret = ToolRunner.run(harchives, args);
  } catch (Exception e) {
    // On failure, print a short diagnostic and exit non-zero.
    LOG.debug("Exception in archives ", e);
    System.err.println(e.getClass().getSimpleName() + " in archives");
    final String s = e.getLocalizedMessage();
    if (s != null) {
      System.err.println(s);
    } else {
      e.printStackTrace(System.err);
    }
    System.exit(1);
  }
  System.exit(ret);
}
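For context, a minimal sketch of driving this entry point through ToolRunner, mirroring the construction shown above; the archive name and paths are hypothetical, following the documented hadoop archive -archiveName NAME -p PARENT SRC* DEST usage:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class HarDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HadoopArchives.class);
    // Hypothetical arguments: archive dir1 under /user/alice into
    // /user/alice/archives/foo.har.
    int rc = ToolRunner.run(new HadoopArchives(job),
        new String[] { "-archiveName", "foo.har", "-p", "/user/alice",
                       "dir1", "/user/alice/archives" });
    System.exit(rc);
  }
}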
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class HadoopArchiveLogs, method main.
public static void main(String[] args) {
  JobConf job = new JobConf(HadoopArchiveLogs.class);
  HadoopArchiveLogs hal = new HadoopArchiveLogs(job);
  int ret = 0;
  try {
    ret = ToolRunner.run(hal, args);
  } catch (Exception e) {
    LOG.debug("Exception", e);
    System.err.println(e.getClass().getSimpleName());
    final String s = e.getLocalizedMessage();
    if (s != null) {
      System.err.println(s);
    } else {
      e.printStackTrace(System.err);
    }
    System.exit(1);
  }
  System.exit(ret);
}
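The same Tool pattern applies here; a hedged sketch of a programmatic invocation follows. The option names mirror the archive-logs tool's documented flags, but treat them and the values as assumptions:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.tools.HadoopArchiveLogs;
import org.apache.hadoop.util.ToolRunner;

public class ArchiveLogsDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(HadoopArchiveLogs.class);
    // Hypothetical options: archive at most 20 applications per run and
    // give each container 2048 MB.
    int rc = ToolRunner.run(new HadoopArchiveLogs(job),
        new String[] { "-maxEligibleApps", "20", "-memory", "2048" });
    System.exit(rc);
  }
}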
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class DataJoinJob, method createDataJoinJob.
public static JobConf createDataJoinJob(String[] args) throws IOException {
  String inputDir = args[0];
  String outputDir = args[1];
  // args[2] selects the input format: "text" means TextInputFormat,
  // anything else falls back to SequenceFileInputFormat.
  Class inputFormat = SequenceFileInputFormat.class;
  if (args[2].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileInputFormat: " + args[2]);
  } else {
    System.out.println("Using TextInputFormat: " + args[2]);
    inputFormat = TextInputFormat.class;
  }
  int numOfReducers = Integer.parseInt(args[3]);
  Class mapper = getClassByName(args[4]);
  Class reducer = getClassByName(args[5]);
  Class mapoutputValueClass = getClassByName(args[6]);
  // args[7] selects the output format: "text" means TextOutputFormat with
  // Text values; any other value is taken as the value class name for a
  // SequenceFileOutputFormat.
  Class outputFormat = TextOutputFormat.class;
  Class outputValueClass = Text.class;
  if (args[7].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileOutputFormat: " + args[7]);
    outputFormat = SequenceFileOutputFormat.class;
    outputValueClass = getClassByName(args[7]);
  } else {
    System.out.println("Using TextOutputFormat: " + args[7]);
  }
  long maxNumOfValuesPerGroup = 100;
  String jobName = "";
  if (args.length > 8) {
    maxNumOfValuesPerGroup = Long.parseLong(args[8]);
  }
  if (args.length > 9) {
    jobName = args[9];
  }
  Configuration defaults = new Configuration();
  JobConf job = new JobConf(defaults, DataJoinJob.class);
  job.setJobName("DataJoinJob: " + jobName);
  // Clear any previous output before configuring the job.
  FileSystem fs = FileSystem.get(defaults);
  fs.delete(new Path(outputDir), true);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormat(inputFormat);
  job.setMapperClass(mapper);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));
  job.setOutputFormat(outputFormat);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(mapoutputValueClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(outputValueClass);
  job.setReducerClass(reducer);
  job.setNumMapTasks(1);
  job.setNumReduceTasks(numOfReducers);
  job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
  return job;
}
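A hedged sketch of feeding this factory method and submitting the result with the classic JobClient API. The paths and the example.* class names are hypothetical placeholders; in the datajoin framework the map output value class is typically a TaggedMapOutput subclass:

import org.apache.hadoop.contrib.utils.join.DataJoinJob;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class DataJoinDriver {
  public static void main(String[] args) throws Exception {
    String[] jobArgs = {
        "/data/join/in",              // args[0]: input directory
        "/data/join/out",             // args[1]: output directory
        "text",                       // args[2]: use TextInputFormat
        "2",                          // args[3]: number of reducers
        "example.SampleTaggedMapper", // args[4]: mapper class (hypothetical)
        "example.SampleReducer",      // args[5]: reducer class (hypothetical)
        "example.SampleTaggedOutput", // args[6]: map output value class (hypothetical)
        "text",                       // args[7]: use TextOutputFormat
        "100",                        // args[8]: max values per group (optional)
        "sample join"                 // args[9]: job name (optional)
    };
    JobConf job = DataJoinJob.createDataJoinJob(jobArgs);
    JobClient.runJob(job); // submit and block until the job completes
  }
}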
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class Submitter, method run.
@Override
public int run(String[] args) throws Exception {
  CommandLineParser cli = new CommandLineParser();
  if (args.length == 0) {
    cli.printUsage();
    return 1;
  }
  cli.addOption("input", false, "input path to the maps", "path");
  cli.addOption("output", false, "output path from the reduces", "path");
  cli.addOption("jar", false, "job jar file", "path");
  cli.addOption("inputformat", false, "java classname of InputFormat", "class");
  //cli.addArgument("javareader", false, "is the RecordReader in Java");
  cli.addOption("map", false, "java classname of Mapper", "class");
  cli.addOption("partitioner", false, "java classname of Partitioner", "class");
  cli.addOption("reduce", false, "java classname of Reducer", "class");
  cli.addOption("writer", false, "java classname of OutputFormat", "class");
  cli.addOption("program", false, "URI to application executable", "class");
  cli.addOption("reduces", false, "number of reduces", "num");
  cli.addOption("jobconf", false, "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
  cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
  Parser parser = cli.createParser();
  try {
    GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
    CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());
    JobConf job = new JobConf(getConf());
    if (results.hasOption("input")) {
      FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
    }
    if (results.hasOption("output")) {
      FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
    }
    if (results.hasOption("jar")) {
      job.setJar(results.getOptionValue("jar"));
    }
    // Supplying a Java class for any of these stages flips the matching
    // "is Java" switch so the pipes runtime does not expect that stage
    // from the C++ application.
    if (results.hasOption("inputformat")) {
      setIsJavaRecordReader(job, true);
      job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
    }
    if (results.hasOption("javareader")) {
      setIsJavaRecordReader(job, true);
    }
    if (results.hasOption("map")) {
      setIsJavaMapper(job, true);
      job.setMapperClass(getClass(results, "map", job, Mapper.class));
    }
    if (results.hasOption("partitioner")) {
      job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
    }
    if (results.hasOption("reduce")) {
      setIsJavaReducer(job, true);
      job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
    }
    if (results.hasOption("reduces")) {
      job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
    }
    if (results.hasOption("writer")) {
      setIsJavaRecordWriter(job, true);
      job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
    }
    if (results.hasOption("lazyOutput")) {
      if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
        LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
      }
    }
    if (results.hasOption("program")) {
      setExecutable(job, results.getOptionValue("program"));
    }
    if (results.hasOption("jobconf")) {
      LOG.warn("-jobconf option is deprecated, please use -D instead.");
      String options = results.getOptionValue("jobconf");
      StringTokenizer tokenizer = new StringTokenizer(options, ",");
      while (tokenizer.hasMoreTokens()) {
        // Note: assumes neither keys nor values contain '=' or ','.
        String keyVal = tokenizer.nextToken().trim();
        String[] keyValSplit = keyVal.split("=");
        job.set(keyValSplit[0], keyValSplit[1]);
      }
    }
    // if they gave us a jar file, include it into the class path
    String jarFile = job.getJar();
    if (jarFile != null) {
      final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
      //FindBugs complains that creating a URLClassLoader should be
      //in a doPrivileged() block.
      ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
        public ClassLoader run() {
          return new URLClassLoader(urls);
        }
      });
      job.setClassLoader(loader);
    }
    runJob(job);
    return 0;
  } catch (ParseException pe) {
    LOG.info("Error : " + pe);
    cli.printUsage();
    return 1;
  }
}
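For reference, a minimal sketch of exercising this Tool directly, assuming Submitter's no-argument constructor; the input and output paths and the executable URI are hypothetical, mirroring the usual hadoop pipes -input ... -output ... -program ... command line:

import org.apache.hadoop.mapred.pipes.Submitter;
import org.apache.hadoop.util.ToolRunner;

public class PipesDriver {
  public static void main(String[] args) throws Exception {
    // Hypothetical arguments; -program points at a C++ binary on HDFS.
    int rc = ToolRunner.run(new Submitter(),
        new String[] { "-input", "/pipes/in", "-output", "/pipes/out",
                       "-program", "hdfs:///bin/wordcount-pipes" });
    System.exit(rc);
  }
}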
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
The class Cluster, method getJob.
/**
 * Get the job corresponding to the given job id.
 *
 * @param jobId the id of the job
 * @return object of {@link Job}
 * @throws IOException
 * @throws InterruptedException
 */
public Job getJob(JobID jobId) throws IOException, InterruptedException {
  JobStatus status = client.getJobStatus(jobId);
  if (status != null) {
    // Rebuild the job's configuration from the job file stored on the
    // cluster; fall back to an empty JobConf if the file is gone.
    final JobConf conf = new JobConf();
    final Path jobPath = new Path(client.getFilesystemName(), status.getJobFile());
    final FileSystem fs = FileSystem.get(jobPath.toUri(), getConf());
    try {
      conf.addResource(fs.open(jobPath), jobPath.toString());
    } catch (FileNotFoundException fnf) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Job conf missing on cluster", fnf);
      }
    }
    return Job.getInstance(this, status, conf);
  }
  return null;
}
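A minimal sketch of calling this method, assuming a reachable cluster; the job id below is a hypothetical placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobLookup {
  public static void main(String[] args) throws Exception {
    Cluster cluster = new Cluster(new Configuration());
    // Look up a job by its id; getJob returns null if the id is unknown.
    Job job = cluster.getJob(JobID.forName("job_1468002835949_0001"));
    if (job == null) {
      System.out.println("Job not found");
    } else {
      System.out.println(job.getJobName() + ": " + job.getJobState());
    }
    cluster.close();
  }
}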