Example 1 with OutputFormat

Use of org.apache.hadoop.mapred.OutputFormat in project hadoop by apache.

From class Submitter, method run:

@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    cli.addOption("input", false, "input path to the maps", "path");
    cli.addOption("output", false, "output path from the reduces", "path");
    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    //cli.addArgument("javareader", false, "is the RecordReader in Java");
    cli.addOption("map", false, "java classname of Mapper", "class");
    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("reduce", false, "java classname of Reducer", "class");
    cli.addOption("writer", false, "java classname of OutputFormat", "class");
    cli.addOption("program", false, "URI to application executable", "class");
    cli.addOption("reduces", false, "number of reduces", "num");
    cli.addOption("jobconf", false, "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
    cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
    Parser parser = cli.createParser();
    try {
        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());
        JobConf job = new JobConf(getConf());
        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
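        // Submitter drives a Hadoop Pipes job (a C++ map/reduce executable);
        // the setIsJava* calls below mark which stages use Java classes instead.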
        if (results.hasOption("inputformat")) {
            setIsJavaRecordReader(job, true);
            job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(job, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(job, true);
            job.setMapperClass(getClass(results, "map", job, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(job, true);
            job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            setIsJavaRecordWriter(job, true);
            job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
            }
        }
        if (results.hasOption("program")) {
            setExecutable(job, results.getOptionValue("program"));
        }
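        // Each token is a "name=value" pair; note that a value which itself
        // contains '=' would be truncated at the second '=' by the split below.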
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                String[] keyValSplit = keyVal.split("=");
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
            //FindBugs complains that creating a URLClassLoader should be
            //in a doPrivileged() block. 
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {

                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            job.setClassLoader(loader);
        }
        runJob(job);
        return 0;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), NullOutputFormat (org.apache.hadoop.mapred.lib.NullOutputFormat), OutputFormat (org.apache.hadoop.mapred.OutputFormat), LazyOutputFormat (org.apache.hadoop.mapred.lib.LazyOutputFormat), FileOutputFormat (org.apache.hadoop.mapred.FileOutputFormat), URL (java.net.URL), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser), BasicParser (org.apache.commons.cli.BasicParser), Parser (org.apache.commons.cli.Parser), Mapper (org.apache.hadoop.mapred.Mapper), CommandLine (org.apache.commons.cli.CommandLine), StringTokenizer (java.util.StringTokenizer), InputFormat (org.apache.hadoop.mapred.InputFormat), FileInputFormat (org.apache.hadoop.mapred.FileInputFormat), URLClassLoader (java.net.URLClassLoader), ParseException (org.apache.commons.cli.ParseException), Reducer (org.apache.hadoop.mapred.Reducer), JobConf (org.apache.hadoop.mapred.JobConf), HashPartitioner (org.apache.hadoop.mapred.lib.HashPartitioner), Partitioner (org.apache.hadoop.mapred.Partitioner)
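For context, the -writer option above registers a class implementing the old-API org.apache.hadoop.mapred.OutputFormat interface. The following is a minimal sketch of such an implementation; the class name and the tab-separated line format are hypothetical, chosen only to illustrate the two methods the interface requires.

import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;

// Hypothetical OutputFormat writing "key<TAB>value" lines, one per record.
public class TabSeparatedOutputFormat<K, V> implements OutputFormat<K, V> {

    @Override
    public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
        // Resolve the per-task output file under the job's output directory.
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        final DataOutputStream out = fs.create(file, progress);
        return new RecordWriter<K, V>() {
            @Override
            public void write(K key, V value) throws IOException {
                out.writeBytes(key + "\t" + value + "\n");
            }

            @Override
            public void close(Reporter reporter) throws IOException {
                out.close();
            }
        };
    }

    @Override
    public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
        // Fail fast if no output path was configured.
        if (FileOutputFormat.getOutputPath(job) == null) {
            throw new IOException("Output path not set");
        }
    }
}

checkOutputSpecs runs when the job is submitted, so a misconfigured output path is caught before any tasks launch; getRecordWriter then runs once per task.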

Example 2 with OutputFormat

Use of org.apache.hadoop.mapred.OutputFormat in project flink by apache.

From class HiveShimV110, method getHiveRecordWriter:

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz, Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath) {
    try {
        Class utilClass = HiveFileFormatUtils.class;
        OutputFormat outputFormat = (OutputFormat) outputFormatClz.newInstance();
        // Look up HiveFileFormatUtils.getRecordWriter reflectively; the
        // utility's signature varies across Hive releases, which is why a
        // version-specific shim performs the lookup.
        Method utilMethod = utilClass.getDeclaredMethod("getRecordWriter", JobConf.class, OutputFormat.class, Class.class, boolean.class, Properties.class, Path.class, Reporter.class);
        return (FileSinkOperator.RecordWriter) utilMethod.invoke(null, jobConf, outputFormat, outValClz, isCompressed, tableProps, outPath, Reporter.NULL);
    } catch (Exception e) {
        throw new CatalogException("Failed to create Hive RecordWriter", e);
    }
}
Also used: HiveFileFormatUtils (org.apache.hadoop.hive.ql.io.HiveFileFormatUtils), OutputFormat (org.apache.hadoop.mapred.OutputFormat), CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException), Method (java.lang.reflect.Method), FlinkHiveException (org.apache.flink.connectors.hive.FlinkHiveException), InvocationTargetException (java.lang.reflect.InvocationTargetException)
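The catch clause converts the checked reflection exceptions into an unchecked CatalogException, so callers of the shim stay free of reflection plumbing. Below is a hedged sketch of how the shim might be invoked; the output-format class, value type, and output path are illustrative assumptions, not taken from the Flink source.

import java.util.Properties;
// Package per Flink's hive connector source tree (flink-connector-hive).
import org.apache.flink.table.catalog.hive.client.HiveShimV110;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class HiveRecordWriterDemo {
    public static void main(String[] args) throws Exception {
        HiveShimV110 shim = new HiveShimV110();
        JobConf jobConf = new JobConf();
        // Table properties would normally come from the Hive table's storage
        // descriptor; left empty for this sketch.
        Properties tableProps = new Properties();
        // A plain-text output format bundled with Hive; any OutputFormat
        // declared by the target table could be passed instead.
        Class<?> outputFormatClz = Class.forName("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
        FileSinkOperator.RecordWriter writer = shim.getHiveRecordWriter(
                jobConf, outputFormatClz, Text.class, /* isCompressed */ false,
                tableProps, new Path("/tmp/hive-demo/part-00000"));
        writer.write(new Text("hello"));
        // false means a normal (non-aborted) close.
        writer.close(false);
    }
}

In Flink itself, shims are obtained through HiveShimLoader so that the variant matching the detected Hive version is selected, rather than instantiating a specific shim directly as this sketch does.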

Aggregations

OutputFormat (org.apache.hadoop.mapred.OutputFormat): 2 usages
InvocationTargetException (java.lang.reflect.InvocationTargetException): 1 usage
Method (java.lang.reflect.Method): 1 usage
URL (java.net.URL): 1 usage
URLClassLoader (java.net.URLClassLoader): 1 usage
StringTokenizer (java.util.StringTokenizer): 1 usage
BasicParser (org.apache.commons.cli.BasicParser): 1 usage
CommandLine (org.apache.commons.cli.CommandLine): 1 usage
ParseException (org.apache.commons.cli.ParseException): 1 usage
Parser (org.apache.commons.cli.Parser): 1 usage
FlinkHiveException (org.apache.flink.connectors.hive.FlinkHiveException): 1 usage
CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException): 1 usage
Path (org.apache.hadoop.fs.Path): 1 usage
HiveFileFormatUtils (org.apache.hadoop.hive.ql.io.HiveFileFormatUtils): 1 usage
FileInputFormat (org.apache.hadoop.mapred.FileInputFormat): 1 usage
FileOutputFormat (org.apache.hadoop.mapred.FileOutputFormat): 1 usage
InputFormat (org.apache.hadoop.mapred.InputFormat): 1 usage
JobConf (org.apache.hadoop.mapred.JobConf): 1 usage
Mapper (org.apache.hadoop.mapred.Mapper): 1 usage
Partitioner (org.apache.hadoop.mapred.Partitioner): 1 usage