
Example 76 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class Join, method run:

/**
   * The main driver for the join program.
   * Invoke this method to submit the map/reduce job.
   * @throws IOException When there are communication problems with the
   *                     job tracker.
   */
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
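    // By default, claim roughly 90% of the cluster's reduce capacity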
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(join_reduces);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("join");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            // prints usage and returns a non-zero status
            return printUsage();
        }
    }
    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }
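    // compose() builds a join expression of the form
    // op(tbl(inputFormatClass, path1), tbl(inputFormatClass, path2), ...)
    // which CompositeInputFormat parses from the JOIN_EXPR property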
    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration().set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
Also used: SequenceFileOutputFormat(org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat) Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) ArrayList(java.util.ArrayList) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) Writable(org.apache.hadoop.io.Writable) TupleWritable(org.apache.hadoop.mapreduce.lib.join.TupleWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) JobClient(org.apache.hadoop.mapred.JobClient) Date(java.util.Date) Job(org.apache.hadoop.mapreduce.Job) ClusterStatus(org.apache.hadoop.mapred.ClusterStatus)
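
Since run(String[]) follows the org.apache.hadoop.util.Tool contract, this driver is normally launched through ToolRunner, which also handles the generic Hadoop options (-D, -fs, -jt). A minimal sketch of such an entry point, assuming the enclosing Join class implements Tool:

public static void main(String[] args) throws Exception {
    // ToolRunner injects the Configuration and strips generic options before calling run()
    int res = ToolRunner.run(new Configuration(), new Join(), args);
    System.exit(res);
}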

Example 77 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class CompressMapper, method getCompressJob:

public static Job getCompressJob(String jobname, Configuration conf, String inputpath, String outputpath) throws Exception {
    Job job = Job.getInstance(conf, jobname + "-CompressMapperJob");
    job.setJarByClass(CompressMapper.class);
    job.setMapperClass(TextCompressMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // if the output file exists, delete it
    final FileSystem hdfs = FileSystem.get(new ScenarioConfiguration());
    if (hdfs.exists(new Path(outputpath))) {
        hdfs.delete(new Path(outputpath), true);
    }
    hdfs.close();
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, new Path(outputpath));
    return job;
}
Also used: Path(org.apache.hadoop.fs.Path) ScenarioConfiguration(org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job)
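
getCompressJob returns a configured but unsubmitted Job, leaving the caller to decide when to run it and whether to block. A minimal usage sketch (the job name and paths here are placeholders, not taken from the Hadoop tests):

Job job = CompressMapper.getCompressJob("textcompress", new Configuration(), "/tmp/compress/in", "/tmp/compress/out");
// 'true' prints progress to the client while blocking until the job finishes
System.exit(job.waitForCompletion(true) ? 0 : 1);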

Example 78 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestCompressionEmulationUtils, method runDataGenJob:

/**
   * Runs a GridMix data-generation job.
   */
private static void runDataGenJob(Configuration conf, Path tempDir) throws IOException, ClassNotFoundException, InterruptedException {
    JobClient client = new JobClient(conf);
    // get the local job runner
    conf.setInt(MRJobConfig.NUM_MAPS, 1);
    Job job = Job.getInstance(conf);
    CompressionEmulationUtil.configure(job);
    job.setInputFormatClass(CustomInputFormat.class);
    // set the output path
    FileOutputFormat.setOutputPath(job, tempDir);
    // submit and wait for completion
    job.submit();
    int ret = job.waitForCompletion(true) ? 0 : 1;
    assertEquals("Job Failed", 0, ret);
}
Also used: Job(org.apache.hadoop.mapreduce.Job) JobClient(org.apache.hadoop.mapred.JobClient)
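
The "local job runner" comment means the test expects the job to execute in-process rather than on a cluster. If the incoming Configuration does not already arrange that, the standard knobs look like this (a sketch, not code from the test itself):

Configuration conf = new Configuration();
// "local" runs map and reduce tasks inside the client JVM
conf.set("mapreduce.framework.name", "local");
// use the local filesystem instead of HDFS
conf.set("fs.defaultFS", "file:///");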

Example 79 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestDistCacheEmulation, method testGenerateDistCacheData:

/**
   * Validate that the GenerateDistCacheData job creates the distributed cache files properly.
   * 
   * @throws Exception
   */
@Test(timeout = 200000)
public void testGenerateDistCacheData() throws Exception {
    long[] sortedFileSizes = new long[5];
    Configuration jobConf = runSetupGenerateDistCacheData(true, sortedFileSizes);
    GridmixJob gridmixJob = new GenerateDistCacheData(jobConf);
    Job job = gridmixJob.call();
    assertEquals("Number of reduce tasks in GenerateDistCacheData is not 0.", 0, job.getNumReduceTasks());
    assertTrue("GenerateDistCacheData job failed.", job.waitForCompletion(false));
    validateDistCacheData(jobConf, sortedFileSizes);
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
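
The same checks can be written without the JUnit harness. A minimal sketch of validating a map-only job in this style (the job construction details are assumed here, not taken from Gridmix):

Job job = Job.getInstance(conf, "distcache-generation");
// map-only: with zero reducers, the mappers write the output files directly
job.setNumReduceTasks(0);
// waitForCompletion(false) blocks until the job ends but suppresses progress logging
if (!job.waitForCompletion(false)) {
    throw new IllegalStateException("data-generation job failed");
}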

Example 80 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class ValueAggregatorJob, method main:

/**
   * Create and run an aggregate-based map/reduce job.
   * 
   * @param args the arguments used for job creation
   * @throws IOException
   */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = ValueAggregatorJob.createValueAggregatorJob(new Configuration(), args);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    System.exit(ret);
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) ControlledJob(org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob) Job(org.apache.hadoop.mapreduce.Job)
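
createValueAggregatorJob assembles the job from positional command-line arguments (input directories, output directory, number of reducers, and so on; the exact order is printed by the aggregate framework's usage message). A hedged invocation sketch with placeholder paths:

// Assumed argument order: <inputDirs> <outputDir> <numOfReducers>
String[] aggArgs = { "/data/logs", "/data/agg-out", "2" };
Job job = ValueAggregatorJob.createValueAggregatorJob(new Configuration(), aggArgs);
System.exit(job.waitForCompletion(true) ? 0 : 1);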

Aggregations

Job (org.apache.hadoop.mapreduce.Job)886 Path (org.apache.hadoop.fs.Path)498 Configuration (org.apache.hadoop.conf.Configuration)434 Test (org.junit.Test)259 IOException (java.io.IOException)135 FileSystem (org.apache.hadoop.fs.FileSystem)128 File (java.io.File)77 InputSplit (org.apache.hadoop.mapreduce.InputSplit)58 ArrayList (java.util.ArrayList)55 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)55 Scan (org.apache.hadoop.hbase.client.Scan)45 FileStatus (org.apache.hadoop.fs.FileStatus)44 NutchJob (org.apache.nutch.util.NutchJob)43 JobConf (org.apache.hadoop.mapred.JobConf)42 Text (org.apache.hadoop.io.Text)39 NutchConfiguration (org.apache.nutch.util.NutchConfiguration)36 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)35 JobContext (org.apache.hadoop.mapreduce.JobContext)35 GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)35 CommandLine (org.apache.commons.cli.CommandLine)33