Example 66 with Job

Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.

The class UserNamePermission, method main().

public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    // The reducer doubles as the combiner; map output is (Text, Text).
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    // Fixed input and output directories: read "input", write to outDir ("output").
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Job (org.apache.hadoop.mapreduce.Job)
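
The driver references two inner classes that the snippet does not show. A minimal sketch of what they might look like, assuming the mapper emits the task JVM's user.name system property under a fixed key and the reducer passes values through; the bodies below (and the implied imports of LongWritable, Text, Mapper, and Reducer) are illustrative assumptions, not the project's actual code:

public static class UserNameMapper extends Mapper<LongWritable, Text, Text, Text> {
    // Fixed key so every record groups into a single reduce call.
    private final Text keyOut = new Text("UserName");

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Illustrative: report which OS user the map task runs as.
        context.write(keyOut, new Text(System.getProperty("user.name")));
    }
}

public static class UserNameReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Pass-through, which also makes the class safe to reuse as the combiner.
        for (Text value : values) {
            context.write(key, value);
        }
    }
}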

Example 67 with Job

Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.

The class TestUberAM, method testFailingMapper().

@Override
@Test
public void testFailingMapper() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting uberized testFailingMapper().");
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    Job job = runFailingMapperJob();
    // should be able to get diags for single task attempt...
    TaskID taskID = new TaskID(job.getJobID(), TaskType.MAP, 0);
    TaskAttemptID aId = new TaskAttemptID(taskID, 0);
    System.out.println("Diagnostics for " + aId + " :");
    for (String diag : job.getTaskDiagnostics(aId)) {
        System.out.println(diag);
    }
    // ...but not for second (shouldn't exist:  uber-AM overrode max attempts)
    boolean secondTaskAttemptExists = true;
    try {
        aId = new TaskAttemptID(taskID, 1);
        System.out.println("Diagnostics for " + aId + " :");
        for (String diag : job.getTaskDiagnostics(aId)) {
            System.out.println(diag);
        }
    } catch (Exception e) {
        secondTaskAttemptExists = false;
    }
    Assert.assertFalse(secondTaskAttemptExists);
    TaskCompletionEvent[] events = job.getTaskCompletionEvents(0, 2);
    Assert.assertEquals(1, events.length);
    // TIPFAILED if it comes from the AM, FAILED if it comes from the JHS
    TaskCompletionEvent.Status status = events[0].getStatus();
    Assert.assertTrue(status == TaskCompletionEvent.Status.FAILED || status == TaskCompletionEvent.Status.TIPFAILED);
    Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
    // Disabled until UberAM honors MRJobConfig.MAP_MAX_ATTEMPTS:
    // verifyFailingMapperCounters(job);
    // TODO later: add explicit "isUber()" checks of some sort
}
Also used: TaskID (org.apache.hadoop.mapreduce.TaskID), TaskCompletionEvent (org.apache.hadoop.mapreduce.TaskCompletionEvent), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), Job (org.apache.hadoop.mapreduce.Job), File (java.io.File), IOException (java.io.IOException), Test (org.junit.Test)
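
runFailingMapperJob() is a helper inherited from the surrounding test hierarchy and is not shown here. Conceptually, such a job only needs a mapper that throws unconditionally; a hypothetical sketch (the class name FailingMapper and its body are illustrative, not the actual test helper):

public static class FailingMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException {
        // Fail every attempt so the job ends in JobStatus.State.FAILED,
        // letting the test inspect task diagnostics and completion events.
        throw new IOException("Failing on purpose");
    }
}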

Example 68 with Job

Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.

The class WordCount, method main().

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Every argument except the last is an input path; the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
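
The mapper and reducer wired up above are the classic WordCount pair. They are shown below essentially as they appear in the standard Hadoop example, with comments added; the imports of StringTokenizer, IntWritable, Text, Mapper, and Reducer are assumed:

public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (token, 1) for every whitespace-separated token in the line.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}

public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the partial counts; this works both as the combiner and the reducer.
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}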

Example 69 with Job

Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.

The class WordMedian, method run().

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Job done: pull the map-output-record count to locate the median
    // position(s) in the sorted word-length output.
    long totalWords = job.getCounters()
        .getGroup(TaskCounter.class.getCanonicalName())
        .findCounter("MAP_OUTPUT_RECORDS", "Map output records")
        .getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Job (org.apache.hadoop.mapreduce.Job), TaskCounter (org.apache.hadoop.mapreduce.TaskCounter)
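
The two indices computed after the job bracket the median position: ceil and floor of totalWords / 2.0 differ by one when the count is odd and coincide when it is even, and readAndFindMedian (defined elsewhere in WordMedian, not shown) uses them to pick the value or values at those positions in the sorted output. A quick arithmetic check with hypothetical counts:

long totalWords = 7;                                    // hypothetical odd count
int medianIndex1 = (int) Math.ceil(totalWords / 2.0);   // = 4
int medianIndex2 = (int) Math.floor(totalWords / 2.0);  // = 3

totalWords = 8;                                         // hypothetical even count
medianIndex1 = (int) Math.ceil(totalWords / 2.0);       // = 4
medianIndex2 = (int) Math.floor(totalWords / 2.0);      // = 4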

Example 70 with Job

Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.

The class DistSum, method compute().

/** Start a job to compute sigma */
private void compute(final String name, Summation sigma) throws IOException {
    if (sigma.getValue() != null)
        throw new IOException("sigma.getValue() != null, sigma=" + sigma);
    // set up the remote directory
    final FileSystem fs = FileSystem.get(getConf());
    final Path dir = fs.makeQualified(new Path(parameters.remoteDir, name));
    if (!Util.createNonexistingDirectory(fs, dir))
        return;
    // set up a job
    final Job job = createJob(name, sigma);
    final Path outdir = new Path(dir, "out");
    FileOutputFormat.setOutputPath(job, outdir);
    // start a map/reduce job
    final String startmessage = "steps/parts = " + sigma.E.getSteps() + "/" + parameters.nParts + " = " + Util.long2string(sigma.E.getSteps() / parameters.nParts);
    Util.runJob(name, job, parameters.machine, startmessage, timer);
    final List<TaskResult> results = Util.readJobOutputs(fs, outdir);
    Util.writeResults(name, results, fs, parameters.remoteDir);
    fs.delete(dir, true);
    // combine the results
    final List<TaskResult> combined = Util.combine(results);
    final PrintWriter out = Util.createWriter(parameters.localDir, name);
    try {
        for (TaskResult r : combined) {
            final String s = taskResult2string(name, r);
            out.println(s);
            out.flush();
            Util.out.println(s);
        }
    } finally {
        out.close();
    }
    if (combined.size() == 1) {
        final Summation s = combined.get(0).getElement();
        if (sigma.contains(s) && s.contains(sigma))
            sigma.setValue(s.getValue());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job), Summation (org.apache.hadoop.examples.pi.math.Summation), PrintWriter (java.io.PrintWriter)
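
Util.createNonexistingDirectory acts as a claim check: if the remote directory already exists, another worker owns this summation and compute() backs off. A minimal sketch of that idiom using only the stock FileSystem API (the real Util implementation may differ):

static boolean createNonexistingDirectory(FileSystem fs, Path dir) throws IOException {
    if (fs.exists(dir)) {
        return false;      // already claimed by another worker
    }
    return fs.mkdirs(dir); // claim it (note: exists + mkdirs is not atomic across racers)
}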

Aggregations

Classes most often used together with Job across the indexed examples, with usage counts:

Job (org.apache.hadoop.mapreduce.Job): 886
Path (org.apache.hadoop.fs.Path): 498
Configuration (org.apache.hadoop.conf.Configuration): 434
Test (org.junit.Test): 259
IOException (java.io.IOException): 135
FileSystem (org.apache.hadoop.fs.FileSystem): 128
File (java.io.File): 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 58
ArrayList (java.util.ArrayList): 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 55
Scan (org.apache.hadoop.hbase.client.Scan): 45
FileStatus (org.apache.hadoop.fs.FileStatus): 44
NutchJob (org.apache.nutch.util.NutchJob): 43
JobConf (org.apache.hadoop.mapred.JobConf): 42
Text (org.apache.hadoop.io.Text): 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration): 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 35
JobContext (org.apache.hadoop.mapreduce.JobContext): 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 35
CommandLine (org.apache.commons.cli.CommandLine): 33