Search in sources:

Example 71 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class NonSortTest method getJob.

/**
 * Builds the MapReduce job used by the non-sort test: SequenceFile input,
 * text output, with any pre-existing output directory removed first.
 *
 * @param conf       configuration the job is created from
 * @param jobName    display name for the job
 * @param inputpath  input directory (SequenceFile format)
 * @param outputpath output directory; deleted recursively if it already exists
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the filesystem cannot be reached or the job cannot be created
 */
private Job getJob(Configuration conf, String jobName, String inputpath, String outputpath) throws IOException {
    // NOTE: FileSystem.get(conf) returns a JVM-wide cached instance. The
    // original code called fs.close() here, which closes that shared instance
    // and can make later users (including job submission itself) fail with
    // "Filesystem closed". Do not close the cached FileSystem.
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(outputpath))) {
        fs.delete(new Path(outputpath), true);
    }
    final Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(NonSortTestMR.class);
    job.setMapperClass(NonSortTestMR.Map.class);
    job.setReducerClass(NonSortTestMR.KeyHashSumReduce.class);
    // Map output key class is not set explicitly; MapReduce falls back to the
    // job output key class (Text) in that case.
    job.setOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputpath));
    FileOutputFormat.setOutputPath(job, new Path(outputpath));
    return job;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job)

Example 72 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class NonSortTest method nonSortTest.

/**
 * Runs the same input through a native job with map-output sorting disabled
 * and through a normal (sorting) Hadoop job, then verifies that both produce
 * identical output files and matching counters.
 */
@Test
public void nonSortTest() throws Exception {
    // Native job: sorting of map output explicitly switched off.
    Configuration nativeConfiguration = ScenarioConfiguration.getNativeConfiguration();
    nativeConfiguration.addResource(TestConstants.NONSORT_TEST_CONF);
    nativeConfiguration.set(TestConstants.NATIVETASK_MAP_OUTPUT_SORT, "false");
    final Job nativeJob = getJob(nativeConfiguration, "NativeNonSort", TestConstants.NATIVETASK_NONSORT_TEST_INPUTDIR, TestConstants.NATIVETASK_NONSORT_TEST_NATIVE_OUTPUT);
    assertTrue(nativeJob.waitForCompletion(true));

    // Reference job: the standard Hadoop pipeline with sorting left on.
    Configuration normalConfiguration = ScenarioConfiguration.getNormalConfiguration();
    normalConfiguration.addResource(TestConstants.NONSORT_TEST_CONF);
    final Job normalJob = getJob(normalConfiguration, "NormalJob", TestConstants.NATIVETASK_NONSORT_TEST_INPUTDIR, TestConstants.NATIVETASK_NONSORT_TEST_NORMAL_OUTPUT);
    assertTrue(normalJob.waitForCompletion(true));

    // Outputs of both runs must be byte-identical.
    final boolean outputsMatch = ResultVerifier.verify(TestConstants.NATIVETASK_NONSORT_TEST_NATIVE_OUTPUT, TestConstants.NATIVETASK_NONSORT_TEST_NORMAL_OUTPUT);
    assertEquals("file compare result: if they are the same ,then return true", true, outputsMatch);
    ResultVerifier.verifyCounters(normalJob, nativeJob);
}
Also used : ScenarioConfiguration(org.apache.hadoop.mapred.nativetask.testutil.ScenarioConfiguration) Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)

Example 73 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class AggregateWordHistogram method main.

/**
 * The main driver for the word histogram map/reduce program. Invoke this
 * method to submit the map/reduce job.
 *
 * @param args command-line arguments passed through to
 *             {@code ValueAggregatorJob.createValueAggregatorJob}
 * @throws IOException
 *           When there is communication problems with the job tracker.
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = ValueAggregatorJob.createValueAggregatorJob(args, new Class[] { AggregateWordHistogramPlugin.class });
    // Fix a copy-paste artifact: locate the job jar via this example's own
    // class. The original referenced AggregateWordCount.class, which only
    // worked because both examples ship in the same jar.
    job.setJarByClass(AggregateWordHistogram.class);
    // Exit 0 on success, 1 on job failure.
    int ret = job.waitForCompletion(true) ? 0 : 1;
    System.exit(ret);
}
Also used : ValueAggregatorJob(org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob) Job(org.apache.hadoop.mapreduce.Job)

Example 74 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class BaileyBorweinPlouffe method createJob.

/**
 * Creates and fully configures a BBP job named {@code NAME + "_" + name}.
 *
 * @param name suffix appended to the shared job-name prefix
 * @param conf configuration the job is built from
 * @return the configured, unsubmitted Job
 * @throws IOException if the job cannot be created
 */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, NAME + "_" + name);
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // Mapper configuration.
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reducer configuration: a single reduce task produces the final output.
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // Input configuration.
    job.setInputFormatClass(BbpInputFormat.class);

    final Configuration jobConf = job.getConfiguration();
    // Tasks can legitimately run for a long time: disable the task timeout.
    jobConf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // Speculative duplicates of these tasks would be wasted work.
    jobConf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobConf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job)

Example 75 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class DBCountPageView method run.

// Usage: DBCountPageView [driverClass dburl]
/**
 * Sets up the database, runs the pageview-counting job, and verifies the
 * result against the database contents.
 *
 * @param args optional {@code [driverClass, dburl]}; defaults are used otherwise
 * @return 0 if the job succeeded, 1 if it failed
 * @throws Exception on setup, job, or verification failure
 */
@Override
public int run(String[] args) throws Exception {
    // Fall back to the built-in driver/URL unless both were supplied.
    String driver = DRIVER_CLASS;
    String connectionUrl = DB_URL;
    if (args.length > 1) {
        driver = args[0];
        connectionUrl = args[1];
    }
    initialize(driver, connectionUrl);

    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driver, connectionUrl);

    Job job = Job.getInstance(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);
    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    int exitCode;
    try {
        exitCode = job.waitForCompletion(true) ? 0 : 1;
        // Cross-check the job's output against the database itself.
        boolean verified = verify();
        if (!verified) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        // Always release database resources, even if the job threw.
        shutdown();
    }
    return exitCode;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DBConfiguration(org.apache.hadoop.mapreduce.lib.db.DBConfiguration) Job(org.apache.hadoop.mapreduce.Job)

Aggregations

Job (org.apache.hadoop.mapreduce.Job)886 Path (org.apache.hadoop.fs.Path)498 Configuration (org.apache.hadoop.conf.Configuration)434 Test (org.junit.Test)259 IOException (java.io.IOException)135 FileSystem (org.apache.hadoop.fs.FileSystem)128 File (java.io.File)77 InputSplit (org.apache.hadoop.mapreduce.InputSplit)58 ArrayList (java.util.ArrayList)55 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)55 Scan (org.apache.hadoop.hbase.client.Scan)45 FileStatus (org.apache.hadoop.fs.FileStatus)44 NutchJob (org.apache.nutch.util.NutchJob)43 JobConf (org.apache.hadoop.mapred.JobConf)42 Text (org.apache.hadoop.io.Text)39 NutchConfiguration (org.apache.nutch.util.NutchConfiguration)36 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)35 JobContext (org.apache.hadoop.mapreduce.JobContext)35 GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)35 CommandLine (org.apache.commons.cli.CommandLine)33