Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.
Class NonSortTest, method getJob.
/**
 * Builds the MapReduce job used by the non-sort test.
 *
 * <p>Anything already present at {@code outputpath} is deleted recursively
 * before the job is configured. The job is returned without being submitted.
 *
 * @param conf configuration used both to obtain the file system and to create the job
 * @param jobName name given to the created job
 * @param inputpath path added as the job input
 * @param outputpath path set as the job output; removed first if it exists
 * @return the configured job
 * @throws IOException if the file system access or the job creation fails
 */
private Job getJob(Configuration conf, String jobName, String inputpath, String outputpath) throws IOException {
  final Path output = new Path(outputpath);

  final FileSystem fs = FileSystem.get(conf);
  try {
    if (fs.exists(output)) {
      fs.delete(output, true);
    }
  } finally {
    // Close on every path: previously a failing exists()/delete() skipped close().
    fs.close();
  }

  final Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(NonSortTestMR.class);
  job.setMapperClass(NonSortTestMR.Map.class);
  job.setReducerClass(NonSortTestMR.KeyHashSumReduce.class);
  job.setOutputKeyClass(Text.class);
  // The map output value type differs from the final output value type.
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(inputpath));
  FileOutputFormat.setOutputPath(job, output);
  return job;
}
Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.
Class NonSortTest, method nonSortTest.
/**
 * Runs the same job twice, first with the native configuration and map-output
 * sorting set to "false", then with the normal configuration, and checks
 * that both runs succeed and that their outputs and counters agree.
 */
@Test
public void nonSortTest() throws Exception {
  // Run with the native configuration, sorting switched off.
  final Configuration nativeSettings = ScenarioConfiguration.getNativeConfiguration();
  nativeSettings.addResource(TestConstants.NONSORT_TEST_CONF);
  nativeSettings.set(TestConstants.NATIVETASK_MAP_OUTPUT_SORT, "false");
  final Job nativeRun = getJob(
      nativeSettings,
      "NativeNonSort",
      TestConstants.NATIVETASK_NONSORT_TEST_INPUTDIR,
      TestConstants.NATIVETASK_NONSORT_TEST_NATIVE_OUTPUT);
  final boolean nativeSucceeded = nativeRun.waitForCompletion(true);
  assertTrue(nativeSucceeded);

  // Run with the normal configuration over the same input.
  final Configuration normalSettings = ScenarioConfiguration.getNormalConfiguration();
  normalSettings.addResource(TestConstants.NONSORT_TEST_CONF);
  final Job normalRun = getJob(
      normalSettings,
      "NormalJob",
      TestConstants.NATIVETASK_NONSORT_TEST_INPUTDIR,
      TestConstants.NATIVETASK_NONSORT_TEST_NORMAL_OUTPUT);
  final boolean normalSucceeded = normalRun.waitForCompletion(true);
  assertTrue(normalSucceeded);

  // Compare the two output locations, then the job counters.
  final boolean outputsMatch = ResultVerifier.verify(
      TestConstants.NATIVETASK_NONSORT_TEST_NATIVE_OUTPUT,
      TestConstants.NATIVETASK_NONSORT_TEST_NORMAL_OUTPUT);
  assertEquals("file compare result: if they are the same ,then return true", true, outputsMatch);
  ResultVerifier.verifyCounters(normalRun, nativeRun);
}
Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.
Class AggregateWordHistogram, method main.
/**
 * Entry point: creates a value-aggregator job that uses
 * {@code AggregateWordHistogramPlugin}, runs it to completion and exits the
 * JVM with status 0 when the job succeeds and 1 otherwise.
 *
 * @param args command-line arguments, handed unchanged to
 *     {@code ValueAggregatorJob.createValueAggregatorJob}
 * @throws IOException propagated from job creation or execution
 * @throws InterruptedException propagated from the job calls
 * @throws ClassNotFoundException propagated from the job calls
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  final Class[] descriptors = new Class[] { AggregateWordHistogramPlugin.class };
  final Job job = ValueAggregatorJob.createValueAggregatorJob(args, descriptors);
  // NOTE(review): the jar is located via AggregateWordCount, not this class —
  // presumably both ship in the same jar; confirm before changing.
  job.setJarByClass(AggregateWordCount.class);
  final boolean succeeded = job.waitForCompletion(true);
  System.exit(succeeded ? 0 : 1);
}
Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.
Class BaileyBorweinPlouffe, method createJob.
/**
 * Creates a job named {@code NAME + "_" + name} and configures its mapper,
 * reducer, input format, task timeout and speculative-execution flags.
 *
 * @param name suffix appended to {@code NAME} to form the job name
 * @param conf configuration the job is created from
 * @return the configured job
 * @throws IOException if the job cannot be created
 */
private static Job createJob(String name, Configuration conf) throws IOException {
  final String jobName = NAME + "_" + name;
  final Job job = Job.getInstance(conf, jobName);
  job.setJarByClass(BaileyBorweinPlouffe.class);

  // Map side: mapper class and its output key/value types.
  job.setMapperClass(BbpMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(BytesWritable.class);

  // Reduce side: one reduce task with the same key/value types.
  job.setReducerClass(BbpReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setNumReduceTasks(1);

  // Input comes from the custom input format.
  job.setInputFormatClass(BbpInputFormat.class);

  final Configuration settings = job.getConfiguration();
  // A timeout of 0 disables the task timeout.
  settings.setLong(MRJobConfig.TASK_TIMEOUT, 0);
  // Speculative execution is switched off for both maps and reduces.
  settings.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
  settings.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

  return job;
}
Use of org.apache.hadoop.mapreduce.Job in the Apache Hadoop project.
Class DBCountPageView, method run.
/**
 * Usage: DBCountPageView [driverClass dburl]
 *
 * <p>Configures and runs the "Count Pageviews of URLs" job, then checks the
 * result with {@code verify()}. When fewer than two arguments are given,
 * {@code DRIVER_CLASS} and {@code DB_URL} are used.
 *
 * @param args optional driver class name and database URL, in that order
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws RuntimeException if {@code verify()} reports an incorrect result
 * @throws Exception if setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
  String driverClassName = DRIVER_CLASS;
  String url = DB_URL;
  if (args.length > 1) {
    driverClassName = args[0];
    url = args[1];
  }

  initialize(driverClassName, url);
  // Everything after initialize() is guarded so shutdown() also runs when
  // job setup fails, not only when the job itself fails.
  // NOTE(review): assumes shutdown() is the counterpart of initialize() — confirm.
  try {
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = Job.getInstance(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    final int ret = job.waitForCompletion(true) ? 0 : 1;
    // verify() is called regardless of the job's exit status, as before.
    boolean correct = verify();
    if (!correct) {
      throw new RuntimeException("Evaluation was not correct!");
    }
    return ret;
  } finally {
    shutdown();
  }
}
Aggregations