Use of org.apache.hadoop.mapreduce.Job in project hbase by apache.
The class TestRowCounter, method runRowCount.
/**
 * Runs the RowCounter map-reduce job with the supplied arguments and verifies
 * that the ROWS counter matches the expected value.
 *
 * @param args the command line arguments to be used for the rowcounter job.
 * @param expectedCount the expected row count (result of the map reduce job).
 * @throws Exception if the job cannot be created or executed.
 */
private void runRowCount(String[] args, int expectedCount) throws Exception {
  Job job = RowCounter.createSubmittableJob(TEST_UTIL.getConfiguration(), args);
  final long startMillis = System.currentTimeMillis();
  job.waitForCompletion(true);
  // Timing is logged for diagnostics only; it is not part of the assertion.
  LOG.debug("row count duration (ms): " + (System.currentTimeMillis() - startMillis));
  assertTrue(job.isSuccessful());
  Counter rows = job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS);
  assertEquals(expectedCount, rows.getValue());
}
Use of org.apache.hadoop.mapreduce.Job in project hadoop-book by elephantscale.
The class SecondarySort, method main.
/**
 * Entry point: configures and runs the secondary-sort map-reduce job.
 * Expects exactly two remaining arguments: the input path and the output path.
 * Exits with status 2 on bad usage, 0 on job success, 1 on job failure.
 *
 * @param args generic Hadoop options followed by &lt;in&gt; &lt;out&gt;.
 * @throws Exception if job setup or execution fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    // Fixed typo in the usage message ("secondarysrot" -> "secondarysort").
    System.err.println("Usage: secondarysort <in> <out>");
    System.exit(2);
  }
  // Job(Configuration, String) is deprecated; use the static factory instead.
  Job job = Job.getInstance(conf, "secondary sort");
  job.setJarByClass(SecondarySort.class);
  job.setMapperClass(MapClass.class);
  job.setReducerClass(Reduce.class);
  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);
  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);
  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Use of org.apache.hadoop.mapreduce.Job in project hadoop-book by elephantscale.
The class MRWordCount21, method run.
/**
 * Configures and runs the word-count job using the new (mapreduce) API.
 *
 * @param args expects args[0] = input path, args[1] = output path.
 * @return 0 if the job completed successfully, 1 otherwise (including bad usage).
 * @throws Exception if job setup or execution fails.
 */
@Override
public int run(String[] args) throws Exception {
  System.out.println("Running MR: MRWordCount21");
  // Guard against missing arguments: the original indexed args[0]/args[1]
  // unconditionally and threw ArrayIndexOutOfBoundsException on bad usage.
  if (args.length < 2) {
    System.err.println("Usage: MRWordCount21 <in> <out>");
    return 1;
  }
  // Job(Configuration) is deprecated; use the static factory instead.
  Job job = Job.getInstance(getConf());
  job.setJarByClass(MRWordCount21.class);
  job.setJobName("MRWordCount21");
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setMapperClass(Map21.class);
  // The reducer doubles as a combiner: word-count reduction is associative.
  job.setCombinerClass(Reduce21.class);
  job.setReducerClass(Reduce21.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  System.out.println("Input path: " + args[0]);
  System.out.println("Output path: " + args[1]);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Use of org.apache.hadoop.mapreduce.Job in project mavuno by metzlerd.
The class CombineScores, method run.
/**
 * Configures and runs the CombineScores job: sums per-key double scores from
 * text input under Mavuno.CombineScores.InputPath into
 * Mavuno.CombineScores.OutputPath.
 *
 * @return 0 if the job completed successfully, 1 otherwise.
 * @throws ClassNotFoundException if a job class cannot be resolved.
 * @throws InterruptedException if the job is interrupted while running.
 * @throws IOException if the input or output paths cannot be accessed.
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
  Configuration conf = getConf();
  String inputPath = MavunoUtils.getRequiredParam("Mavuno.CombineScores.InputPath", conf);
  String outputPath = MavunoUtils.getRequiredParam("Mavuno.CombineScores.OutputPath", conf);
  sLogger.info("Tool name: CombineScores");
  sLogger.info(" - Input path: " + inputPath);
  sLogger.info(" - Output path: " + outputPath);
  // Job(Configuration) is deprecated; use the static factory instead.
  Job job = Job.getInstance(conf);
  job.setJobName("CombineScores");
  job.setJarByClass(CombineScores.class);
  MavunoUtils.recursivelyAddInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  // Propagate job failure to the caller: the original ignored the result of
  // waitForCompletion and returned 0 even when the job failed.
  return job.waitForCompletion(true) ? 0 : 1;
}
Use of org.apache.hadoop.mapreduce.Job in project mavuno by metzlerd.
The class Split, method run.
/**
 * Configures and runs the Split job: reads ContextPatternWritable/DoubleWritable
 * pairs from sequence files under Mavuno.Split.InputPath, partitions and sorts
 * them by the configured split key, and writes text output to
 * Mavuno.Split.OutputPath. A single reducer is used so the output is one
 * globally ordered file.
 *
 * @return 0 if the job completed successfully, 1 otherwise.
 * @throws ClassNotFoundException if a job class cannot be resolved.
 * @throws InterruptedException if the job is interrupted while running.
 * @throws IOException if the input or output paths cannot be accessed.
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
  Configuration conf = getConf();
  String inputPath = MavunoUtils.getRequiredParam("Mavuno.Split.InputPath", conf);
  String outputPath = MavunoUtils.getRequiredParam("Mavuno.Split.OutputPath", conf);
  String splitKey = MavunoUtils.getRequiredParam("Mavuno.Split.SplitKey", conf);
  sLogger.info("Tool name: Split");
  sLogger.info(" - Input path: " + inputPath);
  sLogger.info(" - Output path: " + outputPath);
  sLogger.info(" - Split key: " + splitKey);
  // Job(Configuration) is deprecated; use the static factory instead.
  Job job = Job.getInstance(conf);
  job.setJobName("Split");
  job.setJarByClass(Split.class);
  MavunoUtils.recursivelyAddInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(ContextPatternWritable.class);
  job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
  job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
  job.setMapOutputValueClass(DoubleWritable.class);
  job.setOutputKeyClass(ContextPatternWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  // Single reducer to produce one totally ordered output file.
  job.setNumReduceTasks(1);
  // Propagate job failure to the caller: the original ignored the result of
  // waitForCompletion and returned 0 even when the job failed.
  return job.waitForCompletion(true) ? 0 : 1;
}
Aggregations