use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
the class UserNamePermission method main.
public static void main(String[] args) throws Exception {
  Path outDir = new Path("output");
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "user name check");
  job.setJarByClass(UserNamePermission.class);
  job.setMapperClass(UserNamePermission.UserNameMapper.class);
  job.setCombinerClass(UserNamePermission.UserNameReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setReducerClass(UserNamePermission.UserNameReducer.class);
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, outDir);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
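The UserNameMapper and UserNameReducer inner classes referenced by this driver are not shown in this section. As a rough, illustrative sketch only (not the actual UserNamePermission implementation), a mapper/reducer pair matching the driver's Text map-output key and value classes could look like this:

// Illustrative sketch only -- not the real UserNamePermission inner classes.
// Assumes TextInputFormat input (LongWritable/Text) and Text map output types,
// matching the setMapOutputKeyClass/setMapOutputValueClass calls above.
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class UserNameExample {

  public static class UserNameMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // Hypothetical logic: emit the submitting user's name for every input record.
      context.write(new Text("user"), new Text(System.getProperty("user.name")));
    }
  }

  public static class UserNameReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      // Pass values through; with a single reduce task this yields one output file.
      for (Text value : values) {
        context.write(key, value);
      }
    }
  }
}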
use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
the class TestUberAM method testFailingMapper.
@Override
@Test
public void testFailingMapper() throws IOException, InterruptedException, ClassNotFoundException {
  LOG.info("\n\n\nStarting uberized testFailingMapper().");
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
    return;
  }
  Job job = runFailingMapperJob();
  // should be able to get diags for single task attempt...
  TaskID taskID = new TaskID(job.getJobID(), TaskType.MAP, 0);
  TaskAttemptID aId = new TaskAttemptID(taskID, 0);
  System.out.println("Diagnostics for " + aId + " :");
  for (String diag : job.getTaskDiagnostics(aId)) {
    System.out.println(diag);
  }
  // ...but not for second (shouldn't exist: uber-AM overrode max attempts)
  boolean secondTaskAttemptExists = true;
  try {
    aId = new TaskAttemptID(taskID, 1);
    System.out.println("Diagnostics for " + aId + " :");
    for (String diag : job.getTaskDiagnostics(aId)) {
      System.out.println(diag);
    }
  } catch (Exception e) {
    secondTaskAttemptExists = false;
  }
  Assert.assertEquals(false, secondTaskAttemptExists);
  TaskCompletionEvent[] events = job.getTaskCompletionEvents(0, 2);
  Assert.assertEquals(1, events.length);
  // TIPFAILED if it comes from the AM, FAILED if it comes from the JHS
  TaskCompletionEvent.Status status = events[0].getStatus();
  Assert.assertTrue(status == TaskCompletionEvent.Status.FAILED || status == TaskCompletionEvent.Status.TIPFAILED);
  Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());
  // Disabling till UberAM honors MRJobConfig.MAP_MAX_ATTEMPTS
  //verifyFailingMapperCounters(job);
  // TODO later: add explicit "isUber()" checks of some sort
}
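The test exercises an "uberized" job, where small jobs run entirely inside the MRAppMaster JVM instead of launching separate task containers. The job setup itself happens in runFailingMapperJob(), which is not shown here; a minimal sketch of how a job is typically marked as an uber candidate (using the standard mapreduce.job.ubertask.* keys, not the test's actual configuration) looks like this:

// Sketch: marking a small job as eligible to run uberized.
// The string keys correspond to MRJobConfig.JOB_UBERTASK_ENABLE / _MAXMAPS / _MAXREDUCES.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class UberJobConfigExample {
  public static Job createUberCandidateJob() throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("mapreduce.job.ubertask.enable", true);
    // A job is only uberized if it is small enough; these are the default thresholds.
    conf.setInt("mapreduce.job.ubertask.maxmaps", 9);
    conf.setInt("mapreduce.job.ubertask.maxreduces", 1);
    // Once the job is running, Job.isUber() reports whether it was actually uberized.
    return Job.getInstance(conf, "uber candidate");
  }
}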
use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
the class WordCount method main.
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: wordcount <in> [<in>...] <out>");
    System.exit(2);
  }
  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  }
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
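TokenizerMapper and IntSumReducer are the mapper and reducer of Hadoop's standard WordCount example. They are not part of the snippet above; for reference, a sketch along the lines of the well-known example classes:

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountClasses {

  // Splits each input line into tokens and emits (word, 1).
  public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  // Sums the counts for each word; also usable as the combiner, as in the driver above.
  public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }
}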
use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
the class WordMedian method run.
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: wordmedian <in> <out>");
    return 0;
  }
  setConf(new Configuration());
  Configuration conf = getConf();
  Job job = Job.getInstance(conf, "word median");
  job.setJarByClass(WordMedian.class);
  job.setMapperClass(WordMedianMapper.class);
  job.setCombinerClass(WordMedianReducer.class);
  job.setReducerClass(WordMedianReducer.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  boolean result = job.waitForCompletion(true);
  // Wait for JOB 1 -- get middle value to check for Median
  long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
  int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
  int medianIndex2 = (int) Math.floor((totalWords / 2.0));
  median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
  return (result ? 0 : 1);
}
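The counter lookup above goes through the TaskCounter counter group by name. The same value can also be read against the TaskCounter enum directly, which is a little more compact; a minimal sketch, assuming the job has already completed:

// Sketch: reading the map-output-record count via the TaskCounter enum.
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class CounterLookupExample {
  public static long mapOutputRecords(Job job) throws Exception {
    // Equivalent to looking up "MAP_OUTPUT_RECORDS" in the TaskCounter group by name.
    return job.getCounters()
        .findCounter(TaskCounter.MAP_OUTPUT_RECORDS)
        .getValue();
  }
}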
use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
the class DistSum method compute.
/** Start a job to compute sigma */
private void compute(final String name, Summation sigma) throws IOException {
  if (sigma.getValue() != null)
    throw new IOException("sigma.getValue() != null, sigma=" + sigma);
  // setup remote directory
  final FileSystem fs = FileSystem.get(getConf());
  final Path dir = fs.makeQualified(new Path(parameters.remoteDir, name));
  if (!Util.createNonexistingDirectory(fs, dir))
    return;
  // setup a job
  final Job job = createJob(name, sigma);
  final Path outdir = new Path(dir, "out");
  FileOutputFormat.setOutputPath(job, outdir);
  // start a map/reduce job
  final String startmessage = "steps/parts = " + sigma.E.getSteps() + "/" + parameters.nParts + " = " + Util.long2string(sigma.E.getSteps() / parameters.nParts);
  Util.runJob(name, job, parameters.machine, startmessage, timer);
  final List<TaskResult> results = Util.readJobOutputs(fs, outdir);
  Util.writeResults(name, results, fs, parameters.remoteDir);
  fs.delete(dir, true);
  // combine results
  final List<TaskResult> combined = Util.combine(results);
  final PrintWriter out = Util.createWriter(parameters.localDir, name);
  try {
    for (TaskResult r : combined) {
      final String s = taskResult2string(name, r);
      out.println(s);
      out.flush();
      Util.out.println(s);
    }
  } finally {
    out.close();
  }
  if (combined.size() == 1) {
    final Summation s = combined.get(0).getElement();
    if (sigma.contains(s) && s.contains(sigma))
      sigma.setValue(s.getValue());
  }
}
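The method bails out early when the remote working directory already exists, via Util.createNonexistingDirectory, which is not shown in this section. A simplified, assumed equivalent of that helper (not the actual implementation) would be:

// Sketch: create the job's remote directory only if it does not already exist.
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DirectoryUtilExample {
  public static boolean createNonexistingDirectory(FileSystem fs, Path dir) throws IOException {
    if (fs.exists(dir)) {
      // Another run already produced (or is producing) output here; skip this job.
      return false;
    }
    return fs.mkdirs(dir);
  }
}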