Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
The class TestMRCJCFileOutputCommitter, method testAbort.
@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // write output
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);

  // do abort
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
  assertFalse("job temp dir still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);

  FileUtil.fullyDelete(new File(outDir.toString()));
}
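
The writeOutput helper called above is defined elsewhere in TestMRCJCFileOutputCommitter; a minimal sketch of what such a helper could look like, assuming it simply emits a few Text records and closes the writer (the keys and values here are illustrative, not the project's actual data):

// Hypothetical sketch of the writeOutput helper referenced above; the real
// helper in TestMRCJCFileOutputCommitter may write different keys and values.
private void writeOutput(RecordWriter theRecordWriter, TaskAttemptContext context)
    throws IOException, InterruptedException {
  try {
    // emit a handful of records so abortTask has something to clean up
    theRecordWriter.write(new Text("key1"), new Text("val1"));
    theRecordWriter.write(new Text("key2"), new Text("val2"));
  } finally {
    // closing the writer flushes the part file into the task's temp directory
    theRecordWriter.close(context);
  }
}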
Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
The class TestMRMultipleOutputs, method _testMultipleOutputs.
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
  String input = "a\nb\nc\nd\ne\nc\nd\ne";
  Configuration conf = createJobConf();
  Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
  job.setJobName("mo");
  MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class,
      LongWritable.class, Text.class);
  MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class,
      IntWritable.class, Text.class);
  MultipleOutputs.setCountersEnabled(job, withCounters);
  job.setMapperClass(MOMap.class);
  job.setReducerClass(MOReduce.class);
  job.waitForCompletion(true);

  // assert number of named output part files
  int namedOutputCount = 0;
  int valueBasedOutputCount = 0;
  FileSystem fs = OUT_DIR.getFileSystem(conf);
  FileStatus[] statuses = fs.listStatus(OUT_DIR);
  for (FileStatus status : statuses) {
    String fileName = status.getPath().getName();
    if (fileName.equals("text-m-00000")
        || fileName.equals("text-m-00001")
        || fileName.equals("text-r-00000")
        || fileName.equals("sequence_A-m-00000")
        || fileName.equals("sequence_A-m-00001")
        || fileName.equals("sequence_B-m-00000")
        || fileName.equals("sequence_B-m-00001")
        || fileName.equals("sequence_B-r-00000")
        || fileName.equals("sequence_C-r-00000")) {
      namedOutputCount++;
    } else if (fileName.equals("a-r-00000")
        || fileName.equals("b-r-00000")
        || fileName.equals("c-r-00000")
        || fileName.equals("d-r-00000")
        || fileName.equals("e-r-00000")) {
      valueBasedOutputCount++;
    }
  }
  assertEquals(9, namedOutputCount);
  assertEquals(5, valueBasedOutputCount);

  // assert TextOutputFormat files correctness
  BufferedReader reader = new BufferedReader(new InputStreamReader(
      fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    assertTrue(line.endsWith(TEXT));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);

  // assert SequenceOutputFormat files correctness
  SequenceFile.Reader seqReader = new SequenceFile.Reader(
      fs, new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
  assertEquals(IntWritable.class, seqReader.getKeyClass());
  assertEquals(Text.class, seqReader.getValueClass());
  count = 0;
  IntWritable key = new IntWritable();
  Text value = new Text();
  while (seqReader.next(key, value)) {
    assertEquals(SEQUENCE, value.toString());
    count++;
  }
  seqReader.close();
  assertFalse(count == 0);

  if (withCounters) {
    CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    assertEquals(9, counters.size());
    assertEquals(4, counters.findCounter(TEXT).getValue());
    assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
    assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
    assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
    assertEquals(2, counters.findCounter("a").getValue());
    assertEquals(2, counters.findCounter("b").getValue());
    assertEquals(4, counters.findCounter("c").getValue());
    assertEquals(4, counters.findCounter("d").getValue());
    assertEquals(4, counters.findCounter("e").getValue());
  }
}
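
MOMap and MOReduce are defined elsewhere in TestMRMultipleOutputs and are what actually produce the named and value-based part files asserted above. A rough, assumed sketch of the mapper side follows; the real classes control the exact file names and counter values, so details will differ:

// Illustrative sketch only: the real MOMap in TestMRMultipleOutputs drives the
// exact "text", "sequence_A"/"sequence_B" files and counters asserted above.
public static class MOMap extends Mapper<LongWritable, Text, LongWritable, Text> {
  private MultipleOutputs<LongWritable, Text> mos;

  @Override
  protected void setup(Context context) {
    mos = new MultipleOutputs<LongWritable, Text>(context);
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // pass the record through to the default output
    context.write(key, value);
    // write to the named outputs registered with addNamedOutput(...)
    mos.write("text", key, new Text("text"));
    mos.write("sequence", new IntWritable(1), new Text("sequence"), "sequence_A");
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    // MultipleOutputs keeps its own writers; they must be closed explicitly
    mos.close();
  }
}

The value-based files such as a-r-00000 presumably come from the reducer calling the write(key, value, baseOutputPath) overload with the record's value ("a" through "e") as the base output path.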
Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
The class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.
@Test
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);

  FileOutputFormat.setOutputPath(job, outdir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);

  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);

  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        iformat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
        new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.nextKeyValue()) {
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        iwritable = reader.getCurrentKey();
        dwritable = reader.getCurrentValue();
        assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
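
The heart of the write loop above is re-encoding each Writable into raw bytes before handing it to the binary output format. A small helper capturing that pattern might look like this (the helper name is illustrative, not part of the test):

// Illustrative helper: serialize any Writable into a BytesWritable using a
// reusable DataOutputBuffer, mirroring the loop body in testBinary above.
private static void toBytesWritable(Writable w, DataOutputBuffer buf, BytesWritable out)
    throws IOException {
  buf.reset();                                  // clear previous contents
  w.write(buf);                                 // the Writable serializes itself
  out.set(buf.getData(), 0, buf.getLength());   // copy only the valid bytes
}

With such a helper, each loop iteration would reduce to toBytesWritable(iwritable, outbuf, bkey), toBytesWritable(dwritable, outbuf, bval), and a single writer.write(bkey, bval).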
Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
The class TestMRSequenceFileAsBinaryOutputFormat, method testcheckOutputSpecsForbidRecordCompression.
@Test
public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
  Job job = Job.getInstance();
  FileSystem fs = FileSystem.getLocal(job.getConfiguration());
  Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
  fs.delete(outputdir, true);

  // Without an output path, FileOutputFormat.checkOutputSpecs would throw
  // an InvalidJobConfException.
  FileOutputFormat.setOutputPath(job, outputdir);

  // SequenceFileAsBinaryOutputFormat doesn't support record compression;
  // checkOutputSpecs should reject it. Block compression, however, is allowed.
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  try {
    new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
  } catch (Exception e) {
    fail("Block compression should be allowed for SequenceFileAsBinaryOutputFormat: caught "
        + e.getClass().getName());
  }

  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
  try {
    new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    fail("Record compression should not be allowed for SequenceFileAsBinaryOutputFormat");
  } catch (InvalidJobConfException ie) {
    // expected
  } catch (Exception e) {
    fail("Expected " + InvalidJobConfException.class.getName()
        + " but caught " + e.getClass().getName());
  }
}
Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.
The class TestDataDrivenDBInputFormat, method testDateSplits.
@Test
public void testDateSplits() throws Exception {
  Statement s = connection.createStatement();
  final String DATE_TABLE = "datetable";
  final String COL = "foo";
  try {
    // delete the table if it already exists.
    s.executeUpdate("DROP TABLE " + DATE_TABLE);
  } catch (SQLException e) {
  }

  // Create the table.
  s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " DATE)");
  s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
  s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
  s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
  s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

  // commit this tx.
  connection.commit();

  Configuration conf = new Configuration();
  conf.set("fs.defaultFS", "file:///");
  FileSystem fs = FileSystem.getLocal(conf);
  fs.delete(new Path(OUT_DIR), true);

  // now do a dd import
  Job job = Job.getInstance(conf);
  job.setMapperClass(ValMapper.class);
  job.setReducerClass(Reducer.class);
  job.setMapOutputKeyClass(DateCol.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setOutputKeyClass(DateCol.class);
  job.setOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(1);
  job.getConfiguration().setInt("mapreduce.map.tasks", 2);
  FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
  DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, null, null);
  DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

  boolean ret = job.waitForCompletion(true);
  assertTrue("job failed", ret);

  // Check to see that we imported as much as we thought we did.
  assertEquals("Did not get all the records", 4,
      job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue());
}
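
DateCol and ValMapper are declared elsewhere in TestDataDrivenDBInputFormat. The input class handed to DataDrivenDBInputFormat.setInput must be a DBWritable, and since it is also used as the map output key it needs to be a WritableComparable as well. The sketch below is an assumption about the shape of that class, not the project's actual code:

// Rough sketch of a DateCol-style record class; the real DateCol in
// TestDataDrivenDBInputFormat may differ in detail.
public static class DateCol implements DBWritable, WritableComparable<DateCol> {
  java.sql.Date d;

  public void readFields(ResultSet rs) throws SQLException {
    d = rs.getDate(1);                        // the single DATE column "foo"
  }

  public void write(PreparedStatement ps) throws SQLException {
    ps.setDate(1, d);                         // unused for imports, required by DBWritable
  }

  public void readFields(DataInput in) throws IOException {
    d = new java.sql.Date(in.readLong());     // round-trip via epoch millis
  }

  public void write(DataOutput out) throws IOException {
    out.writeLong(d.getTime());
  }

  public int compareTo(DateCol other) {
    return d.compareTo(other.d);
  }

  @Override
  public int hashCode() {
    return d.hashCode();                      // keeps hash partitioning consistent
  }

  @Override
  public String toString() {
    return d.toString();
  }
}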