Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache: class TestCombineFileInputFormat, method testReinit.
@Test
public void testReinit() throws Exception {
  // Test that a split containing multiple files works correctly,
  // with the child RecordReader getting its initialize() method
  // called a second time.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf = new Configuration();
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);
  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();
  Path[] files = { new Path("file1"), new Path("file2") };
  long[] lengths = { 1, 1 };
  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
  // The first initialize() call comes from MapTask. We'll do it here.
  rr.initialize(split, context);
  // First value is the first filename.
  assertTrue(rr.nextKeyValue());
  assertEquals("file1", rr.getCurrentValue().toString());
  // The inner RR will return false, because it only emits one (k, v) pair.
  // But there's another sub-split to process. This returns true to us.
  assertTrue(rr.nextKeyValue());
  // And the 2nd rr will have its initialize method called correctly.
  assertEquals("file2", rr.getCurrentValue().toString());
  // But after both child RRs have returned their singleton (k, v), this
  // should also return false.
  assertFalse(rr.nextKeyValue());
}
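This test (and testRecordReaderInit below) relies on helpers defined elsewhere in TestCombineFileInputFormat that the excerpt does not show: the DUMMY_KEY constant, ChildRRInputFormat, and DummyRecordReader. The following is a minimal sketch, written as an assumption about their shape, of members that would be nested inside the test class and that would satisfy the assertions above: each child reader emits exactly one (key, value) pair, the key exposes the active configuration, and the value exposes the assigned file name. It is illustrative only, not the project's actual code.

// Assumed imports: java.io.IOException, org.apache.hadoop.io.Text,
// org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext},
// org.apache.hadoop.mapreduce.lib.input.{CombineFileRecordReader,
// CombineFileSplit, FileInputFormat}.

// Hypothetical config key used by testRecordReaderInit; the real constant
// lives in the test class and may be named differently.
private static final String DUMMY_KEY = "testformat.ignored.key";

// InputFormat whose record reader is a CombineFileRecordReader wrapping one
// DummyRecordReader per file of the CombineFileSplit.
private static class ChildRRInputFormat extends FileInputFormat<Text, Text> {
  @SuppressWarnings("unchecked")
  @Override
  public RecordReader<Text, Text> createRecordReader(InputSplit split,
      TaskAttemptContext context) throws IOException {
    return new CombineFileRecordReader((CombineFileSplit) split, context,
        (Class) DummyRecordReader.class);
  }
}

// Child reader that emits exactly one (key, value) pair per file: the key is
// the value of DUMMY_KEY in the active configuration, the value is the name
// of the file this reader is assigned to.
private static class DummyRecordReader extends RecordReader<Text, Text> {
  private TaskAttemptContext context;
  private CombineFileSplit split;
  private int index;
  private boolean emitted;

  // CombineFileRecordReader instantiates child readers reflectively and
  // requires this (CombineFileSplit, TaskAttemptContext, Integer) signature.
  public DummyRecordReader(CombineFileSplit split, TaskAttemptContext context,
      Integer index) {
    this.split = split;
    this.context = context;
    this.index = index;
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) {
    // Re-initialization swaps in the new context, which is what
    // testRecordReaderInit verifies through getCurrentKey().
    this.context = context;
    this.emitted = false;
  }

  @Override
  public boolean nextKeyValue() {
    if (emitted) {
      return false;
    }
    emitted = true;
    return true;
  }

  @Override
  public Text getCurrentKey() {
    return new Text(context.getConfiguration().get(DUMMY_KEY, ""));
  }

  @Override
  public Text getCurrentValue() {
    return new Text(split.getPath(index).getName());
  }

  @Override
  public float getProgress() {
    return emitted ? 1.0f : 0.0f;
  }

  @Override
  public void close() {
  }
}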
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache: class TestCombineFileInputFormat, method testRecordReaderInit.
@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
  // Test that we properly initialize the child RecordReader when
  // CombineFileInputFormat and CombineFileRecordReader are used.
  TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  Configuration conf1 = new Configuration();
  conf1.set(DUMMY_KEY, "STATE1");
  TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);
  // This will create a CombineFileRecordReader that itself contains a
  // DummyRecordReader.
  InputFormat inputFormat = new ChildRRInputFormat();
  Path[] files = { new Path("file1") };
  long[] lengths = { 1 };
  CombineFileSplit split = new CombineFileSplit(files, lengths);
  RecordReader rr = inputFormat.createRecordReader(split, context1);
  assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
  // Verify that the initial configuration is the one being used.
  // Right after construction the dummy key should have value "STATE1".
  assertEquals("Invalid initial dummy key value", "STATE1",
      rr.getCurrentKey().toString());
  // Switch the active context for the RecordReader...
  Configuration conf2 = new Configuration();
  conf2.set(DUMMY_KEY, "STATE2");
  TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
  rr.initialize(split, context2);
  // ...and verify that the new context is propagated to the child record
  // reader.
  assertEquals("Invalid secondary dummy key value", "STATE2",
      rr.getCurrentKey().toString());
}
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache: class TestMRCJCFileOutputCommitter, method testEmptyOutput.
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  // Do not write any output.
  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
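testEmptyOutput and testFailAbort below both lean on shared fixture fields of TestMRCJCFileOutputCommitter (outDir, attempt, taskID, partFile) that the excerpts do not show. A plausible sketch of those fields, consistent with how the tests use them, follows; the concrete values are assumptions, not the project's actual definitions.

// Sketch of the shared fixture fields the committer tests rely on.
// Assumed imports: org.apache.hadoop.fs.Path,
// org.apache.hadoop.mapreduce.TaskAttemptID.
private static final Path outDir = new Path(
    System.getProperty("test.build.data", "/tmp"), "mrcjc-output");

// A task attempt id in the standard string form; MRJobConfig.TASK_ATTEMPT_ID
// is set to this value in each test.
private static final String attempt = "attempt_200707121733_0001_m_000000_0";

// TaskAttemptID parsed back from the string, so taskID.getJobID() matches
// the attempt above.
private static final TaskAttemptID taskID = TaskAttemptID.forName(attempt);

// Name of the output file a single map task attempt is expected to produce.
private static final String partFile = "part-m-00000";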
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache: class TestMRCJCFileOutputCommitter, method testFailAbort.
@SuppressWarnings("unchecked")
public void testFailAbort() throws IOException, InterruptedException {
  Job job = Job.getInstance();
  Configuration conf = job.getConfiguration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
  conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  FileOutputFormat.setOutputPath(job, outDir);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  // write output
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter<?, ?> theRecordWriter =
      theOutputFormat.getRecordWriter(tContext);
  writeOutput(theRecordWriter, tContext);
  // do abort
  Throwable th = null;
  try {
    committer.abortTask(tContext);
  } catch (IOException ie) {
    th = ie;
  }
  assertNotNull(th);
  assertTrue(th instanceof IOException);
  assertTrue(th.getMessage().contains("fake delete failed"));
  File jobTmpDir =
      new File(committer.getJobAttemptPath(jContext).toUri().getPath());
  File taskTmpDir =
      new File(committer.getTaskAttemptPath(tContext).toUri().getPath());
  File expectedFile = new File(taskTmpDir, partFile);
  assertTrue(expectedFile + " does not exist", expectedFile.exists());
  th = null;
  try {
    committer.abortJob(jContext, JobStatus.State.FAILED);
  } catch (IOException ie) {
    th = ie;
  }
  assertNotNull(th);
  assertTrue(th instanceof IOException);
  assertTrue(th.getMessage().contains("fake delete failed"));
  assertTrue("job temp dir does not exist", jobTmpDir.exists());
  FileUtil.fullyDelete(new File(outDir.toString()));
}
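testFailAbort registers a FakeFileSystem for the faildel:// scheme and calls a writeOutput helper, neither of which appears in this excerpt. Sketched below, under stated assumptions, is one way such helpers could be written so that the assertions above hold: a RawLocalFileSystem whose delete() always throws an IOException containing "fake delete failed", and a small helper that writes a few records and always closes the writer. Treat both as assumptions about the shape of the real test code rather than the project's actual implementation.

// A filesystem whose delete() always fails, so abortTask()/abortJob() surface
// an IOException containing "fake delete failed".
// Assumed imports: java.net.URI, org.apache.hadoop.fs.RawLocalFileSystem,
// org.apache.hadoop.io.{NullWritable, Text}.
public static class FakeFileSystem extends RawLocalFileSystem {
  @Override
  public URI getUri() {
    // Matches the fs.defaultFS value "faildel:///" set in testFailAbort.
    return URI.create("faildel:///");
  }

  @Override
  public boolean delete(Path p, boolean recursive) throws IOException {
    throw new IOException("fake delete failed");
  }
}

// Writes a handful of records through the given writer and always closes it,
// so the task attempt directory contains a part file before the abort.
@SuppressWarnings("unchecked")
private void writeOutput(RecordWriter theRecordWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(new Text("key1"), new Text("val1"));
    theRecordWriter.write(nullWritable, new Text("val2"));
    theRecordWriter.write(new Text("key2"), nullWritable);
  } finally {
    theRecordWriter.close(context);
  }
}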
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hadoop by apache: class TestFixedLengthInputFormat, method runRandomTests.
private void runRandomTests(CompressionCodec codec) throws Exception {
  StringBuilder fileName = new StringBuilder("testFormat.txt");
  if (codec != null) {
    fileName.append(".gz");
  }
  localFs.delete(workDir, true);
  Path file = new Path(workDir, fileName.toString());
  int seed = new Random().nextInt();
  LOG.info("Seed = " + seed);
  Random random = new Random(seed);
  int MAX_TESTS = 20;
  LongWritable key;
  BytesWritable value;
  for (int i = 0; i < MAX_TESTS; i++) {
    LOG.info("----------------------------------------------------------");
    // Maximum total records of 999
    int totalRecords = random.nextInt(999) + 1;
    // Test an empty file
    if (i == 8) {
      totalRecords = 0;
    }
    // Maximum bytes in a record of 100K
    int recordLength = random.nextInt(1024 * 100) + 1;
    // For the 11th test, force a record length of 1
    if (i == 10) {
      recordLength = 1;
    }
    // The total bytes in the test file
    int fileSize = (totalRecords * recordLength);
    LOG.info("totalRecords=" + totalRecords
        + " recordLength=" + recordLength);
    // Create the job
    Job job = Job.getInstance(defaultConf);
    if (codec != null) {
      ReflectionUtils.setConf(codec, job.getConfiguration());
    }
    // Create the test file
    ArrayList<String> recordList =
        createFile(file, codec, recordLength, totalRecords);
    assertTrue(localFs.exists(file));
    // Set the fixed record length config property for the job.
    FixedLengthInputFormat.setRecordLength(job.getConfiguration(),
        recordLength);
    int numSplits = 1;
    // Arbitrarily set number of splits.
    if (i > 0) {
      if (i == (MAX_TESTS - 1)) {
        // Test a split size that is less than the record length.
        numSplits = (int) (fileSize / Math.floor(recordLength / 2));
      } else {
        if (MAX_TESTS % i == 0) {
          // Force a split size smaller than the file itself
          // (ensures at least one extra split).
          numSplits = fileSize / (fileSize - random.nextInt(fileSize));
        } else {
          // Just pick a random split size with no upper bound.
          numSplits = Math.max(1,
              fileSize / random.nextInt(Integer.MAX_VALUE));
        }
      }
      LOG.info("Number of splits set to: " + numSplits);
    }
    job.getConfiguration().setLong(
        "mapreduce.input.fileinputformat.split.maxsize",
        (long) (fileSize / numSplits));
    // Set up the input path.
    FileInputFormat.setInputPaths(job, workDir);
    // Try splitting the file in a variety of sizes.
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("Actual number of splits = " + splits.size());
    // Test combined split lengths = total file size
    long recordOffset = 0;
    int recordNumber = 0;
    for (InputSplit split : splits) {
      TaskAttemptContext context =
          MapReduceTestUtil.createDummyMapTaskAttemptContext(
              job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader =
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
          mcontext = new MapContextImpl<LongWritable, BytesWritable,
              LongWritable, BytesWritable>(job.getConfiguration(),
              context.getTaskAttemptID(), reader, null, null,
              MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
      Class<?> clazz = reader.getClass();
      assertEquals("RecordReader class should be FixedLengthRecordReader:",
          FixedLengthRecordReader.class, clazz);
      // Plow through the records in this split.
      while (reader.nextKeyValue()) {
        key = reader.getCurrentKey();
        value = reader.getCurrentValue();
        assertEquals("Checking key",
            (long) (recordNumber * recordLength), key.get());
        String valueString =
            new String(value.getBytes(), 0, value.getLength());
        assertEquals("Checking record length:", recordLength,
            value.getLength());
        assertTrue("Checking for more records than expected:",
            recordNumber < totalRecords);
        String origRecord = recordList.get(recordNumber);
        assertEquals("Checking record content:", origRecord, valueString);
        recordNumber++;
      }
      reader.close();
    }
    assertEquals("Total original records should be total read records:",
        recordList.size(), recordNumber);
  }
}
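runRandomTests depends on a createFile helper (plus localFs, workDir and defaultConf fixture fields) that is not shown here. A minimal sketch of such a helper, matching what the loop expects (totalRecords records of exactly recordLength characters, optionally compressed, returned in write order), could look like the following; the character set and exact signature are assumptions rather than the project's actual code.

// Writes numRecords fixed-length records to targetFile, optionally through the
// given compression codec, and returns the records in write order so the
// reader side can compare them against what FixedLengthRecordReader produces.
// Only ASCII characters are used, so byte length equals character length and
// record boundaries line up with recordLen.
// Assumed imports: java.io.{IOException, OutputStream, OutputStreamWriter,
// Writer}, java.util.{ArrayList, Random}, org.apache.hadoop.fs.Path,
// org.apache.hadoop.io.compress.CompressionCodec.
private static ArrayList<String> createFile(Path targetFile,
    CompressionCodec codec, int recordLen, int numRecords) throws IOException {
  ArrayList<String> recordList = new ArrayList<String>(numRecords);
  OutputStream ostream = localFs.create(targetFile);
  if (codec != null) {
    ostream = codec.createOutputStream(ostream);
  }
  char[] chars = "abcdefghijklmnopqrstuvwxyz0123456789".toCharArray();
  Random rand = new Random();
  Writer writer = new OutputStreamWriter(ostream);
  try {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < numRecords; i++) {
      sb.setLength(0);
      for (int j = 0; j < recordLen; j++) {
        sb.append(chars[rand.nextInt(chars.length)]);
      }
      recordList.add(sb.toString());
      writer.write(sb.toString());
    }
  } finally {
    writer.close();
  }
  return recordList;
}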