Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.
The class TestJoinProperties, method testFormat.
@SuppressWarnings("unchecked")
public int testFormat(Configuration conf, int tupleSize, boolean firstTuple,
    boolean secondTuple, TestType ttype) throws Exception {
  Job job = Job.getInstance(conf);
  CompositeInputFormat format = new CompositeInputFormat();
  int count = 0;
  for (InputSplit split : (List<InputSplit>) format.getSplits(job)) {
    TaskAttemptContext context =
        MapReduceTestUtil.createDummyMapTaskAttemptContext(conf);
    RecordReader reader = format.createRecordReader(split, context);
    MapContext mcontext =
        new MapContextImpl(conf, context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    WritableComparable key = null;
    Writable value = null;
    while (reader.nextKeyValue()) {
      key = (WritableComparable) reader.getCurrentKey();
      value = (Writable) reader.getCurrentValue();
      validateKeyValue(key, value, tupleSize, firstTuple, secondTuple, ttype);
      count++;
    }
  }
  return count;
}
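The pattern above recurs throughout this listing: build a dummy TaskAttemptContext, create a record reader from an input format, wrap both in a MapContextImpl with a null writer and committer, then initialize and drive the reader. Below is a minimal, self-contained sketch of the same pattern outside the Hadoop test tree; it assumes a local input file at file:///tmp/input.txt (hypothetical path) and constructs TaskAttemptContextImpl directly instead of using the test-only MapReduceTestUtil helpers. Note that MapContextImpl lives in a package generally treated as framework-internal, so this kind of direct construction is normally confined to tests.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class MapContextImplSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    // Hypothetical local input path; replace with a file that actually exists.
    FileInputFormat.setInputPaths(job, "file:///tmp/input.txt");

    TextInputFormat format = new TextInputFormat();
    for (InputSplit split : format.getSplits(job)) {
      TaskAttemptContext context =
          new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
      RecordReader<LongWritable, Text> reader =
          format.createRecordReader(split, context);
      // Null writer and committer: as in the tests above, we only drive the reader.
      MapContext<LongWritable, Text, LongWritable, Text> mcontext =
          new MapContextImpl<LongWritable, Text, LongWritable, Text>(
              job.getConfiguration(), context.getTaskAttemptID(), reader,
              null, null, new TaskAttemptContextImpl.DummyReporter(), split);
      reader.initialize(split, mcontext);
      while (reader.nextKeyValue()) {
        System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
      }
      reader.close();
    }
  }
}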
Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.
The class TestGridMixClasses, method testLoadMapper.
/*
 * Test LoadMapper: loadMapper should write a record to the writer for each reduce.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test(timeout = 10000)
public void testLoadMapper() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();
  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();
  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext =
      new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx =
      new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);
  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(ctx.getConfiguration(), true);
  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, clean
  mapper.run(ctx);
  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());
}
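LoadRecordGkGrWriter and FakeRecordReader in this test are GridMix test doubles whose source is not part of this listing. A rough, hypothetical equivalent of the writer side (the name CapturingRecordWriter and the Map-backed storage are assumptions, not the actual GridMix code) is sketched below; it simply records every key/value pair the mapper emits so a test can assert on the captured data afterwards.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class CapturingRecordWriter<K, V> extends RecordWriter<K, V> {
  private final Map<K, V> data = new HashMap<K, V>();

  @Override
  public void write(K key, V value) {
    // Note: a real test double may need to copy the Writables here,
    // since some mappers reuse key/value objects between calls.
    data.put(key, value);
  }

  @Override
  public void close(TaskAttemptContext context) {
    // nothing to flush; records live in memory
  }

  public Map<K, V> getData() {
    return data;
  }
}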
Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.
The class MapTask, method runNewMapper.
@SuppressWarnings("unchecked")
private <INKEY, INVALUE, OUTKEY, OUTVALUE> void runNewMapper(final JobConf job,
    final TaskSplitIndex splitIndex, final TaskUmbilicalProtocol umbilical,
    TaskReporter reporter)
    throws IOException, ClassNotFoundException, InterruptedException {
  // make a task context so we can get the classes
  org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
      new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(job, getTaskID(),
          reporter);
  // make a mapper
  org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper =
      (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>)
          ReflectionUtils.newInstance(taskContext.getMapperClass(), job);
  // make the input format
  org.apache.hadoop.mapreduce.InputFormat<INKEY, INVALUE> inputFormat =
      (org.apache.hadoop.mapreduce.InputFormat<INKEY, INVALUE>)
          ReflectionUtils.newInstance(taskContext.getInputFormatClass(), job);
  // rebuild the input split
  org.apache.hadoop.mapreduce.InputSplit split = null;
  split = getSplitDetails(new Path(splitIndex.getSplitLocation()),
      splitIndex.getStartOffset());
  LOG.info("Processing split: " + split);
  org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input =
      new NewTrackingRecordReader<INKEY, INVALUE>(split, inputFormat, reporter,
          taskContext);
  job.setBoolean(JobContext.SKIP_RECORDS, isSkipping());
  org.apache.hadoop.mapreduce.RecordWriter output = null;
  // get an output object
  if (job.getNumReduceTasks() == 0) {
    output = new NewDirectOutputCollector(taskContext, job, umbilical, reporter);
  } else {
    output = new NewOutputCollector(taskContext, job, umbilical, reporter);
  }
  org.apache.hadoop.mapreduce.MapContext<INKEY, INVALUE, OUTKEY, OUTVALUE> mapContext =
      new MapContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(job, getTaskID(), input,
          output, committer, reporter, split);
  org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context
      mapperContext =
          new WrappedMapper<INKEY, INVALUE, OUTKEY, OUTVALUE>().getMapContext(mapContext);
  try {
    input.initialize(split, mapperContext);
    mapper.run(mapperContext);
    mapPhase.complete();
    setPhase(TaskStatus.Phase.SORT);
    statusUpdate(umbilical);
    input.close();
    input = null;
    output.close(mapperContext);
    output = null;
  } finally {
    closeQuietly(input);
    closeQuietly(output, mapperContext);
  }
}
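runNewMapper is the one production call site in this listing: MapContextImpl carries the reader, collector, committer, and reporter, and WrappedMapper adapts it to the Mapper.Context type that Mapper.run() expects. The sketch below reproduces that wiring in isolation, with an in-memory reader and writer and the stock org.apache.hadoop.mapreduce.Mapper (whose default map() is the identity). Everything named Stub* is hypothetical scaffolding, not Hadoop code; the committer and split are left null because the identity mapper and the stub reader never ask the context for them.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class WrappedMapperSketch {

  /** In-memory reader feeding a fixed list of lines (hypothetical stub). */
  static class StubReader extends RecordReader<Text, Text> {
    private final List<String> lines = Arrays.asList("a", "b", "c");
    private int pos = -1;

    @Override public void initialize(InputSplit split, TaskAttemptContext ctx) { }
    @Override public boolean nextKeyValue() { return ++pos < lines.size(); }
    @Override public Text getCurrentKey() { return new Text(Integer.toString(pos)); }
    @Override public Text getCurrentValue() { return new Text(lines.get(pos)); }
    @Override public float getProgress() { return pos / (float) lines.size(); }
    @Override public void close() { }
  }

  /** In-memory writer collecting whatever the mapper emits (hypothetical stub). */
  static class StubWriter extends RecordWriter<Text, Text> {
    final List<String> out = new ArrayList<String>();
    @Override public void write(Text key, Text value) { out.add(key + "=" + value); }
    @Override public void close(TaskAttemptContext ctx) { }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    TaskAttemptID taskId = new TaskAttemptID();
    StubReader reader = new StubReader();
    StubWriter writer = new StubWriter();

    // Same wiring as runNewMapper: MapContextImpl -> WrappedMapper -> Mapper.run().
    // Committer and split are null; the identity mapper and stub reader ignore them.
    MapContext<Text, Text, Text, Text> mapContext =
        new MapContextImpl<Text, Text, Text, Text>(conf, taskId, reader, writer,
            null, new TaskAttemptContextImpl.DummyReporter(), null);
    Mapper<Text, Text, Text, Text>.Context mapperContext =
        new WrappedMapper<Text, Text, Text, Text>().getMapContext(mapContext);

    reader.initialize(null, mapperContext);
    new Mapper<Text, Text, Text, Text>().run(mapperContext);  // identity mapper
    writer.close(mapperContext);

    System.out.println(writer.out);  // expected: [0=a, 1=b, 2=c]
  }
}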
Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.
The class TestFixedLengthInputFormat, method runRandomTests.
private void runRandomTests(CompressionCodec codec) throws Exception {
  StringBuilder fileName = new StringBuilder("testFormat.txt");
  if (codec != null) {
    fileName.append(".gz");
  }
  localFs.delete(workDir, true);
  Path file = new Path(workDir, fileName.toString());
  int seed = new Random().nextInt();
  LOG.info("Seed = " + seed);
  Random random = new Random(seed);
  int MAX_TESTS = 20;
  LongWritable key;
  BytesWritable value;
  for (int i = 0; i < MAX_TESTS; i++) {
    LOG.info("----------------------------------------------------------");
    // Maximum total records of 999
    int totalRecords = random.nextInt(999) + 1;
    // Test an empty file
    if (i == 8) {
      totalRecords = 0;
    }
    // Maximum bytes in a record of 100K
    int recordLength = random.nextInt(1024 * 100) + 1;
    // For the 11th test, force a record length of 1
    if (i == 10) {
      recordLength = 1;
    }
    // The total bytes in the test file
    int fileSize = (totalRecords * recordLength);
    LOG.info("totalRecords=" + totalRecords + " recordLength=" + recordLength);
    // Create the job
    Job job = Job.getInstance(defaultConf);
    if (codec != null) {
      ReflectionUtils.setConf(codec, job.getConfiguration());
    }
    // Create the test file
    ArrayList<String> recordList =
        createFile(file, codec, recordLength, totalRecords);
    assertTrue(localFs.exists(file));
    // Set the fixed length record length config property for the job
    FixedLengthInputFormat.setRecordLength(job.getConfiguration(), recordLength);
    int numSplits = 1;
    // Arbitrarily set number of splits.
    if (i > 0) {
      if (i == (MAX_TESTS - 1)) {
        // Test a split size that is less than record len
        numSplits = (int) (fileSize / Math.floor(recordLength / 2));
      } else {
        if (MAX_TESTS % i == 0) {
          // Let us create a split size that is forced to be
          // smaller than the end file itself, (ensures 1+ splits)
          numSplits = fileSize / (fileSize - random.nextInt(fileSize));
        } else {
          // Just pick a random split size with no upper bound
          numSplits = Math.max(1, fileSize / random.nextInt(Integer.MAX_VALUE));
        }
      }
      LOG.info("Number of splits set to: " + numSplits);
    }
    job.getConfiguration().setLong(
        "mapreduce.input.fileinputformat.split.maxsize",
        (long) (fileSize / numSplits));
    // Setup the input path
    FileInputFormat.setInputPaths(job, workDir);
    // Try splitting the file in a variety of sizes
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("Actual number of splits = " + splits.size());
    // Test combined split lengths = total file size
    long recordOffset = 0;
    int recordNumber = 0;
    for (InputSplit split : splits) {
      TaskAttemptContext context =
          MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader =
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
              job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
              MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
      Class<?> clazz = reader.getClass();
      assertEquals("RecordReader class should be FixedLengthRecordReader:",
          FixedLengthRecordReader.class, clazz);
      // Plow through the records in this split
      while (reader.nextKeyValue()) {
        key = reader.getCurrentKey();
        value = reader.getCurrentValue();
        assertEquals("Checking key", (long) (recordNumber * recordLength), key.get());
        String valueString = new String(value.getBytes(), 0, value.getLength());
        assertEquals("Checking record length:", recordLength, value.getLength());
        assertTrue("Checking for more records than expected:",
            recordNumber < totalRecords);
        String origRecord = recordList.get(recordNumber);
        assertEquals("Checking record content:", origRecord, valueString);
        recordNumber++;
      }
      reader.close();
    }
    assertEquals("Total original records should be total read records:",
        recordList.size(), recordNumber);
  }
}
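Both FixedLengthInputFormat tests hinge on a single configuration property, the fixed record length: runRandomTests sets it for each iteration, while testNoRecordLength below deliberately omits it so that RecordReader.initialize() fails. A small, hypothetical job-setup helper (the class name, method name, and 100-byte record length are illustrative, not taken from the Hadoop tests) shows the minimum configuration a real job would need:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthJobSetup {

  // Configures a job to read 100-byte fixed-length records from inputDir.
  public static Job newFixedLengthJob(Configuration conf, String inputDir)
      throws Exception {
    Job job = Job.getInstance(conf);
    // The property the tests above set (or omit): the fixed record length in bytes.
    FixedLengthInputFormat.setRecordLength(job.getConfiguration(), 100);
    job.setInputFormatClass(FixedLengthInputFormat.class);
    FileInputFormat.setInputPaths(job, inputDir);
    return job;
  }
}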
Use of org.apache.hadoop.mapreduce.task.MapContextImpl in project hadoop by apache.
The class TestFixedLengthInputFormat, method testNoRecordLength.
/**
 * Test with no record length set.
 */
@Test(timeout = 5000)
public void testNoRecordLength() throws Exception {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, "testFormat.txt");
  createFile(file, null, 10, 10);
  // Create the job and do not set fixed record length
  Job job = Job.getInstance(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  List<InputSplit> splits = format.getSplits(job);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      TaskAttemptContext context =
          MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader =
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
              job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
              MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
    } catch (IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for not setting record length:", exceptionThrown);
}