use of org.apache.hadoop.mapreduce.RecordReader in project mongo-hadoop by mongodb.
the class GridFSInputFormatTest method testRecordReader.
@Test
public void testRecordReader() throws IOException, InterruptedException {
    List<InputSplit> splits = getSplits();
    Configuration conf = getConfiguration();
    // Split README by sections in Markdown.
    MongoConfigUtil.setGridFSDelimiterPattern(conf, "#+");
    TaskAttemptContext context = mockTaskAttemptContext(conf);
    List<String> sections = new ArrayList<String>();
    for (InputSplit split : splits) {
        RecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            sections.add(reader.getCurrentValue().toString());
        }
    }
    assertEquals(Arrays.asList(readmeSections), sections);
}
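The getSplits(), getConfiguration(), readmeSections, and mockTaskAttemptContext helpers come from the surrounding mongo-hadoop test harness. As a rough illustration only, a mocked task-attempt context like the one used above could be built with Mockito along these lines; the helper name and shape here are assumptions, not the project's actual implementation:

// Hypothetical sketch of a mockTaskAttemptContext-style helper built with Mockito.
// The real mongo-hadoop test helper may be implemented differently.
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

final class RecordReaderTestSupport {
    static TaskAttemptContext mockTaskAttemptContext(final Configuration conf) {
        TaskAttemptContext context = mock(TaskAttemptContext.class);
        // The record reader in the test only pulls the Configuration from the context.
        when(context.getConfiguration()).thenReturn(conf);
        return context;
    }
}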
use of org.apache.hadoop.mapreduce.RecordReader in project hadoop by apache.
the class Chain method addMapper.
/**
 * Adds the first mapper in the chain: it reads its input from the task's
 * input context and writes its output to the chain's queue.
 */
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext, ChainBlockingQueue<KeyValuePair<?, ?>> output, int index) throws IOException, InterruptedException {
    Configuration conf = getConf(index);
    Class<?> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
    Class<?> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS, Object.class);
    RecordReader rr = new ChainRecordReader(inputContext);
    RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output, conf);
    Mapper.Context mapperContext = createMapContext(rr, rw, (MapContext) inputContext, getConf(index));
    MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
    threads.add(runner);
}
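ChainRecordReader and ChainRecordWriter are thin adapters: per the javadoc above, the reader pulls records from the enclosing task's input context and the writer pushes the mapper's output into the chain's blocking queue. For orientation, a minimal generic implementation of the org.apache.hadoop.mapreduce.RecordReader contract (a sketch for illustration, not Chain's actual reader) looks like this:

// Generic single-record skeleton of the RecordReader contract, for illustration only.
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class SingleRecordReader extends RecordReader<LongWritable, Text> {
    private boolean consumed = false;
    private final LongWritable key = new LongWritable(0);
    private final Text value = new Text("example");

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // A real reader would open the underlying stream for the split here.
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        // Emit exactly one (key, value) pair, then report end of input.
        if (consumed) {
            return false;
        }
        consumed = true;
        return true;
    }

    @Override
    public LongWritable getCurrentKey() {
        return key;
    }

    @Override
    public Text getCurrentValue() {
        return value;
    }

    @Override
    public float getProgress() {
        return consumed ? 1.0f : 0.0f;
    }

    @Override
    public void close() throws IOException {
        // Release any underlying resources; none are held in this sketch.
    }
}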
use of org.apache.hadoop.mapreduce.RecordReader in project hadoop by apache.
the class Chain method runMapper.
@SuppressWarnings("unchecked")
void runMapper(TaskInputOutputContext context, int index) throws IOException, InterruptedException {
    Mapper mapper = mappers.get(index);
    // Wrap the task context so the mapper reads its input from and writes its
    // output through that context.
    RecordReader rr = new ChainRecordReader(context);
    RecordWriter rw = new ChainRecordWriter(context);
    Mapper.Context mapperContext = createMapContext(rr, rw, context, getConf(index));
    mapper.run(mapperContext);
    rr.close();
    rw.close(context);
}
use of org.apache.hadoop.mapreduce.RecordReader in project hadoop by apache.
the class TestCombineFileInputFormat method testReinit.
@Test
public void testReinit() throws Exception {
    // Test that a split containing multiple files works correctly,
    // with the child RecordReader getting its initialize() method
    // called a second time.
    TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskId);
    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();
    Path[] files = { new Path("file1"), new Path("file2") };
    long[] lengths = { 1, 1 };
    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
    // The first initialize() call comes from MapTask. We'll do it here.
    rr.initialize(split, context);
    // The first value is the first filename.
    assertTrue(rr.nextKeyValue());
    assertEquals("file1", rr.getCurrentValue().toString());
    // The inner RR will return false, because it only emits one (k, v) pair.
    // But there's another sub-split to process. This returns true to us.
    assertTrue(rr.nextKeyValue());
    // And the 2nd RR will have its initialize method called correctly.
    assertEquals("file2", rr.getCurrentValue().toString());
    // But after both child RRs have returned their singleton (k, v), this
    // should also return false.
    assertFalse(rr.nextKeyValue());
}
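ChildRRInputFormat and DummyRecordReader are fixtures defined elsewhere in this test class. Inferring only from the assertions here and in testRecordReaderInit below, a child reader compatible with CombineFileRecordReader might look roughly like the nested class sketched next; the surrounding test's imports and its DUMMY_KEY constant are assumed to be in scope, and this is not the actual Hadoop test source:

// Hedged sketch of a DummyRecordReader-style child reader. CombineFileRecordReader
// instantiates the child reflectively, so it must expose a
// (CombineFileSplit, TaskAttemptContext, Integer) constructor.
public static class DummyRecordReader extends RecordReader<Text, Text> {
    private final Text key = new Text();
    private final Text value = new Text();
    private boolean emitted = false;

    public DummyRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) {
        // Capture per-context state at construction; testRecordReaderInit checks
        // getCurrentKey() before initialize() is ever called.
        key.set(context.getConfiguration().get(DUMMY_KEY, ""));
        value.set(split.getPath(index).getName());
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
        // Re-initialization with a new context must refresh the key (see testRecordReaderInit).
        key.set(context.getConfiguration().get(DUMMY_KEY, ""));
    }

    @Override
    public boolean nextKeyValue() {
        // Emit exactly one (k, v) pair per sub-split, then signal end of input.
        if (emitted) {
            return false;
        }
        emitted = true;
        return true;
    }

    @Override
    public Text getCurrentKey() {
        return key;
    }

    @Override
    public Text getCurrentValue() {
        return value;
    }

    @Override
    public float getProgress() {
        return emitted ? 1.0f : 0.0f;
    }

    @Override
    public void close() {
    }
}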
use of org.apache.hadoop.mapreduce.RecordReader in project hadoop by apache.
the class TestCombineFileInputFormat method testRecordReaderInit.
@Test
public void testRecordReaderInit() throws InterruptedException, IOException {
    // Test that we properly initialize the child recordreader when
    // CombineFileInputFormat and CombineFileRecordReader are used.
    TaskAttemptID taskId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    Configuration conf1 = new Configuration();
    conf1.set(DUMMY_KEY, "STATE1");
    TaskAttemptContext context1 = new TaskAttemptContextImpl(conf1, taskId);
    // This will create a CombineFileRecordReader that itself contains a
    // DummyRecordReader.
    InputFormat inputFormat = new ChildRRInputFormat();
    Path[] files = { new Path("file1") };
    long[] lengths = { 1 };
    CombineFileSplit split = new CombineFileSplit(files, lengths);
    RecordReader rr = inputFormat.createRecordReader(split, context1);
    assertTrue("Unexpected RR type!", rr instanceof CombineFileRecordReader);
    // Verify that the initial configuration is the one being used.
    // Right after construction the dummy key should have the value "STATE1".
    assertEquals("Invalid initial dummy key value", "STATE1", rr.getCurrentKey().toString());
    // Switch the active context for the RecordReader...
    Configuration conf2 = new Configuration();
    conf2.set(DUMMY_KEY, "STATE2");
    TaskAttemptContext context2 = new TaskAttemptContextImpl(conf2, taskId);
    rr.initialize(split, context2);
    // ...and verify that the new context is propagated to the child record reader.
    assertEquals("Invalid secondary dummy key value", "STATE2", rr.getCurrentKey().toString());
}
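For completeness, the ChildRRInputFormat fixture only has to hand the CombineFileSplit to a CombineFileRecordReader parameterized with the child reader class. A hedged sketch in that style, reusing the DummyRecordReader sketched above, is shown below; it is an assumption, not the actual test source:

// Hedged sketch of a ChildRRInputFormat-style fixture. CombineFileRecordReader's
// (CombineFileSplit, TaskAttemptContext, Class) constructor creates one child
// reader per file in the split via reflection.
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class ChildRRInputFormatSketch extends CombineFileInputFormat<Text, Text> {
    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException {
        return new CombineFileRecordReader<Text, Text>(
                (CombineFileSplit) split, context, DummyRecordReader.class);
    }
}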