Use of com.mongodb.hadoop.input.BSONFileRecordReader in the mongo-hadoop project by mongodb.
From the class BSONFileRecordReaderTest, method testGetCurrentKey.
/**
 * Checks which value {@code getCurrentKey()} reports for the first record of
 * the inventory fixture under three key-field settings: the default
 * ("_id"), a nested field ("price.msrp"), and an array element addressed by
 * index ("tags.0").
 *
 * @throws Exception if the fixture cannot be located or read
 */
@Test
public void testGetCurrentKey() throws Exception {
    URI path = BSONFileRecordReaderTest.class.getResource("/bookstore-dump/inventory.bson").toURI();
    File file = new File(path);
    JobConf conf = new JobConf();

    // Default case: "_id" is used as inputKey.
    assertEquals(new ObjectId("4d2a6084c6237b412fcd5597"),
        readFirstKey(path, file.length(), null, conf));

    // Use a nested field as inputKey.
    // Integer.valueOf keeps the call on assertEquals(Object, Object).
    assertEquals(Integer.valueOf(33),
        readFirstKey(path, file.length(), "price.msrp", conf));

    // Use a key within an array as the inputKey.
    assertEquals("html5",
        readFirstKey(path, file.length(), "tags.0", conf));
}

/**
 * Opens a fresh reader over the whole file, advances to the first record and
 * returns its key. The reader is always closed so the fixture's file handle
 * is not leaked between scenarios.
 *
 * @param path     location of the BSON file
 * @param length   number of bytes the split should cover, starting at offset 0
 * @param keyField field to use as the input key, or {@code null} to leave the
 *                 split's key field unset
 * @param conf     job configuration handed to the reader
 * @return the key of the first record
 * @throws Exception if the reader cannot be initialised, read or closed
 */
private static Object readFirstKey(final URI path, final long length, final String keyField,
                                   final JobConf conf) throws Exception {
    BSONFileRecordReader reader = new BSONFileRecordReader();
    try {
        BSONFileSplit split = new BSONFileSplit(new Path(path), 0, length, new String[0]);
        if (keyField != null) {
            split.setKeyField(keyField);
        }
        reader.init(split, conf);
        assertTrue(reader.nextKeyValue());
        return reader.getCurrentKey();
    } finally {
        reader.close();
    }
}
Use of com.mongodb.hadoop.input.BSONFileRecordReader in the mongo-hadoop project by mongodb.
From the class BSONFileInputFormat, method createRecordReader.
/**
 * Builds the record reader for the given split.
 *
 * @param split   the split to read; anything that is not a
 *                {@code BSONFileSplit} is cast to {@code FileSplit}
 * @param context task context supplying the configuration for the splitter
 * @return a {@code BSONFileRecordReader}, constructed with an explicit
 *         starting position when the split is not a {@code BSONFileSplit}
 * @throws IOException          declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader createRecordReader(final InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException {
    if (!(split instanceof BSONFileSplit)) {
        // This split did not come from BSONSplitter, so the first document
        // to iterate from has to be located before reading can begin.
        FileSplit plainSplit = (FileSplit) split;
        BSONSplitter locator = new BSONSplitter();
        locator.setConf(context.getConfiguration());
        locator.setInputPath(plainSplit.getPath());
        return new BSONFileRecordReader(locator.getStartingPositionForSplit(plainSplit));
    }

    // A split produced by BSONSplitter already starts at a whole document.
    return new BSONFileRecordReader();
}
Aggregations