Use of com.mongodb.hadoop.mapred.BSONFileInputFormat in the mongo-hadoop project by mongodb.
This is the method enronEmails of the class BSONFileInputFormatTest.
/**
 * Reads the Enron mail BSON dump through {@link BSONFileInputFormat} and verifies that
 * iterating over every split yields the expected total number of records.
 *
 * <p>The input path is set under both the Hadoop 2.X and the Hadoop 1.2.X configuration
 * keys so the same test body works against either version.
 *
 * @throws IOException propagated from the input format or from a record reader
 */
@Test
public void enronEmails() throws IOException {
    BSONFileInputFormat inputFormat = new BSONFileInputFormat();
    JobConf job = new JobConf();
    String inputDirectory = new File(EXAMPLE_DATA_HOME, "/dump/enron_mail/messages.bson").getAbsoluteFile().toURI().toString();
    // Hadoop 2.X
    job.set("mapreduce.input.fileinputformat.inputdir", inputDirectory);
    // Hadoop 1.2.X
    job.set("mapred.input.dir", inputDirectory);
    FileSplit[] splits = inputFormat.getSplits(job, 5);
    int count = 0;
    // A single writable is reused for every record; only the number of records matters here.
    BSONWritable writable = new BSONWritable();
    for (FileSplit split : splits) {
        RecordReader<NullWritable, BSONWritable> recordReader = inputFormat.getRecordReader(split, job, null);
        // Close each reader explicitly so a split's resources are released even when
        // reading fails part-way; try/finally does not rely on RecordReader being AutoCloseable.
        try {
            while (recordReader.next(null, writable)) {
                count++;
            }
        } finally {
            recordReader.close();
        }
    }
    assertEquals("There are 501513 messages in the enron corpus", 501513, count);
}
Aggregations