use of com.mongodb.hadoop.input.GridFSSplit in project mongo-hadoop by mongodb.
the class GridFSInputFormat method getSplits.
@Override
public List<InputSplit> getSplits(final JobContext context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
DBCollection inputCollection = MongoConfigUtil.getInputCollection(conf);
MongoClientURI inputURI = MongoConfigUtil.getInputURI(conf);
GridFS gridFS = new GridFS(inputCollection.getDB(), inputCollection.getName());
DBObject query = MongoConfigUtil.getQuery(conf);
List<InputSplit> splits = new LinkedList<InputSplit>();
for (GridFSDBFile file : gridFS.find(query)) {
// One split per file.
if (MongoConfigUtil.isGridFSWholeFileSplit(conf)) {
splits.add(new GridFSSplit(inputURI, (ObjectId) file.getId(), (int) file.getChunkSize(), file.getLength()));
} else // One split per file chunk.
{
for (int chunk = 0; chunk < file.numChunks(); ++chunk) {
splits.add(new GridFSSplit(inputURI, (ObjectId) file.getId(), (int) file.getChunkSize(), file.getLength(), chunk));
}
}
}
LOG.debug("Found GridFS splits: " + splits);
return splits;
}
Aggregations