Use of com.mongodb.hadoop.mapred.input.BSONFileSplit in project mongo-hadoop by mongodb.
In class BSONFileInputFormat, method getSplits:
@Override
public FileSplit[] getSplits(final JobConf job, final int numSplits) throws IOException {
    BSONSplitter splitter = new BSONSplitter();
    splitter.setConf(job);
    FileStatus[] inputFiles = listStatus(job);
    List<FileSplit> results = new ArrayList<FileSplit>();
    for (FileStatus file : inputFiles) {
        FileSystem fs = FileSystem.get(file.getPath().toUri(), job);
        if (!isSplitable(fs, file.getPath())) {
            // Compressed files cannot be split, so emit a single split
            // covering the whole file.
            LOG.info("File " + file.getPath() + " is compressed so cannot be split.");
            org.apache.hadoop.mapreduce.lib.input.FileSplit delegate =
                    splitter.createFileSplit(file, fs, 0L, file.getLen());
            results.add(new BSONFileSplit(delegate.getPath(), delegate.getStart(),
                    delegate.getLength(), delegate.getLocations()));
            continue;
        }
        splitter.setInputPath(file.getPath());
        Path splitFilePath = getSplitsFilePath(file.getPath(), job);
        try {
            // Prefer cached split metadata from a previously written split file.
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(format("No split file for %s; building split file", file.getPath()));
            }
            // No split file exists; scan the BSON file for document boundaries.
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }
        // Wrap each new-API split in an old-API BSONFileSplit and carry the
        // configured key field along.
        for (org.apache.hadoop.mapreduce.lib.input.FileSplit split : splitter.getAllSplits()) {
            BSONFileSplit fsplit = new BSONFileSplit(split.getPath(), split.getStart(),
                    split.getLength(), split.getLocations());
            fsplit.setKeyField(MongoConfigUtil.getInputKey(job));
            results.add(fsplit);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(format("Total of %d splits found.", results.size()));
    }
    return results.toArray(new BSONFileSplit[results.size()]);
}
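For context, here is a minimal driver sketch showing how these splits might be requested through the old mapred API. The SplitExample class name, the /data/dumps input path, and the split hint of 4 are illustrative assumptions, not part of mongo-hadoop.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import com.mongodb.hadoop.mapred.BSONFileInputFormat;

public class SplitExample {
    public static void main(final String[] args) throws Exception {
        JobConf job = new JobConf();
        // Hypothetical directory of .bson dump files.
        FileInputFormat.setInputPaths(job, new Path("/data/dumps"));
        BSONFileInputFormat inputFormat = new BSONFileInputFormat();
        // numSplits is only a hint; BSONSplitter decides the actual boundaries.
        FileSplit[] splits = inputFormat.getSplits(job, 4);
        for (FileSplit split : splits) {
            System.out.printf("%s: start=%d length=%d%n",
                    split.getPath(), split.getStart(), split.getLength());
        }
    }
}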
Use of com.mongodb.hadoop.mapred.input.BSONFileSplit in project mongo-hadoop by mongodb.
In class BSONFileInputFormat, method getRecordReader:
@Override
public RecordReader<NullWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf job, final Reporter reporter) throws IOException {
    FileSplit fileSplit = (FileSplit) split;
    FileSystem fs = FileSystem.get(fileSplit.getPath().toUri(), job);
    // Splits produced by BSONSplitter, and whole unsplittable files, already
    // begin on a document boundary, so the reader can start at the split start.
    if (split instanceof BSONFileSplit || !isSplitable(fs, fileSplit.getPath())) {
        BSONFileRecordReader reader = new BSONFileRecordReader();
        reader.initialize(split, job);
        return reader;
    }
    // Split was not created by BSONSplitter, so its start offset may fall
    // mid-document; ask the splitter for the first document boundary at or
    // after the split's start and begin reading there.
    BSONSplitter splitter = new BSONSplitter();
    splitter.setConf(job);
    splitter.setInputPath(fileSplit.getPath());
    org.apache.hadoop.mapreduce.lib.input.FileSplit newStyleFileSplit =
            new org.apache.hadoop.mapreduce.lib.input.FileSplit(fileSplit.getPath(),
                    fileSplit.getStart(), fileSplit.getLength(), fileSplit.getLocations());
    long start = splitter.getStartingPositionForSplit(newStyleFileSplit);
    BSONFileRecordReader reader = new BSONFileRecordReader(start);
    reader.initialize(fileSplit, job);
    return reader;
}
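A sketch of consuming the returned reader by hand, as a local test harness might; the ReadExample class, the input path, and the split hint are assumptions. BSONWritable.getDoc() is used here to print each decoded document.

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.mapred.BSONFileInputFormat;

public class ReadExample {
    public static void main(final String[] args) throws Exception {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, "/data/dumps");
        BSONFileInputFormat inputFormat = new BSONFileInputFormat();
        for (FileSplit split : inputFormat.getSplits(job, 1)) {
            RecordReader<NullWritable, BSONWritable> reader =
                    inputFormat.getRecordReader(split, job, Reporter.NULL);
            NullWritable key = reader.createKey();
            BSONWritable value = reader.createValue();
            // next() returns false once the split is exhausted.
            while (reader.next(key, value)) {
                System.out.println(value.getDoc());
            }
            reader.close();
        }
    }
}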