Use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.
The class BSONFileRecordReader, method init.
public void init(final InputSplit inputSplit, final Configuration configuration) throws IOException, InterruptedException {
    this.configuration = configuration;
    fileSplit = (FileSplit) inputSplit;
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    CompressionCodec codec = new CompressionCodecFactory(configuration).getCodec(fileSplit.getPath());
    inRaw = fs.open(file, 16 * 1024 * 1024);
    inRaw.seek(startingPosition == BSON_RR_POSITION_NOT_GIVEN ? fileSplit.getStart() : startingPosition);
    if (codec != null) {
        decompressor = CodecPool.getDecompressor(codec);
        in = codec.createInputStream(inRaw, decompressor);
    } else {
        in = inRaw;
    }
    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}
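Once init has run, the record reader holds an open stream (in), a decoder, and a callback, and each subsequent document is pulled by decoding straight off the stream. The standalone sketch below shows that read loop in isolation; the class name BsonStreamReader is made up for illustration, and the available() check is only a heuristic for "more bytes to read", so treat this as an approximation rather than the project's actual nextKeyValue implementation.

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.bson.BSONObject;
import org.bson.BasicBSONCallback;
import org.bson.BasicBSONDecoder;

// Hypothetical helper, not part of mongo-hadoop: decodes every BSON document
// from an already positioned stream the way a record reader would after init.
public final class BsonStreamReader {
    private BsonStreamReader() {
    }

    public static List<BSONObject> readAll(final InputStream in) throws IOException {
        BasicBSONDecoder decoder = new BasicBSONDecoder();
        BasicBSONCallback callback = new BasicBSONCallback();
        List<BSONObject> docs = new ArrayList<BSONObject>();
        while (in.available() > 0) {       // heuristic end-of-input check
            callback.reset();              // drop state from the previous document
            decoder.decode(in, callback);  // consume exactly one BSON document
            docs.add((BSONObject) callback.get());
        }
        return docs;
    }
}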
Use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.
The class BSONWritable, method readFields.
/**
 * {@inheritDoc}
 *
 * @see Writable#readFields(DataInput)
 */
public void readFields(final DataInput in) throws IOException {
    BSONDecoder dec = new BasicBSONDecoder();
    BSONCallback cb = new BasicBSONCallback();
    // Read the BSON length from the start of the record
    byte[] l = new byte[4];
    try {
        in.readFully(l);
        int dataLen = Bits.readInt(l);
        if (LOG.isDebugEnabled()) {
            LOG.debug("*** Expected DataLen: " + dataLen);
        }
        byte[] data = new byte[dataLen + 4];
        System.arraycopy(l, 0, data, 0, 4);
        in.readFully(data, 4, dataLen - 4);
        dec.decode(data, cb);
        doc = (BSONObject) cb.get();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Decoded a BSON Object: " + doc);
        }
    } catch (Exception e) {
        /* If we can't read another length it's not an error, just return quietly. */
        // TODO - Figure out how to gracefully mark this as an empty
        LOG.info("No Length Header available." + e);
        doc = new BasicDBObject();
    }
}
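This readFields works because a BSON document serialized by BasicBSONEncoder begins with its own total length as a 4-byte little-endian int, so reading four bytes is enough to know how much more to pull off the DataInput. Below is a minimal round-trip sketch of that layout; the class name BsonLengthPrefixDemo and the sample document are made up, and the buffer here is sized to exactly dataLen rather than the snippet's dataLen + 4 padding.

import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;

import org.bson.BSONObject;
import org.bson.BasicBSONCallback;
import org.bson.BasicBSONDecoder;
import org.bson.BasicBSONEncoder;
import org.bson.BasicBSONObject;
import org.bson.io.Bits;

public final class BsonLengthPrefixDemo {
    public static void main(final String[] args) throws Exception {
        // The encoded bytes start with the document's total size, prefix included.
        BasicBSONObject doc = new BasicBSONObject("x", 1).append("y", "hello");
        byte[] encoded = new BasicBSONEncoder().encode(doc);

        DataInput in = new DataInputStream(new ByteArrayInputStream(encoded));
        byte[] l = new byte[4];
        in.readFully(l);
        int dataLen = Bits.readInt(l);          // total document size in bytes

        byte[] data = new byte[dataLen];
        System.arraycopy(l, 0, data, 0, 4);     // keep the prefix in place
        in.readFully(data, 4, dataLen - 4);     // read the remaining bytes

        BasicBSONCallback cb = new BasicBSONCallback();
        new BasicBSONDecoder().decode(data, cb);
        BSONObject decoded = (BSONObject) cb.get();
        System.out.println(decoded);
    }
}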
Use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.
The class MongoUpdateWritable, method readFields.
/**
 * {@inheritDoc}
 *
 * @see Writable#readFields(DataInput)
 */
public void readFields(final DataInput in) throws IOException {
    BSONDecoder dec = new BasicBSONDecoder();
    BSONCallback cb = new BasicBSONCallback();
    // Read the BSON length from the start of the record
    byte[] l = new byte[4];
    try {
        in.readFully(l);
        int dataLen = Bits.readInt(l);
        byte[] data = new byte[dataLen + 4];
        System.arraycopy(l, 0, data, 0, 4);
        in.readFully(data, 4, dataLen - 4);
        dec.decode(data, cb);
        query = (BasicBSONObject) cb.get();
        in.readFully(l);
        dataLen = Bits.readInt(l);
        data = new byte[dataLen + 4];
        System.arraycopy(l, 0, data, 0, 4);
        in.readFully(data, 4, dataLen - 4);
        dec.decode(data, cb);
        modifiers = (BasicBSONObject) cb.get();
        upsert = in.readBoolean();
        multiUpdate = in.readBoolean();
        replace = in.readBoolean();
    } catch (Exception e) {
        /* If we can't read another length it's not an error, just return quietly. */
        // TODO - Figure out how to gracefully mark this as an empty
        LOG.info("No Length Header available." + e);
        query = new BasicDBObject();
        modifiers = new BasicDBObject();
    }
}
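This reader expects two length-prefixed BSON documents (the query, then the modifiers) followed by three booleans. A minimal sketch of a matching writer follows; the class name MongoUpdateWire is hypothetical, and this is not necessarily how the project's own write() is implemented, it only mirrors the layout the reader consumes.

import java.io.DataOutput;
import java.io.IOException;

import org.bson.BasicBSONEncoder;
import org.bson.BasicBSONObject;

// Hypothetical helper, not part of mongo-hadoop.
public final class MongoUpdateWire {
    private MongoUpdateWire() {
    }

    public static void write(final DataOutput out,
                             final BasicBSONObject query,
                             final BasicBSONObject modifiers,
                             final boolean upsert,
                             final boolean multiUpdate,
                             final boolean replace) throws IOException {
        BasicBSONEncoder enc = new BasicBSONEncoder();
        out.write(enc.encode(query));      // BSON bytes carry their own length prefix
        out.write(enc.encode(modifiers));
        out.writeBoolean(upsert);
        out.writeBoolean(multiUpdate);
        out.writeBoolean(replace);
    }
}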
Use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.
The class MongoInputSplit, method readFields.
@Override
public void readFields(final DataInput in) throws IOException {
    BSONCallback cb = new BasicBSONCallback();
    BSONObject spec;
    byte[] l = new byte[4];
    in.readFully(l);
    int dataLen = org.bson.io.Bits.readInt(l);
    byte[] data = new byte[dataLen + 4];
    System.arraycopy(l, 0, data, 0, 4);
    in.readFully(data, 4, dataLen - 4);
    _bsonDecoder.decode(data, cb);
    spec = (BSONObject) cb.get();
    setInputURI(new MongoClientURI((String) spec.get("inputURI")));
    if (spec.get("authURI") != null) {
        setAuthURI(new MongoClientURI((String) spec.get("authURI")));
    } else {
        setAuthURI((MongoClientURI) null);
    }
    setKeyField((String) spec.get("keyField"));
    BSONObject temp = (BSONObject) spec.get("fields");
    setFields(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("query");
    setQuery(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("sort");
    setSort(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("min");
    setMin(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("max");
    setMax(temp != null ? new BasicDBObject(temp.toMap()) : null);
    setLimit((Integer) spec.get("limit"));
    setSkip((Integer) spec.get("skip"));
    setNoTimeout((Boolean) spec.get("notimeout"));
}
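Here the whole split is rehydrated from a single BSON "spec" document whose keys (inputURI, authURI, keyField, fields, query, sort, min, max, limit, skip, notimeout) match the setters called above. A hedged sketch of building and encoding such a spec follows; the class name and all placeholder values are illustrative only, not taken from the project.

import org.bson.BSONObject;
import org.bson.BasicBSONEncoder;
import org.bson.BasicBSONObject;

// Hypothetical demo, not part of mongo-hadoop.
public final class MongoInputSplitSpecDemo {
    public static void main(final String[] args) {
        // Keys mirror what readFields pulls out of the decoded spec; values are placeholders.
        BSONObject spec = new BasicBSONObject()
            .append("inputURI", "mongodb://localhost:27017/test.in")
            .append("authURI", null)
            .append("keyField", "_id")
            .append("fields", null)
            .append("query", new BasicBSONObject("i", new BasicBSONObject("$gt", 0)))
            .append("sort", null)
            .append("min", null)
            .append("max", null)
            .append("limit", 0)
            .append("skip", 0)
            .append("notimeout", false);
        // Encoding the spec yields the length-prefixed bytes readFields consumes.
        byte[] serialized = new BasicBSONEncoder().encode(spec);
        System.out.println("spec is " + serialized.length + " bytes");
    }
}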
Use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.
The class BSONSplitterTest, method testReadSplitsForFile.
@Test
public void testReadSplitsForFile() throws IOException {
    Configuration readSplitsConfig = new Configuration(conf);
    SPLITTER.setConf(readSplitsConfig);
    // Only one split if reading splits is disabled.
    MongoConfigUtil.setBSONReadSplits(readSplitsConfig, false);
    SPLITTER.readSplitsForFile(file);
    List<BSONFileSplit> splitsList = SPLITTER.getAllSplits();
    assertEquals(1, splitsList.size());
    BSONFileSplit theSplit = splitsList.get(0);
    assertOneSplit(theSplit);
    // Actually compute splits.
    MongoConfigUtil.setBSONReadSplits(readSplitsConfig, true);
    // Set split size to be really small so we get a lot of them.
    readSplitsConfig.set("mapreduce.input.fileinputformat.split.maxsize", "5000");
    SPLITTER.readSplitsForFile(file);
    splitsList = SPLITTER.getAllSplits();
    // Value found through manual inspection.
    assertEquals(40, splitsList.size());
    // Make sure that all splits start on document boundaries.
    FSDataInputStream stream = fs.open(file.getPath());
    BSONDecoder decoder = new BasicBSONDecoder();
    BSONCallback callback = new BasicBSONCallback();
    for (BSONFileSplit split : splitsList) {
        stream.seek(split.getStart());
        decoder.decode(stream, callback);
        BSONObject doc = (BSONObject) callback.get();
        assertTrue(doc.containsField("_id"));
    }
}
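The boundary check in the loop above can be lifted into a small helper: seek to a split's start offset and attempt a decode, which only yields a sensible document if the offset sits on a document boundary. The class and method names below are made up for illustration and are not part of the project.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.bson.BSONObject;
import org.bson.BasicBSONCallback;
import org.bson.BasicBSONDecoder;

// Hypothetical helper for sanity-checking split offsets against a BSON file.
public final class SplitBoundaryCheck {
    private SplitBoundaryCheck() {
    }

    // Seeks to the offset and decodes one document; a decode failure or a
    // nonsense document indicates the offset is not a document boundary.
    public static BSONObject documentAt(final FileSystem fs, final Path path, final long offset)
            throws IOException {
        FSDataInputStream stream = fs.open(path);
        try {
            stream.seek(offset);
            BasicBSONCallback callback = new BasicBSONCallback();
            new BasicBSONDecoder().decode(stream, callback);
            return (BSONObject) callback.get();
        } finally {
            stream.close();
        }
    }
}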