Use of com.mongodb.hadoop.io.BSONWritable in the mongo-hadoop project by MongoDB.
Class MongoOutputReader, method initialize.
@Override
public void initialize(final PipeMapRed pipeMapRed) throws IOException {
    super.initialize(pipeMapRed);
    // Fresh writables that each call to readKeyValue() will deserialize into.
    currentKey = new BSONWritable();
    currentValue = new BSONWritable();
    // Stream carrying the streaming script's stdout back into this reader.
    in = pipeMapRed.getClientInput();
}
Use of com.mongodb.hadoop.io.BSONWritable in the mongo-hadoop project by MongoDB.
Class MongoUpdateInputWriter, method writeValue.
/**
 * Serializes a value onto the stream feeding a Hadoop streaming script.
 *
 * <p>A {@link MongoUpdateWritable} is reduced to just its "query" document so
 * mapper/reducer scripts always see a single BSON document; a plain
 * {@link BSONWritable} is written as-is.
 *
 * @param value the writable to serialize; must be a MongoUpdateWritable or BSONWritable
 * @throws IOException if writing fails or the value is of an unsupported type
 */
@Override
public void writeValue(final Writable value) throws IOException {
    if (value instanceof MongoUpdateWritable) {
        // If we're writing to the input of a streaming script, just send
        // back the "query" portion of the MongoUpdateWritable, so that
        // mapper and reducer scripts can operate on a single document.
        bsonWritable.setDoc(((MongoUpdateWritable) value).getQuery());
        bsonWritable.write(output);
    } else if (value instanceof BSONWritable) {
        value.write(output);
    } else {
        // Include the concrete class so the failure is diagnosable, not just
        // the value's (possibly unhelpful) toString().
        throw new IOException(
            "Unexpected Writable type: " + value.getClass().getName() + " (" + value + ")");
    }
}
Use of com.mongodb.hadoop.io.BSONWritable in the mongo-hadoop project by MongoDB.
Class BSONFileRecordWriter, method write.
/**
 * Encodes a key/value pair as a single BSON document and appends it to the
 * output file, tracking bytes written for split generation.
 *
 * <p>Document shape: if a key is given it becomes {@code _id}; a
 * {@link BSONWritable}/{@link BSONObject} value contributes its fields
 * directly, while any other value is wrapped under a {@code "value"} field.
 *
 * @param key   optional document key; converted to BSON and stored as _id
 * @param value the record to write; must not be a MongoUpdateWritable
 * @throws IOException              if writing to the destination fails
 * @throws IllegalArgumentException if value is a MongoUpdateWritable, which
 *                                  only makes sense against a live collection
 */
public void write(final K key, final V value) throws IOException {
    final FSDataOutputStream destination = this.outFile;
    if (value instanceof MongoUpdateWritable) {
        throw new IllegalArgumentException("MongoUpdateWritable can only be used to output to a mongo collection, " + "not a static BSON file.");
    }
    Object keyBSON = null;
    BSONObject toEncode = null;
    byte[] outputByteBuf;
    if (key != null) {
        keyBSON = BSONWritable.toBSON(key);
        if (keyBSON != null) {
            // Start with an empty document so value fields and _id can both be added.
            toEncode = new BasicDBObject();
        }
    }
    if (value instanceof BSONWritable) {
        if (toEncode != null) {
            toEncode.putAll(((BSONWritable) value).getDoc());
        } else {
            toEncode = ((BSONWritable) value).getDoc();
        }
    } else if (value instanceof BSONObject) {
        if (toEncode != null) {
            toEncode.putAll((BSONObject) value);
        } else {
            toEncode = (BSONObject) value;
        }
    } else {
        // Non-BSON values (e.g. primitive writables) are wrapped in a
        // single-field document under "value".
        if (toEncode != null) {
            toEncode.put("value", BSONWritable.toBSON(value));
        } else {
            final DBObject o = new BasicDBObject();
            o.put("value", BSONWritable.toBSON(value));
            toEncode = o;
        }
    }
    if (keyBSON != null) {
        toEncode.put("_id", keyBSON);
    }
    outputByteBuf = bsonEnc.encode(toEncode);
    destination.write(outputByteBuf, 0, outputByteBuf.length);
    // Track output size so writeSplitData can emit split boundaries.
    bytesWritten += outputByteBuf.length;
    writeSplitData(outputByteBuf.length, false);
}
Use of com.mongodb.hadoop.io.BSONWritable in the mongo-hadoop project by MongoDB.
Class MongoOutputCommitter, method commitTask.
/**
 * Replays the task's temporary file of serialized writables against the
 * output MongoDB collection in bulk batches.
 *
 * <p>Each record in the temp file is an int type tag followed by either a
 * {@link BSONWritable} (becomes an insert) or a {@link MongoUpdateWritable}
 * (becomes a replace/update/upsert). Batches of up to the configured batch
 * size are flushed with a bulk operation; progress is reported to Hadoop
 * after each flush so the task is not killed for inactivity.
 *
 * @param taskContext context providing the job configuration and progress hook
 * @throws IOException if the temp file cannot be opened or read
 */
public void commitTask(final CompatUtils.TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");
    collection = MongoConfigUtil.getOutputCollection(taskContext.getConfiguration());
    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }
    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    // Cache the ordered/unordered setting so every batch — not just the
    // first — honors the configured bulk-write mode. (Previously the op was
    // unconditionally re-initialized as ordered after each flush.)
    final boolean bulkOrdered = MongoConfigUtil.isBulkOrdered(taskContext.getConfiguration());
    BulkWriteOperation bulkOp;
    if (bulkOrdered) {
        bulkOp = collection.initializeOrderedBulkOperation();
    } else {
        bulkOp = collection.initializeUnorderedBulkOperation();
    }
    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                BulkWriteRequestBuilder writeBuilder = bulkOp.find(query);
                if (muw.isReplace()) {
                    writeBuilder.replaceOne(modifiers);
                } else if (muw.isUpsert()) {
                    BulkUpdateRequestBuilder updateBuilder = writeBuilder.upsert();
                    if (muw.isMultiUpdate()) {
                        updateBuilder.update(modifiers);
                    } else {
                        updateBuilder.updateOne(modifiers);
                    }
                } else {
                    // No-upsert update.
                    if (muw.isMultiUpdate()) {
                        writeBuilder.update(modifiers);
                    } else {
                        writeBuilder.updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Flush when the batch is full or this was the last record
            // in the temp file.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                // Re-initialize with the same ordered/unordered mode as
                // the original operation.
                bulkOp = bulkOrdered
                    ? collection.initializeOrderedBulkOperation()
                    : collection.initializeUnorderedBulkOperation();
                curBatchSize = 0;
                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }
    cleanupAfterCommit(inputStream, taskContext);
}
Use of com.mongodb.hadoop.io.BSONWritable in the mongo-hadoop project by MongoDB.
Class BSONFileInputFormatTest, method enronEmails.
@Test
public void enronEmails() throws IOException {
    // Reads every document out of the enron messages.bson dump through
    // BSONFileInputFormat and verifies the total record count.
    final BSONFileInputFormat format = new BSONFileInputFormat();
    final JobConf conf = new JobConf();
    final String inputDirectory =
        new File(EXAMPLE_DATA_HOME, "/dump/enron_mail/messages.bson").getAbsoluteFile().toURI().toString();
    // Hadoop 2.X
    conf.set("mapreduce.input.fileinputformat.inputdir", inputDirectory);
    // Hadoop 1.2.X
    conf.set("mapred.input.dir", inputDirectory);
    int messageCount = 0;
    final BSONWritable reusedValue = new BSONWritable();
    for (final FileSplit split : format.getSplits(conf, 5)) {
        final RecordReader<NullWritable, BSONWritable> reader = format.getRecordReader(split, conf, null);
        // Keys are unused (NullWritable); the value writable is reused per record.
        while (reader.next(null, reusedValue)) {
            messageCount++;
        }
    }
    assertEquals("There are 501513 messages in the enron corpus", 501513, messageCount);
}
Aggregations