Search in sources :

Example 16 with BSONWritable

use of com.mongodb.hadoop.io.BSONWritable in project mongo-hadoop by mongodb.

The following shows the initialize method of the MongoOutputReader class.

/**
 * Prepares this reader for use: runs the superclass setup, grabs the
 * stream carrying the streaming script's output, and allocates the
 * reusable key/value holders that decoded records are read into.
 *
 * @param pipeMapRed the streaming pipe wrapper supplying the client input
 * @throws IOException if superclass initialization fails
 */
@Override
public void initialize(final PipeMapRed pipeMapRed) throws IOException {
    // Parent state must be set up before we touch the pipe.
    super.initialize(pipeMapRed);
    in = pipeMapRed.getClientInput();
    // Both holders are reused across records rather than reallocated.
    currentKey = new BSONWritable();
    currentValue = new BSONWritable();
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable)

Example 17 with BSONWritable

use of com.mongodb.hadoop.io.BSONWritable in project mongo-hadoop by mongodb.

The following shows the writeValue method of the MongoUpdateInputWriter class.

/**
 * Serializes a value onto the stream feeding a streaming script.
 * MongoUpdateWritable values are reduced to their "query" document so
 * that mapper/reducer scripts always see a single plain document;
 * plain BSONWritable values are written through unchanged.
 *
 * @param value the Writable to forward to the script
 * @throws IOException if the value is neither a MongoUpdateWritable
 *                     nor a BSONWritable, or if writing fails
 */
@Override
public void writeValue(final Writable value) throws IOException {
    if (value instanceof MongoUpdateWritable) {
        // Scripts operate on single documents, so send back only the
        // "query" portion of the update.
        final MongoUpdateWritable update = (MongoUpdateWritable) value;
        bsonWritable.setDoc(update.getQuery());
        bsonWritable.write(output);
        return;
    }
    if (value instanceof BSONWritable) {
        value.write(output);
        return;
    }
    throw new IOException("Unexpected Writable type :" + value);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) IOException(java.io.IOException)

Example 18 with BSONWritable

use of com.mongodb.hadoop.io.BSONWritable in project mongo-hadoop by mongodb.

The following shows the write method of the BSONFileRecordWriter class.

/**
 * Encodes one key/value pair as a single BSON document and appends it to
 * the static output file. When a non-null key converts to BSON it is
 * stored under "_id"; the value supplies the remaining fields (wrapped
 * under a "value" key when it is not itself a BSON document).
 *
 * @param key   document key; stored as "_id" when convertible to BSON
 * @param value document body; must not be a MongoUpdateWritable
 * @throws IOException if writing to the underlying stream fails
 * @throws IllegalArgumentException if value is a MongoUpdateWritable
 */
public void write(final K key, final V value) throws IOException {
    final FSDataOutputStream destination = this.outFile;
    // Updates describe an operation against a live collection, not a
    // document, so they cannot be serialized into a static BSON file.
    // NOTE(review): "MongoUpdateWriteable" in the message is a typo of
    // MongoUpdateWritable, kept as-is here.
    if (value instanceof MongoUpdateWritable) {
        throw new IllegalArgumentException("MongoUpdateWriteable can only be used to output to a mongo collection, " + "not a static BSON file.");
    }
    Object keyBSON = null;
    BSONObject toEncode = null;
    byte[] outputByteBuf;
    if (key != null) {
        keyBSON = BSONWritable.toBSON(key);
        // Only allocate a fresh document when there is actually a key to
        // merge in; otherwise the value may be encoded directly below.
        if (keyBSON != null) {
            toEncode = new BasicDBObject();
        }
    }
    if (value instanceof BSONWritable) {
        if (toEncode != null) {
            // Copy the value's fields into the document that will also
            // carry the "_id" key.
            toEncode.putAll(((BSONWritable) value).getDoc());
        } else {
            // No key: encode the writable's document directly. This
            // aliases (does not copy) the underlying document; it is safe
            // because "_id" is only added when toEncode was freshly
            // allocated above.
            toEncode = ((BSONWritable) value).getDoc();
        }
    } else if (value instanceof BSONObject) {
        if (toEncode != null) {
            toEncode.putAll((BSONObject) value);
        } else {
            // Same aliasing note as the BSONWritable branch above.
            toEncode = (BSONObject) value;
        }
    } else {
        // Arbitrary values are wrapped in a document under "value".
        if (toEncode != null) {
            toEncode.put("value", BSONWritable.toBSON(value));
        } else {
            final DBObject o = new BasicDBObject();
            o.put("value", BSONWritable.toBSON(value));
            toEncode = o;
        }
    }
    if (keyBSON != null) {
        toEncode.put("_id", keyBSON);
    }
    outputByteBuf = bsonEnc.encode(toEncode);
    destination.write(outputByteBuf, 0, outputByteBuf.length);
    // Track bytes written so split boundaries can be recorded.
    bytesWritten += outputByteBuf.length;
    writeSplitData(outputByteBuf.length, false);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) BSONObject(org.bson.BSONObject) DBObject(com.mongodb.DBObject) BasicDBObject(com.mongodb.BasicDBObject) BSONObject(org.bson.BSONObject) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DBObject(com.mongodb.DBObject) BasicDBObject(com.mongodb.BasicDBObject)

Example 19 with BSONWritable

use of com.mongodb.hadoop.io.BSONWritable in project mongo-hadoop by mongodb.

The following shows the commitTask method of the MongoOutputCommitter class.

/**
 * Commits a task attempt by replaying the Writables recorded in its
 * temporary file against the output MongoDB collection in bulk batches.
 * Each record is prefixed with an int type tag (see MongoWritableTypes)
 * selecting either a plain insert (BSONWritable) or an update/replace
 * (MongoUpdateWritable).
 *
 * @param taskContext context of the task attempt being committed
 * @throws IOException if the temporary file cannot be opened or read,
 *                     or an unrecognized type tag is encountered
 */
public void commitTask(final CompatUtils.TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");
    collection = MongoConfigUtil.getOutputCollection(taskContext.getConfiguration());
    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }
    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    // Hoist the ordering preference so every batch uses the same kind of
    // bulk operation. Previously the re-initialization after a batch was
    // executed always created an *ordered* bulk op, silently ignoring an
    // unordered configuration from the second batch onward.
    final boolean bulkOrdered = MongoConfigUtil.isBulkOrdered(taskContext.getConfiguration());
    BulkWriteOperation bulkOp;
    if (bulkOrdered) {
        bulkOp = collection.initializeOrderedBulkOperation();
    } else {
        bulkOp = collection.initializeUnorderedBulkOperation();
    }
    // Read Writables out of the temporary file.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                BulkWriteRequestBuilder writeBuilder = bulkOp.find(query);
                if (muw.isReplace()) {
                    writeBuilder.replaceOne(modifiers);
                } else if (muw.isUpsert()) {
                    BulkUpdateRequestBuilder updateBuilder = writeBuilder.upsert();
                    if (muw.isMultiUpdate()) {
                        updateBuilder.update(modifiers);
                    } else {
                        updateBuilder.updateOne(modifiers);
                    }
                } else {
                    // No-upsert update.
                    if (muw.isMultiUpdate()) {
                        writeBuilder.update(modifiers);
                    } else {
                        writeBuilder.updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Flush once the batch is full, or when this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                // Start the next batch with the configured ordering (fix:
                // this used to be unconditionally ordered).
                if (bulkOrdered) {
                    bulkOp = collection.initializeOrderedBulkOperation();
                } else {
                    bulkOp = collection.initializeUnorderedBulkOperation();
                }
                curBatchSize = 0;
                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }
    cleanupAfterCommit(inputStream, taskContext);
}
Also used : Path(org.apache.hadoop.fs.Path) BulkWriteOperation(com.mongodb.BulkWriteOperation) MongoException(com.mongodb.MongoException) IOException(java.io.IOException) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) BulkWriteRequestBuilder(com.mongodb.BulkWriteRequestBuilder) FileSystem(org.apache.hadoop.fs.FileSystem) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BulkUpdateRequestBuilder(com.mongodb.BulkUpdateRequestBuilder)

Example 20 with BSONWritable

use of com.mongodb.hadoop.io.BSONWritable in project mongo-hadoop by mongodb.

The following shows the enronEmails method of the BSONFileInputFormatTest class.

/**
 * Reads the Enron mail BSON dump through BSONFileInputFormat across 5
 * requested splits and verifies the total record count.
 *
 * @throws IOException if a split cannot be read
 */
@Test
public void enronEmails() throws IOException {
    BSONFileInputFormat inputFormat = new BSONFileInputFormat();
    JobConf job = new JobConf();
    String inputDirectory = new File(EXAMPLE_DATA_HOME, "/dump/enron_mail/messages.bson").getAbsoluteFile().toURI().toString();
    // Hadoop 2.X
    job.set("mapreduce.input.fileinputformat.inputdir", inputDirectory);
    // Hadoop 1.2.X
    job.set("mapred.input.dir", inputDirectory);
    FileSplit[] splits = inputFormat.getSplits(job, 5);
    int count = 0;
    BSONWritable writable = new BSONWritable();
    for (FileSplit split : splits) {
        RecordReader<NullWritable, BSONWritable> recordReader = inputFormat.getRecordReader(split, job, null);
        try {
            while (recordReader.next(null, writable)) {
                count++;
            }
        } finally {
            // Fix: close each reader to release its underlying file
            // handle; previously the readers were never closed.
            recordReader.close();
        }
    }
    assertEquals("There are 501513 messages in the enron corpus", 501513, count);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) FileSplit(org.apache.hadoop.mapred.FileSplit) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Aggregations

BSONWritable (com.mongodb.hadoop.io.BSONWritable)21 BasicBSONObject (org.bson.BasicBSONObject)14 Test (org.junit.Test)13 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)11 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)11 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)11 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)11 BasicDBObject (com.mongodb.BasicDBObject)4 MongoUpdateWritable (com.mongodb.hadoop.io.MongoUpdateWritable)4 ArrayList (java.util.ArrayList)4 BSONObject (org.bson.BSONObject)4 DBObject (com.mongodb.DBObject)3 File (java.io.File)2 IOException (java.io.IOException)2 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)2 NullWritable (org.apache.hadoop.io.NullWritable)2 FileSplit (org.apache.hadoop.mapred.FileSplit)2 JobConf (org.apache.hadoop.mapred.JobConf)2 BulkUpdateRequestBuilder (com.mongodb.BulkUpdateRequestBuilder)1 BulkWriteOperation (com.mongodb.BulkWriteOperation)1