Search in sources :

Example 6 with MongoUpdateWritable

Use of com.mongodb.hadoop.io.MongoUpdateWritable in the mongo-hadoop project by mongodb.

From the class MongoOutputCommitter, the method commitTask.

/**
 * Commits a task by replaying the writables stored in the task attempt's
 * temporary file against the output MongoDB collection in bulk batches.
 *
 * <p>Each record in the temporary file is an int type marker followed by the
 * serialized writable. {@code BSON_WRITABLE} records become inserts;
 * {@code MONGO_UPDATE_WRITABLE} records become replace/update operations
 * (optionally upserting and/or multi-document). A batch is executed once it
 * reaches the configured batch size or the end of the file is reached.
 *
 * @param taskContext the context of the task attempt being committed; its
 *                    configuration supplies the output collection, batch
 *                    size and ordered/unordered bulk setting
 * @throws IOException if the temporary file cannot be opened or read, or if
 *                     it contains an unrecognized type marker
 */
public void commitTask(final CompatUtils.TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");
    collection = MongoConfigUtil.getOutputCollection(taskContext.getConfiguration());
    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }
    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    // Resolve the ordered/unordered setting once so that every batch, not
    // just the first one, honors the configuration.
    final boolean ordered = MongoConfigUtil.isBulkOrdered(taskContext.getConfiguration());
    BulkWriteOperation bulkOp = ordered
        ? collection.initializeOrderedBulkOperation()
        : collection.initializeUnorderedBulkOperation();
    // Read Writables out of the temporary file. The two instances are reused
    // for every record; readFields() repopulates them each time.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    // NOTE(review): if anything inside this loop throws, cleanupAfterCommit()
    // is never invoked, unlike the open-failure path above. Confirm whether
    // the stream/temporary file should also be cleaned up on these paths.
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                BulkWriteRequestBuilder writeBuilder = bulkOp.find(query);
                if (muw.isReplace()) {
                    writeBuilder.replaceOne(modifiers);
                } else if (muw.isUpsert()) {
                    BulkUpdateRequestBuilder updateBuilder = writeBuilder.upsert();
                    if (muw.isMultiUpdate()) {
                        updateBuilder.update(modifiers);
                    } else {
                        updateBuilder.updateOne(modifiers);
                    }
                } else {
                    // No-upsert update.
                    if (muw.isMultiUpdate()) {
                        writeBuilder.update(modifiers);
                    } else {
                        writeBuilder.updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Flush the batch when it is full, or when this was the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                // Start a fresh bulk operation of the configured kind; an
                // executed BulkWriteOperation is not reused here.
                bulkOp = ordered
                    ? collection.initializeOrderedBulkOperation()
                    : collection.initializeUnorderedBulkOperation();
                curBatchSize = 0;
                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }
    cleanupAfterCommit(inputStream, taskContext);
}
Also used : Path(org.apache.hadoop.fs.Path) BulkWriteOperation(com.mongodb.BulkWriteOperation) MongoException(com.mongodb.MongoException) IOException(java.io.IOException) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) BulkWriteRequestBuilder(com.mongodb.BulkWriteRequestBuilder) FileSystem(org.apache.hadoop.fs.FileSystem) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BulkUpdateRequestBuilder(com.mongodb.BulkUpdateRequestBuilder)

Aggregations

MongoUpdateWritable (com.mongodb.hadoop.io.MongoUpdateWritable)6 BasicDBObject (com.mongodb.BasicDBObject)4 DBObject (com.mongodb.DBObject)4 BSONWritable (com.mongodb.hadoop.io.BSONWritable)4 IOException (java.io.IOException)3 BSONObject (org.bson.BSONObject)2 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)1 BulkUpdateRequestBuilder (com.mongodb.BulkUpdateRequestBuilder)1 BulkWriteOperation (com.mongodb.BulkWriteOperation)1 BulkWriteRequestBuilder (com.mongodb.BulkWriteRequestBuilder)1 MongoException (com.mongodb.MongoException)1 MongoOutput (com.mongodb.hadoop.MongoOutput)1 Map (java.util.Map)1 Properties (java.util.Properties)1 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 PipeMapRed (org.apache.hadoop.streaming.PipeMapRed)1 ResourceSchema (org.apache.pig.ResourceSchema)1