Search in sources :

Example 1 with MongoUpdateWritable

use of com.mongodb.hadoop.io.MongoUpdateWritable in project mongo-hadoop by mongodb.

the class MongoRecordWriter method write.

/**
 * Serializes one key/value pair to {@code outputStream}, preceded by an int
 * tag from {@link MongoWritableTypes} identifying what follows.
 *
 * <p>A {@link MongoUpdateWritable} value is written as-is and the key is not
 * consulted. Any other value is folded into a single document whose
 * {@code _id} is derived from the key, and that document is written through
 * the shared {@code bsonWritable} instance.
 *
 * @param key   source of the document's {@code _id} (unused for updates)
 * @param value the update to record, or the data to merge into the document
 * @throws IOException if writing to the output stream fails
 */
@Override
public void write(final K key, final V value) throws IOException {
    // Updates carry their own query and modifiers, so they bypass document assembly.
    if (value instanceof MongoUpdateWritable) {
        outputStream.writeInt(MongoWritableTypes.MONGO_UPDATE_WRITABLE);
        ((MongoUpdateWritable) value).write(outputStream);
        return;
    }

    final DBObject document = new BasicDBObject();

    // Resolve the _id: unwrap a BSONWritable, use a BSONObject directly,
    // and convert anything else.
    final Object id;
    if (key instanceof BSONWritable) {
        id = ((BSONWritable) key).getDoc();
    } else if (key instanceof BSONObject) {
        id = key;
    } else {
        id = BSONWritable.toBSON(key);
    }
    document.put("_id", id);

    // Merge the value's fields into the document; a value that is not
    // document-like is stored under the single field "value".
    if (value instanceof BSONWritable) {
        document.putAll(((BSONWritable) value).getDoc());
    } else if (value instanceof MongoOutput) {
        ((MongoOutput) value).appendAsValue(document);
    } else if (value instanceof BSONObject) {
        document.putAll((BSONObject) value);
    } else if (value instanceof Map) {
        document.putAll((Map) value);
    } else {
        document.put("value", BSONWritable.toBSON(value));
    }

    outputStream.writeInt(MongoWritableTypes.BSON_WRITABLE);
    bsonWritable.setDoc(document);
    bsonWritable.write(outputStream);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) BSONObject(org.bson.BSONObject) MongoOutput(com.mongodb.hadoop.MongoOutput) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) Map(java.util.Map)

Example 2 with MongoUpdateWritable

use of com.mongodb.hadoop.io.MongoUpdateWritable in project mongo-hadoop by mongodb.

the class MongoOutputCommitter method commitTask.

/**
 * Commits a task by replaying the records in its temporary file against the
 * output collection as batched bulk operations.
 *
 * <p>Each record in the file is an int type tag followed by the serialized
 * writable. A {@code BSON_WRITABLE} record becomes an insert; a
 * {@code MONGO_UPDATE_WRITABLE} record becomes a replace, upsert or plain
 * update (single or multi) according to the flags on the writable. A batch is
 * executed once it holds the configured batch size or the end of the file is
 * reached, and progress is reported to the framework after every batch.
 *
 * <p>Every batch, not just the first, uses the ordered/unordered mode selected
 * by {@code MongoConfigUtil.isBulkOrdered}.
 *
 * @param taskContext the context of the task attempt being committed
 * @throws IOException if the temporary file cannot be opened or read, or if it
 *         contains an unrecognized type tag
 */
public void commitTask(final CompatUtils.TaskAttemptContext taskContext) throws IOException {
    LOG.info("Committing task.");
    collection = MongoConfigUtil.getOutputCollection(taskContext.getConfiguration());
    // Get temporary file.
    Path tempFilePath = getTaskAttemptPath(taskContext);
    LOG.info("Committing from temporary file: " + tempFilePath.toString());
    long filePos = 0, fileLen;
    FSDataInputStream inputStream = null;
    try {
        FileSystem fs = FileSystem.get(taskContext.getConfiguration());
        inputStream = fs.open(tempFilePath);
        fileLen = fs.getFileStatus(tempFilePath).getLen();
    } catch (IOException e) {
        LOG.error("Could not open temporary file for committing", e);
        cleanupAfterCommit(inputStream, taskContext);
        throw e;
    }
    int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration());
    int curBatchSize = 0;
    // Resolve the ordering once so that every batch, including those created
    // after a flush, honours the configured mode.
    final boolean ordered = MongoConfigUtil.isBulkOrdered(taskContext.getConfiguration());
    BulkWriteOperation bulkOp = ordered
        ? collection.initializeOrderedBulkOperation()
        : collection.initializeUnorderedBulkOperation();
    // Read Writables out of the temporary file.
    // The two instances below are reused for every record of their type.
    BSONWritable bw = new BSONWritable();
    MongoUpdateWritable muw = new MongoUpdateWritable();
    // NOTE(review): if anything inside this loop throws, cleanupAfterCommit is
    // never reached for this call - confirm the abort path releases the stream.
    while (filePos < fileLen) {
        try {
            // Determine writable type, and perform corresponding operation
            // on MongoDB.
            int mwType = inputStream.readInt();
            if (MongoWritableTypes.BSON_WRITABLE == mwType) {
                bw.readFields(inputStream);
                bulkOp.insert(new BasicDBObject(bw.getDoc().toMap()));
            } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) {
                muw.readFields(inputStream);
                DBObject query = new BasicDBObject(muw.getQuery().toMap());
                DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap());
                BulkWriteRequestBuilder writeBuilder = bulkOp.find(query);
                if (muw.isReplace()) {
                    writeBuilder.replaceOne(modifiers);
                } else if (muw.isUpsert()) {
                    BulkUpdateRequestBuilder updateBuilder = writeBuilder.upsert();
                    if (muw.isMultiUpdate()) {
                        updateBuilder.update(modifiers);
                    } else {
                        updateBuilder.updateOne(modifiers);
                    }
                } else {
                    // No-upsert update.
                    if (muw.isMultiUpdate()) {
                        writeBuilder.update(modifiers);
                    } else {
                        writeBuilder.updateOne(modifiers);
                    }
                }
            } else {
                throw new IOException("Unrecognized type: " + mwType);
            }
            filePos = inputStream.getPos();
            // Write to MongoDB if the batch is full, or if this is the last
            // operation to be performed for the Task.
            if (++curBatchSize >= maxDocs || filePos >= fileLen) {
                try {
                    bulkOp.execute();
                } catch (MongoException e) {
                    LOG.error("Could not write to MongoDB", e);
                    throw e;
                }
                // Start the next batch in the same mode as the one just executed.
                bulkOp = ordered
                    ? collection.initializeOrderedBulkOperation()
                    : collection.initializeUnorderedBulkOperation();
                curBatchSize = 0;
                // Signal progress back to Hadoop framework so that we
                // don't time out.
                taskContext.progress();
            }
        } catch (IOException e) {
            LOG.error("Error reading from temporary file", e);
            throw e;
        }
    }
    cleanupAfterCommit(inputStream, taskContext);
}
Also used : Path(org.apache.hadoop.fs.Path) BulkWriteOperation(com.mongodb.BulkWriteOperation) MongoException(com.mongodb.MongoException) IOException(java.io.IOException) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) BulkWriteRequestBuilder(com.mongodb.BulkWriteRequestBuilder) FileSystem(org.apache.hadoop.fs.FileSystem) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BulkUpdateRequestBuilder(com.mongodb.BulkUpdateRequestBuilder)

Example 3 with MongoUpdateWritable

use of com.mongodb.hadoop.io.MongoUpdateWritable in project mongo-hadoop by mongodb.

the class BSONFileRecordWriter method write.

/**
 * Encodes one key/value pair as a single BSON document and appends it to the
 * output file, then updates the byte counter and split bookkeeping.
 *
 * <p>If the key is non-null and converts to a non-null BSON value, the value's
 * fields are copied into a fresh document and the converted key is stored
 * under {@code _id}. Otherwise no {@code _id} is written and a
 * {@link BSONWritable} or {@link BSONObject} value is encoded directly without
 * copying. A value of any other type is stored under the field {@code value}.
 *
 * @param key   optional source of the document's {@code _id}
 * @param value the data to encode
 * @throws IOException if writing to the output file fails
 * @throws IllegalArgumentException if {@code value} is a {@link MongoUpdateWritable}
 */
public void write(final K key, final V value) throws IOException {
    if (value instanceof MongoUpdateWritable) {
        throw new IllegalArgumentException("MongoUpdateWriteable can only be used to output to a mongo collection, " + "not a static BSON file.");
    }
    final FSDataOutputStream sink = this.outFile;

    // The key contributes an _id only when it survives conversion to BSON.
    final Object idValue = (key == null) ? null : BSONWritable.toBSON(key);
    final boolean hasId = idValue != null;

    final BSONObject document;
    if (value instanceof BSONWritable || value instanceof BSONObject) {
        final BSONObject source = (value instanceof BSONWritable)
            ? ((BSONWritable) value).getDoc()
            : (BSONObject) value;
        if (hasId) {
            // Copy so that adding _id below does not touch the caller's object.
            document = new BasicDBObject();
            document.putAll(source);
        } else {
            document = source;
        }
    } else {
        document = new BasicDBObject();
        document.put("value", BSONWritable.toBSON(value));
    }

    if (hasId) {
        document.put("_id", idValue);
    }

    final byte[] encoded = bsonEnc.encode(document);
    sink.write(encoded, 0, encoded.length);
    bytesWritten += encoded.length;
    writeSplitData(encoded.length, false);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) BSONObject(org.bson.BSONObject) DBObject(com.mongodb.DBObject) BasicDBObject(com.mongodb.BasicDBObject) BSONObject(org.bson.BSONObject) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DBObject(com.mongodb.DBObject) BasicDBObject(com.mongodb.BasicDBObject)

Example 4 with MongoUpdateWritable

use of com.mongodb.hadoop.io.MongoUpdateWritable in project mongo-hadoop by mongodb.

the class MongoUpdateStorage method prepareToWrite.

/**
 * Stores the record writer and resolves the schema used for output.
 *
 * <p>A user-supplied {@code schemaStr} takes precedence over the schema saved
 * in the UDF context. A failure to parse the user-supplied string is logged
 * and not rethrown. When no user schema is given, the schema stored under
 * {@code SCHEMA_SIGNATURE} is required.
 *
 * @param writer the record writer to use; cast to {@code MongoRecordWriter}
 * @throws IOException if the writer is null, or if no user schema was given
 *         and none is found in the UDF context
 */
@Override
public void prepareToWrite(final RecordWriter writer) throws IOException {
    // noinspection unchecked
    recordWriter = (MongoRecordWriter<?, MongoUpdateWritable>) writer;
    LOG.info("Preparing to write to " + recordWriter);
    if (null == recordWriter) {
        throw new IOException("Invalid Record Writer");
    }

    final UDFContext udfContext = UDFContext.getUDFContext();
    final Properties udfProperties = udfContext.getUDFProperties(getClass(), new String[] { signature });

    // Without a user-defined schema, fall back to the one saved in the UDF context.
    if (schemaStr == null) {
        final String storedSchema = udfProperties.getProperty(SCHEMA_SIGNATURE);
        if (storedSchema == null) {
            throw new IOException("Could not find schema in UDF context. You'd have to explicitly specify a Schema.");
        }
        schema = new ResourceSchema(Utils.getSchemaFromString(storedSchema));
        return;
    }

    // The user-defined schema wins; a parse failure is only logged.
    try {
        schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) UDFContext(org.apache.pig.impl.util.UDFContext) IOException(java.io.IOException) Properties(java.util.Properties) IOException(java.io.IOException)

Example 5 with MongoUpdateWritable

use of com.mongodb.hadoop.io.MongoUpdateWritable in project mongo-hadoop by mongodb.

the class MongoUpdateOutputReaderTest method testUpdate.

/**
 * Checks that the reader turns a streamed update document (with
 * {@code _id}, {@code modifiers} and {@code options} fields) into a
 * {@link MongoUpdateWritable} equal to one constructed directly.
 */
@Test
public void testUpdate() throws IOException {
    final BasicBSONObject selector = new BasicDBObject("i", 42);
    final BasicBSONObject changes = new BasicDBObject("$set", new BasicDBObject("a", "b"));

    // Build the document form of the update as it is fed to the reader.
    final BasicDBObjectBuilder builder = new BasicDBObjectBuilder();
    builder.add("_id", selector);
    builder.add("modifiers", changes);
    builder.push("options");
    builder.add("multi", true);
    builder.add("upsert", false);
    builder.pop();
    final DBObject streamedUpdate = builder.get();

    // Expected result; the flags presumably map to (upsert, multi, replace) -
    // confirm against the MongoUpdateWritable constructor.
    final MongoUpdateWritable expected = new MongoUpdateWritable(selector, changes, false, true, false);

    final PipeMapRed streamingPipe = mock(PipeMapRed.class);
    when(streamingPipe.getClientInput()).thenReturn(inputFromBSONObject(streamedUpdate));

    final MongoUpdateOutputReader outputReader = new MongoUpdateOutputReader();
    outputReader.initialize(streamingPipe);

    assertTrue(outputReader.readKeyValue());
    assertEquals(expected, outputReader.getCurrentValue());
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) BasicDBObject(com.mongodb.BasicDBObject) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) PipeMapRed(org.apache.hadoop.streaming.PipeMapRed) Test(org.junit.Test)

Aggregations

MongoUpdateWritable (com.mongodb.hadoop.io.MongoUpdateWritable): 6; BasicDBObject (com.mongodb.BasicDBObject): 4; DBObject (com.mongodb.DBObject): 4; BSONWritable (com.mongodb.hadoop.io.BSONWritable): 4; IOException (java.io.IOException): 3; BSONObject (org.bson.BSONObject): 2; BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 1; BulkUpdateRequestBuilder (com.mongodb.BulkUpdateRequestBuilder): 1; BulkWriteOperation (com.mongodb.BulkWriteOperation): 1; BulkWriteRequestBuilder (com.mongodb.BulkWriteRequestBuilder): 1; MongoException (com.mongodb.MongoException): 1; MongoOutput (com.mongodb.hadoop.MongoOutput): 1; Map (java.util.Map): 1; Properties (java.util.Properties): 1; FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 1; FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1; FileSystem (org.apache.hadoop.fs.FileSystem): 1; Path (org.apache.hadoop.fs.Path): 1; PipeMapRed (org.apache.hadoop.streaming.PipeMapRed): 1; ResourceSchema (org.apache.pig.ResourceSchema): 1