Search in sources :

Example 1 with BasicBSONCallback

use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.

In class BSONFileRecordReader, method init.

/**
 * Prepares this reader for a split: remembers the configuration and split, opens the split's
 * file, positions the raw stream, optionally layers a decompressing stream on top of it, and
 * picks the BSON callback/decoder pair selected by the lazy-BSON setting.
 *
 * @param inputSplit    the split to read; it is cast to {@code FileSplit}
 * @param configuration configuration used to resolve the file system, the compression codec
 *                      and the lazy-BSON flag
 * @throws IOException          propagated from the file system, stream or codec calls
 * @throws InterruptedException declared by the signature
 */
public void init(final InputSplit inputSplit, final Configuration configuration) throws IOException, InterruptedException {
    this.configuration = configuration;
    fileSplit = (FileSplit) inputSplit;
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }

    final Path splitPath = fileSplit.getPath();
    final FileSystem fileSystem = splitPath.getFileSystem(configuration);
    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(configuration);
    final CompressionCodec compression = codecFactory.getCodec(fileSplit.getPath());

    // Open with a 16 MiB buffer, then position the raw stream before any codec wraps it.
    inRaw = fileSystem.open(splitPath, 16 * 1024 * 1024);
    if (startingPosition == BSON_RR_POSITION_NOT_GIVEN) {
        // No explicit position was supplied: begin at the start of the split.
        inRaw.seek(fileSplit.getStart());
    } else {
        inRaw.seek(startingPosition);
    }

    if (compression == null) {
        // No codec matched this path: read the raw stream directly.
        in = inRaw;
    } else {
        decompressor = CodecPool.getDecompressor(compression);
        in = compression.createInputStream(inRaw, decompressor);
    }

    final boolean lazy = MongoConfigUtil.getLazyBSON(configuration);
    if (!lazy) {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    } else {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BasicBSONCallback(org.bson.BasicBSONCallback) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBSONCallback(org.bson.LazyBSONCallback) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) LazyBSONDecoder(org.bson.LazyBSONDecoder) BasicBSONDecoder(org.bson.BasicBSONDecoder)

Example 2 with BasicBSONCallback

use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.

In class BSONWritable, method readFields.

/**
 * {@inheritDoc}
 *
 * <p>Reads one length-prefixed BSON document from {@code in}, decodes it and stores the result
 * in {@code doc}. Any failure while reading or decoding is logged at info level and leaves
 * {@code doc} set to an empty {@code BasicDBObject} instead of propagating.
 *
 * @param in the input to read the serialized document from
 * @throws IOException declared by the signature; failures inside the read are caught and logged
 * @see Writable#readFields(DataInput)
 */
public void readFields(final DataInput in) throws IOException {
    BSONDecoder dec = new BasicBSONDecoder();
    BSONCallback cb = new BasicBSONCallback();
    // Read the BSON length from the start of the record
    byte[] l = new byte[4];
    try {
        in.readFully(l);
        int dataLen = Bits.readInt(l);
        if (LOG.isDebugEnabled()) {
            LOG.debug("*** Expected DataLen: " + dataLen);
        }
        // The length prefix counts the whole document, its own four bytes included (only
        // dataLen - 4 further bytes are read below), so the buffer is exactly dataLen bytes.
        byte[] data = new byte[dataLen];
        System.arraycopy(l, 0, data, 0, 4);
        in.readFully(data, 4, dataLen - 4);
        dec.decode(data, cb);
        doc = (BSONObject) cb.get();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Decoded a BSON Object: " + doc);
        }
    } catch (Exception e) {
        /* If we can't read another length it's not an error, just return quietly. */
        // TODO - Figure out how to gracefully mark this as an empty
        // NOTE(review): this also swallows decode failures and malformed lengths, not just
        // end-of-input; kept as-is because callers may rely on the best-effort behavior.
        LOG.info("No Length Header available." + e);
        doc = new BasicDBObject();
    }
}
Also used : BasicDBObject(com.mongodb.BasicDBObject) BasicBSONCallback(org.bson.BasicBSONCallback) BasicBSONDecoder(org.bson.BasicBSONDecoder) BSONDecoder(org.bson.BSONDecoder) BSONCallback(org.bson.BSONCallback) BasicBSONCallback(org.bson.BasicBSONCallback) BasicBSONDecoder(org.bson.BasicBSONDecoder) IOException(java.io.IOException)

Example 3 with BasicBSONCallback

use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.

In class MongoUpdateWritable, method readFields.

/**
 * {@inheritDoc}
 *
 * <p>Reads, in order: the length-prefixed BSON {@code query} document, the length-prefixed
 * BSON {@code modifiers} document, and the {@code upsert}, {@code multiUpdate} and
 * {@code replace} flags. Any failure along the way is logged at info level and resets both
 * {@code query} and {@code modifiers} to empty {@code BasicDBObject}s; the flags are left at
 * whatever value they had when the failure occurred.
 *
 * @param in the input to read the serialized update from
 * @throws IOException declared by the signature; failures inside the read are caught and logged
 * @see Writable#readFields(DataInput)
 */
public void readFields(final DataInput in) throws IOException {
    BSONDecoder dec = new BasicBSONDecoder();
    BSONCallback cb = new BasicBSONCallback();
    try {
        query = readDocument(in, dec, cb);
        modifiers = readDocument(in, dec, cb);
        upsert = in.readBoolean();
        multiUpdate = in.readBoolean();
        replace = in.readBoolean();
    } catch (Exception e) {
        /* If we can't read another length it's not an error, just return quietly. */
        // TODO - Figure out how to gracefully mark this as an empty
        // NOTE(review): this also swallows decode failures and malformed lengths, not just
        // end-of-input; kept as-is because callers may rely on the best-effort behavior.
        LOG.info("No Length Header available." + e);
        query = new BasicDBObject();
        modifiers = new BasicDBObject();
    }
}

/**
 * Reads one length-prefixed BSON document from {@code in} and decodes it.
 *
 * @param in  the input positioned at the document's 4-byte length prefix
 * @param dec the decoder to use
 * @param cb  the callback that receives the decoded document
 * @return the decoded document obtained from {@code cb}
 * @throws IOException if the input cannot supply the expected bytes
 */
private static BasicBSONObject readDocument(final DataInput in, final BSONDecoder dec, final BSONCallback cb)
        throws IOException {
    // Read the BSON length from the start of the record
    byte[] l = new byte[4];
    in.readFully(l);
    int dataLen = Bits.readInt(l);
    // The length prefix counts the whole document, its own four bytes included (only
    // dataLen - 4 further bytes are read below), so the buffer is exactly dataLen bytes.
    byte[] data = new byte[dataLen];
    System.arraycopy(l, 0, data, 0, 4);
    in.readFully(data, 4, dataLen - 4);
    dec.decode(data, cb);
    return (BasicBSONObject) cb.get();
}
Also used : BasicDBObject(com.mongodb.BasicDBObject) BasicBSONCallback(org.bson.BasicBSONCallback) BasicBSONDecoder(org.bson.BasicBSONDecoder) BSONDecoder(org.bson.BSONDecoder) BSONCallback(org.bson.BSONCallback) BasicBSONCallback(org.bson.BasicBSONCallback) BasicBSONDecoder(org.bson.BasicBSONDecoder) IOException(java.io.IOException)

Example 4 with BasicBSONCallback

use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.

In class MongoInputSplit, method readFields.

/**
 * Restores this split from a single length-prefixed BSON document read from {@code in}.
 *
 * <p>The decoded document is consulted for the keys {@code inputURI}, {@code authURI},
 * {@code keyField}, {@code fields}, {@code query}, {@code sort}, {@code min}, {@code max},
 * {@code limit}, {@code skip} and {@code notimeout}, and each value is passed to the matching
 * setter. Absent sub-documents and an absent {@code authURI} are passed on as {@code null}.
 *
 * @param in the input to read the serialized split from
 * @throws IOException if the input cannot supply the expected bytes, or if the length prefix
 *                     is smaller than the minimum BSON document size
 */
@Override
public void readFields(final DataInput in) throws IOException {
    BSONCallback cb = new BasicBSONCallback();
    BSONObject spec;
    byte[] l = new byte[4];
    in.readFully(l);
    int dataLen = org.bson.io.Bits.readInt(l);
    // A BSON document is at least 5 bytes (4-byte length plus terminating zero byte). Reject
    // anything smaller with a checked exception instead of an array-size or index error.
    if (dataLen < 5) {
        throw new IOException("Invalid BSON document length: " + dataLen);
    }
    // The length prefix counts the whole document, its own four bytes included (only
    // dataLen - 4 further bytes are read below), so the buffer is exactly dataLen bytes.
    byte[] data = new byte[dataLen];
    System.arraycopy(l, 0, data, 0, 4);
    in.readFully(data, 4, dataLen - 4);
    _bsonDecoder.decode(data, cb);
    spec = (BSONObject) cb.get();
    setInputURI(new MongoClientURI((String) spec.get("inputURI")));
    if (spec.get("authURI") != null) {
        setAuthURI(new MongoClientURI((String) spec.get("authURI")));
    } else {
        setAuthURI((MongoClientURI) null);
    }
    setKeyField((String) spec.get("keyField"));
    // Each optional sub-document is copied into a BasicDBObject, or passed as null when absent.
    BSONObject temp = (BSONObject) spec.get("fields");
    setFields(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("query");
    setQuery(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("sort");
    setSort(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("min");
    setMin(temp != null ? new BasicDBObject(temp.toMap()) : null);
    temp = (BSONObject) spec.get("max");
    setMax(temp != null ? new BasicDBObject(temp.toMap()) : null);
    // NOTE(review): these casts yield null when a key is absent; whether that is safe depends
    // on the setters' parameter types, which are not visible here.
    setLimit((Integer) spec.get("limit"));
    setSkip((Integer) spec.get("skip"));
    setNoTimeout((Boolean) spec.get("notimeout"));
}
Also used : BasicDBObject(com.mongodb.BasicDBObject) BasicBSONCallback(org.bson.BasicBSONCallback) MongoClientURI(com.mongodb.MongoClientURI) BSONObject(org.bson.BSONObject) BSONCallback(org.bson.BSONCallback) BasicBSONCallback(org.bson.BasicBSONCallback)

Example 5 with BasicBSONCallback

use of org.bson.BasicBSONCallback in project mongo-hadoop by mongodb.

In class BSONSplitterTest, method testReadSplitsForFile.

/**
 * Checks {@code readSplitsForFile} in both modes: with split reading disabled it must yield a
 * single split, and with split reading enabled and a 5000-byte maximum split size it must
 * yield 40 splits, each of which starts at a position from which a BSON document containing
 * an {@code _id} field can be decoded.
 *
 * @throws IOException if reading the test file fails
 */
@Test
public void testReadSplitsForFile() throws IOException {
    Configuration readSplitsConfig = new Configuration(conf);
    SPLITTER.setConf(readSplitsConfig);
    // Only one split if reading splits is disabled.
    MongoConfigUtil.setBSONReadSplits(readSplitsConfig, false);
    SPLITTER.readSplitsForFile(file);
    List<BSONFileSplit> splitsList = SPLITTER.getAllSplits();
    assertEquals(1, splitsList.size());
    BSONFileSplit theSplit = splitsList.get(0);
    assertOneSplit(theSplit);
    // Actually compute splits.
    MongoConfigUtil.setBSONReadSplits(readSplitsConfig, true);
    // Set split size to be really small so we get a lot of them.
    readSplitsConfig.set("mapreduce.input.fileinputformat.split.maxsize", "5000");
    SPLITTER.readSplitsForFile(file);
    splitsList = SPLITTER.getAllSplits();
    // Value found through manual inspection.
    assertEquals(40, splitsList.size());
    // Make sure that all splits start on document boundaries.
    FSDataInputStream stream = fs.open(file.getPath());
    try {
        BSONDecoder decoder = new BasicBSONDecoder();
        BSONCallback callback = new BasicBSONCallback();
        for (BSONFileSplit split : splitsList) {
            stream.seek(split.getStart());
            decoder.decode(stream, callback);
            BSONObject doc = (BSONObject) callback.get();
            assertTrue(doc.containsField("_id"));
        }
    } finally {
        // Release the file handle even when an assertion above fails.
        stream.close();
    }
}
Also used : BasicBSONCallback(org.bson.BasicBSONCallback) BasicBSONDecoder(org.bson.BasicBSONDecoder) BSONDecoder(org.bson.BSONDecoder) Configuration(org.apache.hadoop.conf.Configuration) BSONFileSplit(com.mongodb.hadoop.input.BSONFileSplit) BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BSONCallback(org.bson.BSONCallback) BasicBSONCallback(org.bson.BasicBSONCallback) BasicBSONDecoder(org.bson.BasicBSONDecoder) Test(org.junit.Test)

Aggregations

BasicBSONCallback (org.bson.BasicBSONCallback): 5, BSONCallback (org.bson.BSONCallback): 4, BasicBSONDecoder (org.bson.BasicBSONDecoder): 4, BasicDBObject (com.mongodb.BasicDBObject): 3, BSONDecoder (org.bson.BSONDecoder): 3, IOException (java.io.IOException): 2, BSONObject (org.bson.BSONObject): 2, MongoClientURI (com.mongodb.MongoClientURI): 1, BSONFileSplit (com.mongodb.hadoop.input.BSONFileSplit): 1, Configuration (org.apache.hadoop.conf.Configuration): 1, FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1, Path (org.apache.hadoop.fs.Path): 1, CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 1, CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory): 1, BasicBSONObject (org.bson.BasicBSONObject): 1, LazyBSONCallback (org.bson.LazyBSONCallback): 1, LazyBSONDecoder (org.bson.LazyBSONDecoder): 1, Test (org.junit.Test): 1