Search in sources :

Example 16 with DBCollection

use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.

the class StandaloneMongoSplitter method calculateSplits.

/**
 * Calculates input splits by running the {@code splitVector} command against the input
 * collection and converting the returned boundary keys into consecutive ranges.
 *
 * <p>If the command is not recognized or does not report {@code ok == 1.0}, and the collection
 * stats contain a {@code primary} field, the matching document in {@code config.shards} is looked
 * up and the same command is retried directly against that shard's host.
 *
 * @return one split per range between consecutive boundary keys (bounded by the configured
 *         min/max split keys when those are non-empty), optionally passed through
 *         {@code filterEmptySplits} when that option is enabled
 * @throws SplitFailedException if the command reports an error, if no command result could be
 *         obtained at all, or if the result carries no {@code splitKeys} field
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    final DBObject splitKey = MongoConfigUtil.getInputSplitKey(getConfiguration());
    final DBObject splitKeyMax = MongoConfigUtil.getMaxSplitKey(getConfiguration());
    final DBObject splitKeyMin = MongoConfigUtil.getMinSplitKey(getConfiguration());
    final int splitSize = MongoConfigUtil.getSplitSize(getConfiguration());
    final MongoClientURI inputURI;
    DBCollection inputCollection = null;
    final ArrayList<InputSplit> returnVal;
    try {
        inputURI = MongoConfigUtil.getInputURI(getConfiguration());
        MongoClientURI authURI = MongoConfigUtil.getAuthURI(getConfiguration());
        if (authURI != null) {
            inputCollection = MongoConfigUtil.getCollectionWithAuth(inputURI, authURI);
        } else {
            inputCollection = MongoConfigUtil.getCollection(inputURI);
        }
        returnVal = new ArrayList<InputSplit>();
        final String ns = inputCollection.getFullName();
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Running splitVector on namespace: %s.%s; hosts: %s", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
        }
        final DBObject cmd = BasicDBObjectBuilder.start("splitVector", ns).add("keyPattern", splitKey).add("min", splitKeyMin).add("max", splitKeyMax).add("force", false).add("maxChunkSize", splitSize).get();
        CommandResult data;
        boolean ok = true;
        try {
            data = inputCollection.getDB().getSisterDB(inputURI.getDatabase()).command(cmd, ReadPreference.primary());
        } catch (final MongoException e) {
            // 2.0 servers throw exceptions rather than info in a CommandResult
            data = null;
            final String message = e.getMessage();
            LOG.info(message, e);
            // Guard against a null message so the original exception is rethrown
            // instead of being masked by a NullPointerException.
            if (message != null && message.contains("unrecognized command: splitVector")) {
                ok = false;
            } else {
                throw e;
            }
        }
        if (data != null) {
            if (data.containsField("$err")) {
                throw new SplitFailedException("Error calculating splits: " + data);
            } else if (!data.get("ok").equals(1.0)) {
                ok = false;
            }
        }
        if (!ok) {
            // Fallback: retry the command directly on the host listed for the
            // collection's "primary" shard in config.shards.
            final CommandResult stats = inputCollection.getStats();
            if (stats.containsField("primary")) {
                final DBCursor shards = inputCollection.getDB().getSisterDB("config").getCollection("shards").find(new BasicDBObject("_id", stats.getString("primary")));
                try {
                    if (shards.hasNext()) {
                        final DBObject shard = shards.next();
                        // Strip the leading "<_id>/" prefix from the host string.
                        final String host = ((String) shard.get("host")).replace(shard.get("_id") + "/", "");
                        final MongoClientURI shardHost;
                        if (authURI != null) {
                            shardHost = new MongoClientURIBuilder(authURI).host(host).build();
                        } else {
                            shardHost = new MongoClientURIBuilder(inputURI).host(host).build();
                        }
                        MongoClient shardClient = null;
                        try {
                            shardClient = new MongoClient(shardHost);
                            data = shardClient.getDB(shardHost.getDatabase()).command(cmd, ReadPreference.primary());
                        } catch (final Exception e) {
                            // Best effort: the failure is logged and reported below
                            // through whatever result (if any) is left in data.
                            LOG.error(e.getMessage(), e);
                        } finally {
                            if (shardClient != null) {
                                shardClient.close();
                            }
                        }
                    }
                } finally {
                    shards.close();
                }
            }
            if (data != null && !data.get("ok").equals(1.0)) {
                throw new SplitFailedException("Unable to calculate input splits: " + data.get("errmsg"));
            }
        }
        // No result at all (command unrecognized and the shard fallback did not
        // produce one): fail with the declared exception rather than an NPE.
        if (data == null) {
            throw new SplitFailedException("Unable to calculate input splits: no result was returned for the splitVector command");
        }
        // Comes in a format where "min" and "max" are implicit
        // and each entry is just a boundary key; not ranged
        final BasicDBList splitData = (BasicDBList) data.get("splitKeys");
        if (splitData == null) {
            throw new SplitFailedException("Unable to calculate input splits: splitVector result has no 'splitKeys' field: " + data);
        }
        if (splitData.size() == 0) {
            LOG.warn("WARNING: No Input Splits were calculated by the split code. Proceeding with a *single* split. Data may be too" + " small, try lowering 'mongo.input.split_size' if this is undesirable.");
        }
        // Lower boundary of the first min split
        BasicDBObject lastKey = null;
        // If splitKeyMin was given, use it as first boundary.
        if (!splitKeyMin.toMap().isEmpty()) {
            lastKey = new BasicDBObject(splitKeyMin.toMap());
        }
        for (final Object aSplitData : splitData) {
            final BasicDBObject currentKey = (BasicDBObject) aSplitData;
            returnVal.add(createSplitFromBounds(lastKey, currentKey));
            lastKey = currentKey;
        }
        BasicDBObject maxKey = null;
        // If splitKeyMax was given, use it as last boundary.
        if (!splitKeyMax.toMap().isEmpty()) {
            maxKey = new BasicDBObject(splitKeyMax.toMap());
        }
        // Last max split
        final MongoInputSplit lastSplit = createSplitFromBounds(lastKey, maxKey);
        returnVal.add(lastSplit);
    } finally {
        // Always release the client backing the input collection.
        if (inputCollection != null) {
            MongoConfigUtil.close(inputCollection.getDB().getMongo());
        }
    }
    if (MongoConfigUtil.isFilterEmptySplitsEnabled(getConfiguration())) {
        return filterEmptySplits(returnVal);
    }
    return returnVal;
}
Also used : MongoException(com.mongodb.MongoException) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) MongoClientURI(com.mongodb.MongoClientURI) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) MongoException(com.mongodb.MongoException) CommandResult(com.mongodb.CommandResult) DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) MongoClient(com.mongodb.MongoClient) BasicDBList(com.mongodb.BasicDBList) DBCursor(com.mongodb.DBCursor) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit)

Example 17 with DBCollection

use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.

the class MongoSplitterFactory method getSplitterByStats.

/**
 * Picks a splitter implementation for the collection addressed by {@code uri}, based on the
 * job configuration, the collection's stats and the server's {@code buildinfo}.
 *
 * @param uri    URI of the input collection
 * @param config job configuration consulted for the splitting options
 * @return the splitter chosen for this collection
 * @throws RuntimeException if the collection stats do not report {@code ok}
 */
public static MongoCollectionSplitter getSplitterByStats(final MongoClientURI uri, final Configuration config) {
    // createInputSplits(config) is false: a SingleMongoSplitter handles the
    // whole collection and no server round trip is needed.
    if (!MongoConfigUtil.createInputSplits(config)) {
        return new SingleMongoSplitter(config);
    }

    final MongoClientURI authURI = MongoConfigUtil.getAuthURI(config);
    CommandResult stats;
    CommandResult buildInfo;
    DBCollection coll = null;
    try {
        coll = authURI != null
            ? MongoConfigUtil.getCollectionWithAuth(uri, authURI)
            : MongoConfigUtil.getCollection(uri);
        stats = coll.getStats();
        // Stats are only logged when an auth URI is configured.
        if (authURI != null) {
            LOG.info("Retrieved Collection stats:" + stats);
        }
        buildInfo = coll.getDB().command("buildinfo");
    } finally {
        // Release the client whether or not the commands succeeded.
        if (coll != null) {
            MongoConfigUtil.close(coll.getDB().getMongo());
        }
    }

    if (!stats.getBoolean("ok", false)) {
        throw new RuntimeException("Unable to calculate input splits from collection stats: " + stats.getString("errmsg"));
    }

    if (stats.getBoolean("sharded", false)) {
        // Collection is sharded.
        if (MongoConfigUtil.isShardChunkedSplittingEnabled(config)) {
            // One split per chunk.
            return new ShardChunkMongoSplitter(config);
        }
        if (MongoConfigUtil.canReadSplitsFromShards(config)) {
            // One split per shard, ignoring chunk bounds; reads from the shards
            // directly (bypassing mongos). Not usually recommended.
            return new ShardMongoSplitter(config);
        }
        // Neither chunks nor shards are configured for use, so treat the
        // collection the same as an unsharded one.
        return new StandaloneMongoSplitter(config);
    }

    // Unsharded: prefer SampleSplitter when the server version is at least 3.2.
    final List versionArray = (List) buildInfo.get("versionArray");
    final int major = (Integer) versionArray.get(0);
    if (major > 3 || (major == 3 && (Integer) versionArray.get(1) >= 2)) {
        return new SampleSplitter(config);
    }
    return new StandaloneMongoSplitter(config);
}
Also used : DBCollection(com.mongodb.DBCollection) MongoClientURI(com.mongodb.MongoClientURI) List(java.util.List) CommandResult(com.mongodb.CommandResult)

Example 18 with DBCollection

use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.

the class ShardChunkMongoSplitter method calculateSplits.

/**
 * Generates one split per chunk, using the documents of the {@code chunks} collection in the
 * config database together with the shards map.
 *
 * @return the splits computed by {@code calculateSplitsFromChunks}
 * @throws SplitFailedException if the shards map cannot be obtained
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    final DBCollection chunks = getConfigDB().getCollection("chunks");

    final Map<String, List<String>> shardHosts;
    try {
        shardHosts = getShardsMap();
    } catch (Exception cause) {
        // Without the shards information the splitting cannot proceed, so abort.
        throw new SplitFailedException("Couldn't get shards information from config server", cause);
    }

    final List<DBObject> chunkDocs = chunks.find().toArray();
    return calculateSplitsFromChunks(chunkDocs, shardHosts);
}
Also used : DBCollection(com.mongodb.DBCollection) ArrayList(java.util.ArrayList) List(java.util.List) DB(com.mongodb.DB)

Example 19 with DBCollection

use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.

the class BookstoreTest method tagsIndex.

/**
 * Runs the bookstore job with the inventory BSON file as input and the
 * {@code mongo_hadoop.bookstore_tags} collection as output, then checks that the document
 * with {@code _id} "history" exists and lists 8 books.
 */
@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    try {
        DBCollection collection = mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        MapReduceJob job = new MapReduceJob(BookstoreConfig.class.getName()).jar(JAR_PATH).inputUris(INVENTORY_BSON).outputUri(uri).param("mapred.input.dir", INVENTORY_BSON.toString());
        // Hadoop 1.x gets the mapred input/output formats and committer;
        // every other version gets the BSONFileInputFormat in scope here.
        if (!HADOOP_VERSION.startsWith("1.")) {
            job.inputFormat(BSONFileInputFormat.class);
        } else {
            job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
            job.mapredOutputFormat(MongoOutputFormat.class);
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(false);
        DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
        assertNotNull(object);
        List books = (List) object.get("books");
        // JUnit's argument order is (message, expected, actual).
        Assert.assertEquals("Should find only 8 books", 8, books.size());
    } finally {
        // Close the client even when the job or an assertion fails.
        mongoClient.close();
    }
}
Also used : MongoClient(com.mongodb.MongoClient) DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) MongoClientURI(com.mongodb.MongoClientURI) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) List(java.util.List) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) Test(org.junit.Test) BaseHadoopTest(com.mongodb.hadoop.testutils.BaseHadoopTest)

Example 20 with DBCollection

use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.

the class MongoInputSplit method getCursor.

/**
 * Returns the cursor for this split, building and caching it on the first call.
 *
 * <p>The cursor is created from the input collection (opened with the auth URI when one is
 * set) using this split's query, fields and sort, and then has the no-timeout option,
 * {@code $min}/{@code $max} bounds, skip and limit applied when those are set.
 *
 * @return the cached cursor
 */
public DBCursor getCursor() {
    // Already built on an earlier call: hand back the cached cursor.
    if (this.cursor != null) {
        return this.cursor;
    }

    final DBCollection source = this.authURI == null
        ? MongoConfigUtil.getCollection(this.inputURI)
        : MongoConfigUtil.getCollectionWithAuth(this.inputURI, this.authURI);

    this.cursor = source.find(this.query, this.fields).sort(this.sort);

    if (this.notimeout) {
        this.cursor.setOptions(Bytes.QUERYOPTION_NOTIMEOUT);
    }
    if (this.min != null) {
        this.cursor.addSpecial("$min", this.min);
    }
    if (this.max != null) {
        this.cursor.addSpecial("$max", this.max);
    }
    if (this.skip != null) {
        this.cursor = this.cursor.skip(this.skip);
    }
    if (this.limit != null) {
        this.cursor = this.cursor.limit(this.limit);
    }
    return this.cursor;
}
Also used : DBCollection(com.mongodb.DBCollection)

Aggregations

DBCollection (com.mongodb.DBCollection)165 DBObject (com.mongodb.DBObject)90 BasicDBObject (com.mongodb.BasicDBObject)86 Test (org.junit.Test)69 UsingDataSet (com.lordofthejars.nosqlunit.annotation.UsingDataSet)29 DBCursor (com.mongodb.DBCursor)23 MongoException (com.mongodb.MongoException)22 DB (com.mongodb.DB)20 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)17 FailedDBOperationException (edu.umass.cs.gnscommon.exceptions.server.FailedDBOperationException)12 JSONObject (org.json.JSONObject)12 MongoClientURI (com.mongodb.MongoClientURI)11 QueryBuilder (com.mongodb.QueryBuilder)10 List (java.util.List)10 Map (java.util.Map)10 Stopwatch (com.google.common.base.Stopwatch)9 WriteResult (com.mongodb.WriteResult)9 HashMap (java.util.HashMap)9 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)8