Search in sources :

Example 21 with DBObject

use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

the class MongoConfigUtil method getInputSplitKey.

/**
 * Resolves the split key for the input collection from the given configuration.
 *
 * <p>The key pattern is fetched as a JSON string through
 * {@code getInputSplitKeyPattern(conf)} and parsed into a {@link DBObject}.
 * When the parser yields {@code null}, the key falls back to
 * <code>{ "_id" : 1 }</code>.
 *
 * @param conf the configuration holding the split key pattern
 * @return the parsed split key, or <code>{ "_id" : 1 }</code> when parsing produced {@code null}
 * @throws IllegalArgumentException if fetching or parsing the pattern fails for any reason;
 *         the original failure is attached as the cause
 */
public static DBObject getInputSplitKey(final Configuration conf) {
    try {
        final DBObject parsedKey = (DBObject) JSON.parse(getInputSplitKeyPattern(conf));
        // A null parse result means "nothing usable configured": default to splitting on _id.
        return parsedKey != null ? parsedKey : new BasicDBObject("_id", 1);
    } catch (final Exception e) {
        throw new IllegalArgumentException("Provided JSON String is not representable/parsable as a DBObject.", e);
    }
}
Also used : BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) BasicDBObject(com.mongodb.BasicDBObject) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException)

Example 22 with DBObject

use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

the class MongoCollectionSplitter method createRangeQuerySplit.

/**
 * Creates an instance of {@link MongoInputSplit} whose upper and lower
 * bounds are restricted by adding $gte/$lt clauses to the query
 * filter. This requires that the boundaries are not compound keys, and that
 * the query does not contain any keys used in the split key.
 *
 * <p>When both bounds are {@code null} the split carries a plain copy of
 * {@code query} with no range restriction. A single {@code null} bound
 * leaves that side of the range open.
 *
 * @param chunkLowerBound the lower bound of the chunk (min), or {@code null} for none
 * @param chunkUpperBound the upper bound of the chunk (max), or {@code null} for none
 * @param query a query filtering the documents within the split
 * @return a MongoInputSplit from a range query
 * @throws IllegalArgumentException if the query conflicts with the chunk bounds, or if either
 *         of the bounds is a compound key.
 */
public MongoInputSplit createRangeQuerySplit(final BasicDBObject chunkLowerBound, final BasicDBObject chunkUpperBound, final BSONObject query) {
    // A split without boundaries: just carry a copy of the caller's filter.
    if (chunkLowerBound == null && chunkUpperBound == null) {
        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        MongoInputSplit split = new MongoInputSplit(getConfiguration());
        split.setQuery(splitQuery);
        return split;
    }
    // The boundaries are not empty, so try to build a split using $gte/$lt.
    // First check that the split contains no compound keys.
    // e.g. this is valid: { _id : "foo" }
    // but this is not {_id : "foo", name : "bar"}
    // A compound key on EITHER side must be rejected; otherwise that bound would be
    // silently dropped below and the split would cover more documents than intended.
    boolean hasCompoundBound = chunkLowerBound != null && chunkLowerBound.keySet().size() > 1
            || chunkUpperBound != null && chunkUpperBound.keySet().size() > 1;
    Entry<String, Object> minKey = chunkLowerBound != null && chunkLowerBound.keySet().size() == 1 ? chunkLowerBound.entrySet().iterator().next() : null;
    Entry<String, Object> maxKey = chunkUpperBound != null && chunkUpperBound.keySet().size() == 1 ? chunkUpperBound.entrySet().iterator().next() : null;
    // The second condition keeps the pre-existing rejection of the case where neither
    // bound yields a usable single-field key (e.g. both bounds are empty objects).
    if (hasCompoundBound || minKey == null && maxKey == null) {
        throw new IllegalArgumentException("Range query is enabled but one or more split boundaries contains a compound key:\n" + "min:  " + chunkLowerBound + "\nmax:  " + chunkUpperBound);
    }
    // Next, reject split keys which overlap with the query: putting the range clause
    // under a field the query already constrains would overwrite the caller's condition.
    if (minKey != null && query.containsField(minKey.getKey()) || maxKey != null && query.containsField(maxKey.getKey())) {
        throw new IllegalArgumentException("Range query is enabled but split key conflicts with query filter:\n" + "min:  " + chunkLowerBound + "\nmax:  " + chunkUpperBound + "\nquery:  " + query);
    }
    // Build { $gte: min, $lt: max }, omitting whichever side has no bound.
    String key = null;
    BasicDBObject rangeObj = new BasicDBObject();
    if (minKey != null) {
        key = minKey.getKey();
        rangeObj.put("$gte", minKey.getValue());
    }
    if (maxKey != null) {
        key = maxKey.getKey();
        rangeObj.put("$lt", maxKey.getValue());
    }
    // Combine the caller's filter with the range restriction on the split key.
    DBObject splitQuery = new BasicDBObject();
    splitQuery.putAll(query);
    splitQuery.put(key, rangeObj);
    MongoInputSplit split = new MongoInputSplit(getConfiguration());
    split.setQuery(splitQuery);
    return split;
}
Also used : BasicDBObject(com.mongodb.BasicDBObject) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) BasicDBObject(com.mongodb.BasicDBObject) BSONObject(org.bson.BSONObject) DBObject(com.mongodb.DBObject) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject)

Example 23 with DBObject

use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

the class MultiMongoCollectionSplitter method calculateSplits.

/**
 * Builds one splitter per input collection and concatenates the splits they produce.
 *
 * <p>Two configuration styles are handled:
 * <ul>
 *   <li>If {@code MongoConfigUtil.getMongoURIs} returns any input URIs, each URI gets a copy of
 *       this splitter's configuration with the input URI overridden and the splitter class
 *       setting blanked, and the splitter is obtained from
 *       {@code MongoSplitterFactory.getSplitterByStats}.</li>
 *   <li>Otherwise the JSON stored under {@code MULTI_COLLECTION_CONF_KEY} is read; it must be an
 *       array of objects, each of which is turned into its own configuration.</li>
 * </ul>
 *
 * @return the splits of every configured collection, in configuration order
 * @throws SplitFailedException propagated from the individual splitters
 * @throws IllegalArgumentException if the multi-URI JSON is not an array of objects, or if an
 *         entry names {@code MultiMongoCollectionSplitter} itself as its splitter class
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    List<MongoClientURI> inputURIs = MongoConfigUtil.getMongoURIs(this.getConfiguration(), MongoConfigUtil.INPUT_URI);
    List<InputSplit> returnVal = new LinkedList<InputSplit>();
    List<MongoSplitter> splitters = new LinkedList<MongoSplitter>();
    // For each input URI that is specified, pick the appropriate splitter implementation.
    if (!inputURIs.isEmpty()) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Using global split settings for multiple URIs specified.");
        }
        // Every URI shares the global options: work on a copy of the configuration
        // with only the input URI swapped in.
        for (MongoClientURI uri : inputURIs) {
            MongoCollectionSplitter splitter;
            Configuration confForThisUri = new Configuration(getConfiguration());
            MongoConfigUtil.setInputURI(confForThisUri, uri);
            // Blank the splitter class in the per-URI copy before asking the factory.
            confForThisUri.set(MongoConfigUtil.MONGO_SPLITTER_CLASS, "");
            splitter = MongoSplitterFactory.getSplitterByStats(uri, confForThisUri);
            splitters.add(splitter);
        }
    } else {
        // Otherwise the user has set options per-collection.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Loading multiple input URIs from JSON stored in " + MULTI_COLLECTION_CONF_KEY);
        }
        DBObject multiUriConfig = MongoConfigUtil.getDBObject(this.getConfiguration(), MULTI_COLLECTION_CONF_KEY);
        // instanceof is false for null, so a missing config is rejected here as well.
        if (!(multiUriConfig instanceof List)) {
            throw new IllegalArgumentException("Invalid JSON format in multi uri config key: Must be an array where each element " + "is an object describing the URI and config options for each split.");
        }
        for (Object obj : (List<?>) multiUriConfig) {
            Map<String, Object> configMap;
            MongoClientURI inputURI;
            Configuration confForThisUri;
            try {
                configMap = (Map<String, Object>) obj;
                if (LOG.isDebugEnabled()) {
                    LOG.debug("building config from " + configMap.toString());
                }
                confForThisUri = MongoConfigUtil.buildConfiguration(configMap);
                inputURI = MongoConfigUtil.getInputURI(confForThisUri);
            } catch (ClassCastException e) {
                // Keep the original exception as the cause so the offending element can be traced.
                throw new IllegalArgumentException("Invalid JSON format in multi uri config key: each config item must be an " + "object with keys/values describing options for each URI.", e);
            }
            MongoSplitter splitter;
            Class<? extends MongoSplitter> splitterClass = MongoConfigUtil.getSplitterClass(confForThisUri);
            if (splitterClass != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Using custom Splitter class for namespace: %s.%s; hosts: %s", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                //Make sure that the custom class isn't this one
                if (splitterClass == MultiMongoCollectionSplitter.class) {
                    throw new IllegalArgumentException("Can't nest uses of MultiMongoCollectionSplitter");
                }
                //All clear.
                MongoCollectionSplitter collectionSplitter;
                collectionSplitter = (MongoCollectionSplitter) ReflectionUtils.newInstance(splitterClass, confForThisUri);
                //Since we use no-arg constructor, need to inject
                //configuration and input URI.
                collectionSplitter.setConfiguration(confForThisUri);
                splitter = collectionSplitter;
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Fetching collection stats on namespace: %s.%s; hosts: %s to choose splitter implementation.", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                //No class was specified, so choose one by looking at
                //collection stats.
                splitter = MongoSplitterFactory.getSplitterByStats(inputURI, confForThisUri);
            }
            splitters.add(splitter);
        }
    }
    // Now that the splitters are created, run each of them and
    // compile their results into one big ol' list.
    for (MongoSplitter splitter : splitters) {
        returnVal.addAll(splitter.calculateSplits());
    }
    return returnVal;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MongoClientURI(com.mongodb.MongoClientURI) DBObject(com.mongodb.DBObject) LinkedList(java.util.LinkedList) List(java.util.List) LinkedList(java.util.LinkedList) DBObject(com.mongodb.DBObject) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Example 24 with DBObject

use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

the class MongoRecordWriter method write.

/**
 * Serializes one key/value pair to {@code outputStream}, prefixed by an int type tag.
 *
 * <p>A {@code MongoUpdateWritable} value is written as-is after the
 * {@code MONGO_UPDATE_WRITABLE} tag. Any other value is folded, together with the key,
 * into a single document that is written after the {@code BSON_WRITABLE} tag: the key
 * becomes the {@code _id} field, and the value either contributes its fields directly or
 * is stored under {@code value}.
 *
 * @param key the record key, stored as the document's {@code _id}
 * @param value the record value
 * @throws IOException if writing to the output stream fails
 */
@Override
public void write(final K key, final V value) throws IOException {
    // Updates carry their own payload; no document needs to be assembled for them.
    if (value instanceof MongoUpdateWritable) {
        outputStream.writeInt(MongoWritableTypes.MONGO_UPDATE_WRITABLE);
        ((MongoUpdateWritable) value).write(outputStream);
        return;
    }

    final DBObject doc = new BasicDBObject();

    // Work out the _id: unwrap a BSONWritable, take a BSONObject directly, convert anything else.
    final Object id;
    if (key instanceof BSONWritable) {
        id = ((BSONWritable) key).getDoc();
    } else if (key instanceof BSONObject) {
        id = key;
    } else {
        id = BSONWritable.toBSON(key);
    }
    doc.put("_id", id);

    // Merge the value into the document; only an unrecognized type lands under "value".
    if (value instanceof BSONWritable) {
        doc.putAll(((BSONWritable) value).getDoc());
    } else if (value instanceof MongoOutput) {
        ((MongoOutput) value).appendAsValue(doc);
    } else if (value instanceof BSONObject) {
        doc.putAll((BSONObject) value);
    } else if (value instanceof Map) {
        doc.putAll((Map) value);
    } else {
        doc.put("value", BSONWritable.toBSON(value));
    }

    outputStream.writeInt(MongoWritableTypes.BSON_WRITABLE);
    bsonWritable.setDoc(doc);
    bsonWritable.write(outputStream);
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicDBObject(com.mongodb.BasicDBObject) MongoUpdateWritable(com.mongodb.hadoop.io.MongoUpdateWritable) BSONObject(org.bson.BSONObject) MongoOutput(com.mongodb.hadoop.MongoOutput) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) Map(java.util.Map)

Example 25 with DBObject

use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

the class BookstoreTest method tagsIndex.

/**
 * Runs the bookstore map/reduce job over the inventory BSON input and checks that the
 * resulting {@code bookstore_tags} collection holds a "history" document listing 8 books.
 */
@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    try {
        DBCollection collection = mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        MapReduceJob job = new MapReduceJob(BookstoreConfig.class.getName()).jar(JAR_PATH).inputUris(INVENTORY_BSON).outputUri(uri).param("mapred.input.dir", INVENTORY_BSON.toString());
        // Hadoop 1.x needs the mapred-API formats and committer; other versions use the mapreduce-API input format.
        if (!HADOOP_VERSION.startsWith("1.")) {
            job.inputFormat(BSONFileInputFormat.class);
        } else {
            job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
            job.mapredOutputFormat(MongoOutputFormat.class);
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(false);
        DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
        assertNotNull(object);
        List<?> books = (List<?>) object.get("books");
        // Expected value goes first so a failure reports "expected 8 but was N".
        Assert.assertEquals("Should find only 8 books", 8, books.size());
    } finally {
        // Release the client's connections even when an assertion fails.
        mongoClient.close();
    }
}
Also used : MongoClient(com.mongodb.MongoClient) DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) MongoClientURI(com.mongodb.MongoClientURI) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) List(java.util.List) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) Test(org.junit.Test) BaseHadoopTest(com.mongodb.hadoop.testutils.BaseHadoopTest)

Aggregations

DBObject (com.mongodb.DBObject)545 BasicDBObject (com.mongodb.BasicDBObject)386 Test (org.junit.Test)214 DBCollection (com.mongodb.DBCollection)83 YearFilterPagingRequest (org.devgateway.ocds.web.rest.controller.request.YearFilterPagingRequest)54 Aggregation (org.springframework.data.mongodb.core.aggregation.Aggregation)52 ApiOperation (io.swagger.annotations.ApiOperation)47 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)46 Aggregation.newAggregation (org.springframework.data.mongodb.core.aggregation.Aggregation.newAggregation)41 DBCursor (com.mongodb.DBCursor)40 ArrayList (java.util.ArrayList)38 HashMap (java.util.HashMap)38 List (java.util.List)31 CustomProjectionOperation (org.devgateway.toolkit.persistence.mongo.aggregate.CustomProjectionOperation)31 Map (java.util.Map)26 ObjectId (org.bson.types.ObjectId)26 BasicDBList (com.mongodb.BasicDBList)24 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)20 BSONObject (org.bson.BSONObject)19 MongoException (com.mongodb.MongoException)18