
Example 26 with DBObject

Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

From the class MongoPaginatingSplitterTest, method testQuery:

@Test
public void testQuery() throws SplitFailedException {
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, uri);
    MongoConfigUtil.setRangeQueryEnabled(conf, true);
    MongoConfigUtil.setInputSplitMinDocs(conf, 5000);
    DBObject query = new BasicDBObject("$or", new BasicDBObject[] { new BasicDBObject("value", new BasicDBObject("$lt", 25000)), new BasicDBObject("value", new BasicDBObject("$gte", 31000)) });
    MongoConfigUtil.setQuery(conf, query);
    MongoPaginatingSplitter splitter = new MongoPaginatingSplitter(conf);
    List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(7, splits.size());
    assertSplitRange((MongoInputSplit) splits.get(0), null, 5000);
    assertSplitRange((MongoInputSplit) splits.get(1), 5000, 10000);
    assertSplitRange((MongoInputSplit) splits.get(2), 10000, 15000);
    assertSplitRange((MongoInputSplit) splits.get(3), 15000, 20000);
    assertSplitRange((MongoInputSplit) splits.get(4), 20000, 31000);
    assertSplitRange((MongoInputSplit) splits.get(5), 31000, 36000);
    assertSplitRange((MongoInputSplit) splits.get(6), 36000, null);
    // The query excludes the 6000 documents with values in [25000, 31000).
    assertSplitsCount(collection.count() - 6000, splits);
}
Also used: BasicDBObject(com.mongodb.BasicDBObject) Configuration(org.apache.hadoop.conf.Configuration) DBObject(com.mongodb.DBObject) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) Test(org.junit.Test)
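
These splitter tests run against a collection seeded elsewhere in the test fixture. A minimal sketch of a plausible setup, inferred from the split boundaries asserted above (the connection string, database, collection name, and the count of 40000 documents are assumptions, not taken from the source):

// Hypothetical fixture: 40000 documents with a monotonically increasing
// "value" field, matching the ranges asserted in testQuery().
MongoClient client = new MongoClient(new MongoClientURI("mongodb://localhost:27017"));
DBCollection collection = client.getDB("mongo_hadoop").getCollection("splitter_test");
collection.drop();
for (int i = 0; i < 40000; i++) {
    collection.insert(new BasicDBObject("_id", i).append("value", i));
}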

Example 27 with DBObject

Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

From the class StandaloneMongoSplitterTest, method unshardedCollectionMinMax:

@Test
public void unshardedCollectionMinMax() throws UnknownHostException, SplitFailedException {
    Configuration config = new Configuration();
    // The splitter keeps a reference to config, so settings applied below
    // are picked up when calculateSplits() is called.
    StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
    MongoConfigUtil.setInputURI(config, uri);
    DBObject inputSplitKey = BasicDBObjectBuilder.start("value", 1).get();
    MongoConfigUtil.setInputSplitKey(config, inputSplitKey);
    MongoConfigUtil.setSplitSize(config, 1);
    // Baseline: split the whole collection.
    List<InputSplit> regularSplits = splitter.calculateSplits();
    // Constrain the split range, then recalculate.
    MongoConfigUtil.setMinSplitKey(config, "{value:100}");
    MongoConfigUtil.setMaxSplitKey(config, "{value:39900}");
    List<InputSplit> inputSplits = splitter.calculateSplits();
    assertTrue("should not be more splits with min/max set", regularSplits.size() >= inputSplits.size());
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) Test(org.junit.Test)
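
StandaloneMongoSplitter computes its split points with MongoDB's splitVector command, so the effect of setMinSplitKey/setMaxSplitKey can be previewed by running that command directly. A rough sketch under that assumption (the connection string and the namespace mongo_hadoop.splitter_test are hypothetical):

// Ask the server for split points between the min and max keys.
// maxChunkSize is in MB, mirroring setSplitSize(config, 1) above.
DB db = new MongoClient(new MongoClientURI("mongodb://localhost:27017")).getDB("mongo_hadoop");
DBObject splitVectorCmd = BasicDBObjectBuilder.start()
    .add("splitVector", "mongo_hadoop.splitter_test")
    .add("keyPattern", new BasicDBObject("value", 1))
    .add("min", new BasicDBObject("value", 100))
    .add("max", new BasicDBObject("value", 39900))
    .add("maxChunkSize", 1)
    .get();
CommandResult result = db.command(splitVectorCmd);
System.out.println(result.get("splitKeys"));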

Example 28 with DBObject

Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

From the class StandaloneMongoSplitterTest, method testFilterEmptySplits:

@Test
public void testFilterEmptySplits() throws SplitFailedException {
    Configuration config = new Configuration();
    DBObject query = new BasicDBObject("$or", new BasicDBObject[] { new BasicDBObject("value", new BasicDBObject("$lt", 20000)), new BasicDBObject("value", new BasicDBObject("$gt", 35000)) });
    MongoConfigUtil.setInputURI(config, uri);
    MongoConfigUtil.setEnableFilterEmptySplits(config, true);
    MongoConfigUtil.setQuery(config, query);
    // A 1 MB split size results in 4 splits; the 3rd one is empty under
    // the query above.
    MongoConfigUtil.setSplitSize(config, 1);
    StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
    List<InputSplit> splits = splitter.calculateSplits();
    // No splits are empty.
    for (InputSplit split : splits) {
        // Cursor is closed on the split, so copy it to create a new one.
        MongoInputSplit mis = new MongoInputSplit((MongoInputSplit) split);
        assertNotEquals(0, mis.getCursor().itcount());
    }
    assertSplitsCount(collection.count(query), splits);
}
Also used: BasicDBObject(com.mongodb.BasicDBObject) Configuration(org.apache.hadoop.conf.Configuration) DBObject(com.mongodb.DBObject) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) Test(org.junit.Test)
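
assertSplitsCount is a helper defined elsewhere in the test class. A plausible reconstruction, consistent with how the loop above counts documents per split (hypothetical; the real helper may differ):

// Sum the documents reachable through each split and compare to the expected total.
private static void assertSplitsCount(final long expected, final List<InputSplit> splits) {
    long total = 0;
    for (InputSplit split : splits) {
        // Copy each split so the count runs on a fresh, unclosed cursor.
        MongoInputSplit mis = new MongoInputSplit((MongoInputSplit) split);
        total += mis.getCursor().itcount();
    }
    assertEquals(expected, total);
}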

Example 29 with DBObject

Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

From the class TestSharded, method testDirectAccess:

public void testDirectAccess() {
    DBCollection collection = getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out");
    collection.drop();
    // HADOOP61 - simulate a failed migration by making some docs from one chunk
    // also exist on another shard that does not own that chunk (duplicates).
    DB config = getMongos().getDB("config");
    DBObject chunk = config.getCollection("chunks").findOne(new BasicDBObject("shard", "sh01"));
    DBObject query = new BasicDBObject("_id", new BasicDBObject("$gte", ((DBObject) chunk.get("min")).get("_id")).append("$lt", ((DBObject) chunk.get("max")).get("_id")));
    List<DBObject> data = toList(getMongos().getDB("mongo_hadoop").getCollection("yield_historical.in").find(query));
    DBCollection destination = getShard().getDB("mongo_hadoop").getCollection("yield_historical.in");
    for (DBObject doc : data) {
        destination.insert(doc, WriteConcern.UNACKNOWLEDGED);
    }
    MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .param(SPLITS_SLAVE_OK, "true")
        .param(SPLITS_USE_SHARDS, "true")
        .param(SPLITS_USE_CHUNKS, "false")
        .inputUris(new MongoClientURIBuilder(getInputUri())
            .readPreference(ReadPreference.secondary())
            .build());
    if (isHadoopV1()) {
        job.outputCommitter(MongoOutputCommitter.class);
    }
    job.execute(isRunTestInVm());
    compareResults(collection, getReference());
    collection.drop();
    MapReduceJob jobWithChunks = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .inputUris(new MongoClientURIBuilder(getInputUri())
            .readPreference(ReadPreference.secondary())
            .build())
        .param(SPLITS_SLAVE_OK, "true")
        .param(SPLITS_USE_SHARDS, "true")
        .param(SPLITS_USE_CHUNKS, "true");
    if (isHadoopV1()) {
        jobWithChunks.outputCommitter(MongoOutputCommitter.class);
    }
    jobWithChunks.execute(isRunTestInVm());
    compareResults(collection, getReference());
}
Also used: DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) DBObject(com.mongodb.DBObject) DB(com.mongodb.DB)
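
toList is another test helper that drains a DBCursor into a list. A minimal sketch of what it presumably does (hypothetical reconstruction; the real helper lives in the test superclass):

// Drain the cursor eagerly so the documents can be re-inserted afterwards.
private static List<DBObject> toList(final DBCursor cursor) {
    List<DBObject> results = new ArrayList<DBObject>();
    try {
        while (cursor.hasNext()) {
            results.add(cursor.next());
        }
    } finally {
        cursor.close();
    }
    return results;
}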

Example 30 with DBObject

Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.

From the class HiveMongoInputFormat, method getRecordReader:

@Override
public RecordReader<BSONWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf conf, final Reporter reporter) throws IOException {
    // split is of type 'MongoHiveInputSplit'
    MongoHiveInputSplit mhis = (MongoHiveInputSplit) split;
    // Get column name mapping.
    Map<String, String> colToMongoNames = columnMapping(conf);
    // Add projection from Hive.
    DBObject mongoProjection = getProjection(conf, colToMongoNames);
    MongoInputSplit delegate = (MongoInputSplit) mhis.getDelegate();
    if (mongoProjection != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB projection : " + mongoProjection);
        }
        delegate.setFields(mongoProjection);
    }
    // Filter from Hive.
    DBObject filter = getFilter(conf, colToMongoNames);
    // Combine with filter from table, if there is one.
    if (conf.get(MongoConfigUtil.INPUT_QUERY) != null) {
        DBObject tableFilter = MongoConfigUtil.getQuery(conf);
        if (null == filter) {
            filter = tableFilter;
        } else {
            BasicDBList conditions = new BasicDBList();
            conditions.add(filter);
            conditions.add(tableFilter);
            // Combine with an $and clause so the Hive filter does not
            // overwrite the table filter.
            filter = new BasicDBObject("$and", conditions);
        }
    }
    if (filter != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB query: " + filter);
        }
        delegate.setQuery(filter);
    }
    // Return a MongoRecordReader over the delegate, which is a 'MongoInputSplit'.
    return new MongoRecordReader(delegate);
}
Also used: BasicDBList(com.mongodb.BasicDBList) BasicDBObject(com.mongodb.BasicDBObject) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) MongoRecordReader(com.mongodb.hadoop.mapred.input.MongoRecordReader) DBObject(com.mongodb.DBObject)
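
To make the filter combination concrete, here is a standalone sketch of the document this method builds when both a Hive pushdown filter and a table-level query are present (the field names are invented for illustration):

// Hypothetical stand-ins for getFilter(...) and MongoConfigUtil.getQuery(conf):
DBObject hiveFilter = new BasicDBObject("price", new BasicDBObject("$gt", 10));
DBObject tableFilter = new BasicDBObject("category", "books");
BasicDBList conditions = new BasicDBList();
conditions.add(hiveFilter);
conditions.add(tableFilter);
DBObject combined = new BasicDBObject("$and", conditions);
// combined => { "$and" : [ { "price" : { "$gt" : 10 } }, { "category" : "books" } ] }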

Aggregations

DBObject (com.mongodb.DBObject): 545 usages
BasicDBObject (com.mongodb.BasicDBObject): 386 usages
Test (org.junit.Test): 214 usages
DBCollection (com.mongodb.DBCollection): 83 usages
YearFilterPagingRequest (org.devgateway.ocds.web.rest.controller.request.YearFilterPagingRequest): 54 usages
Aggregation (org.springframework.data.mongodb.core.aggregation.Aggregation): 52 usages
ApiOperation (io.swagger.annotations.ApiOperation): 47 usages
RequestMapping (org.springframework.web.bind.annotation.RequestMapping): 46 usages
Aggregation.newAggregation (org.springframework.data.mongodb.core.aggregation.Aggregation.newAggregation): 41 usages
DBCursor (com.mongodb.DBCursor): 40 usages
ArrayList (java.util.ArrayList): 38 usages
HashMap (java.util.HashMap): 38 usages
List (java.util.List): 31 usages
CustomProjectionOperation (org.devgateway.toolkit.persistence.mongo.aggregate.CustomProjectionOperation): 31 usages
Map (java.util.Map): 26 usages
ObjectId (org.bson.types.ObjectId): 26 usages
BasicDBList (com.mongodb.BasicDBList): 24 usages
BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 20 usages
BSONObject (org.bson.BSONObject): 19 usages
MongoException (com.mongodb.MongoException): 18 usages