Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
The class MongoPaginatingSplitterTest, method testQuery.
@Test
public void testQuery() throws SplitFailedException {
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, uri);
    MongoConfigUtil.setRangeQueryEnabled(conf, true);
    MongoConfigUtil.setInputSplitMinDocs(conf, 5000);
    DBObject query = new BasicDBObject(
        "$or", new BasicDBObject[]{
            new BasicDBObject("value", new BasicDBObject("$lt", 25000)),
            new BasicDBObject("value", new BasicDBObject("$gte", 31000))});
    MongoConfigUtil.setQuery(conf, query);
    MongoPaginatingSplitter splitter = new MongoPaginatingSplitter(conf);
    List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(7, splits.size());
    assertSplitRange((MongoInputSplit) splits.get(0), null, 5000);
    assertSplitRange((MongoInputSplit) splits.get(1), 5000, 10000);
    assertSplitRange((MongoInputSplit) splits.get(2), 10000, 15000);
    assertSplitRange((MongoInputSplit) splits.get(3), 15000, 20000);
    assertSplitRange((MongoInputSplit) splits.get(4), 20000, 31000);
    assertSplitRange((MongoInputSplit) splits.get(5), 31000, 36000);
    assertSplitRange((MongoInputSplit) splits.get(6), 36000, null);
    // 6000 documents excluded by query.
    assertSplitsCount(collection.count() - 6000, splits);
}
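The assertSplitRange helper used above is not shown in this snippet. A minimal sketch of what such an assertion could look like, assuming each split's query carries its bounds as $gte/$lt on the "value" split key (the actual helper in MongoPaginatingSplitterTest may differ):

private static void assertSplitRange(final MongoInputSplit split, final Integer min, final Integer max) {
    // Assumption: the paginating splitter stores each split's range as
    // {value: {$gte: <min>, $lt: <max>}} inside the split's query;
    // a missing bound comes back as null.
    DBObject bounds = (DBObject) split.getQuery().get("value");
    assertEquals(min, bounds.get("$gte"));
    assertEquals(max, bounds.get("$lt"));
}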
Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
The class StandaloneMongoSplitterTest, method unshardedCollectionMinMax.
@Test
public void unshardedCollectionMinMax() throws UnknownHostException, SplitFailedException {
    Configuration config = new Configuration();
    StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
    MongoConfigUtil.setInputURI(config, uri);
    DBObject inputSplitKey = BasicDBObjectBuilder.start("value", 1).get();
    MongoConfigUtil.setInputSplitKey(config, inputSplitKey);
    MongoConfigUtil.setSplitSize(config, 1);
    List<InputSplit> regularSplits = splitter.calculateSplits();
    MongoConfigUtil.setMinSplitKey(config, "{value:100}");
    MongoConfigUtil.setMaxSplitKey(config, "{value:39900}");
    List<InputSplit> inputSplits = splitter.calculateSplits();
    assertTrue("should be fewer splits with min/max set", regularSplits.size() >= inputSplits.size());
}
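setMinSplitKey and setMaxSplitKey take the bounds as JSON strings. A minimal sketch of turning such strings back into DBObject bounds, for example to pass as the min/max arguments of the splitVector command (illustrative only; not necessarily how StandaloneMongoSplitter consumes them):

// com.mongodb.util.JSON ships with the legacy driver used by mongo-hadoop.
DBObject minBound = (DBObject) JSON.parse("{value:100}");
DBObject maxBound = (DBObject) JSON.parse("{value:39900}");
// Both documents are keyed on the split key ("value"), so only the
// [100, 39900) range of the collection is considered when splitting.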
Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
The class StandaloneMongoSplitterTest, method testFilterEmptySplits.
@Test
public void testFilterEmptySplits() throws SplitFailedException {
    Configuration config = new Configuration();
    DBObject query = new BasicDBObject(
        "$or", new BasicDBObject[]{
            new BasicDBObject("value", new BasicDBObject("$lt", 20000)),
            new BasicDBObject("value", new BasicDBObject("$gt", 35000))});
    MongoConfigUtil.setInputURI(config, uri);
    MongoConfigUtil.setEnableFilterEmptySplits(config, true);
    MongoConfigUtil.setQuery(config, query);
    // 1 MB per document results in 4 splits; the 3rd one is empty per
    // the above query.
    MongoConfigUtil.setSplitSize(config, 1);
    StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
    List<InputSplit> splits = splitter.calculateSplits();
    // No splits are empty.
    for (InputSplit split : splits) {
        // Cursor is closed on the split, so copy it to create a new one.
        MongoInputSplit mis = new MongoInputSplit((MongoInputSplit) split);
        assertNotEquals(0, mis.getCursor().itcount());
    }
    assertSplitsCount(collection.count(query), splits);
}
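Likewise, assertSplitsCount is not shown here. A minimal sketch of such a helper, assuming it simply sums the documents visible through each split's cursor (the real helper in the test base class may differ):

private static void assertSplitsCount(final long expected, final List<InputSplit> splits) {
    long total = 0;
    for (InputSplit split : splits) {
        // Copy the split so counting here doesn't exhaust a cursor the test still needs.
        MongoInputSplit mis = new MongoInputSplit((MongoInputSplit) split);
        total += mis.getCursor().itcount();
    }
    assertEquals(expected, total);
}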
Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
The class TestSharded, method testDirectAccess.
public void testDirectAccess() {
    DBCollection collection = getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out");
    collection.drop();
    // HADOOP61 - simulate a failed migration by having some docs from one chunk
    // also exist on another shard that does not own that chunk (duplicates).
    DB config = getMongos().getDB("config");
    DBObject chunk = config.getCollection("chunks").findOne(new BasicDBObject("shard", "sh01"));
    DBObject query = new BasicDBObject(
        "_id", new BasicDBObject("$gte", ((DBObject) chunk.get("min")).get("_id"))
                   .append("$lt", ((DBObject) chunk.get("max")).get("_id")));
    List<DBObject> data = toList(getMongos().getDB("mongo_hadoop")
                                            .getCollection("yield_historical.in").find(query));
    DBCollection destination = getShard().getDB("mongo_hadoop").getCollection("yield_historical.in");
    for (DBObject doc : data) {
        destination.insert(doc, WriteConcern.UNACKNOWLEDGED);
    }
    MapReduceJob job =
        new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
            .jar(JOBJAR_PATH)
            .param(SPLITS_SLAVE_OK, "true")
            .param(SPLITS_USE_SHARDS, "true")
            .param(SPLITS_USE_CHUNKS, "false")
            .inputUris(new MongoClientURIBuilder(getInputUri())
                           .readPreference(ReadPreference.secondary()).build());
    if (isHadoopV1()) {
        job.outputCommitter(MongoOutputCommitter.class);
    }
    job.execute(isRunTestInVm());
    compareResults(collection, getReference());
    collection.drop();
    MapReduceJob jobWithChunks =
        new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
            .jar(JOBJAR_PATH)
            .inputUris(new MongoClientURIBuilder(getInputUri())
                           .readPreference(ReadPreference.secondary()).build())
            .param(SPLITS_SLAVE_OK, "true")
            .param(SPLITS_USE_SHARDS, "true")
            .param(SPLITS_USE_CHUNKS, "true");
    if (isHadoopV1()) {
        jobWithChunks.outputCommitter(MongoOutputCommitter.class);
    }
    jobWithChunks.execute(isRunTestInVm());
    compareResults(collection, getReference());
}
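The SPLITS_SLAVE_OK, SPLITS_USE_SHARDS and SPLITS_USE_CHUNKS job parameters correspond to MongoConfigUtil input-split settings. A minimal sketch of an equivalent Configuration-based setup for the first job above, assuming the setReadSplitsFromShards/setReadSplitsFromChunks setters exist in your mongo-hadoop version (verify the setter names before relying on them; the URI is illustrative):

Configuration conf = new Configuration();
// Hypothetical input URI; in the test it comes from getInputUri().
MongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/mongo_hadoop.yield_historical.in");
// Assumption: these setters back SPLITS_USE_SHARDS / SPLITS_USE_CHUNKS, i.e.
// read splits directly from the shards rather than from the chunk metadata.
MongoConfigUtil.setReadSplitsFromShards(conf, true);
MongoConfigUtil.setReadSplitsFromChunks(conf, false);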
Use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
The class HiveMongoInputFormat, method getRecordReader.
@Override
public RecordReader<BSONWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf conf, final Reporter reporter) throws IOException {
    // split is of type 'MongoHiveInputSplit'.
    MongoHiveInputSplit mhis = (MongoHiveInputSplit) split;
    // Get column name mapping.
    Map<String, String> colToMongoNames = columnMapping(conf);
    // Add projection from Hive.
    DBObject mongoProjection = getProjection(conf, colToMongoNames);
    MongoInputSplit delegate = (MongoInputSplit) mhis.getDelegate();
    if (mongoProjection != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB projection : " + mongoProjection);
        }
        delegate.setFields(mongoProjection);
    }
    // Filter from Hive.
    DBObject filter = getFilter(conf, colToMongoNames);
    // Combine with filter from table, if there is one.
    if (conf.get(MongoConfigUtil.INPUT_QUERY) != null) {
        DBObject tableFilter = MongoConfigUtil.getQuery(conf);
        if (null == filter) {
            filter = tableFilter;
        } else {
            BasicDBList conditions = new BasicDBList();
            conditions.add(filter);
            conditions.add(tableFilter);
            // Use $and clause so we don't overwrite any of the table filter.
            filter = new BasicDBObject("$and", conditions);
        }
    }
    if (filter != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB query: " + filter);
        }
        delegate.setQuery(filter);
    }
    // Return a MongoRecordReader; the delegate is of type 'MongoInputSplit'.
    return new MongoRecordReader(delegate);
}
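To make the $and combination concrete, a minimal sketch with an illustrative Hive pushdown filter and table-level query (the field names "i" and "j" are hypothetical):

DBObject hiveFilter = new BasicDBObject("i", new BasicDBObject("$gt", 10));
DBObject tableFilter = new BasicDBObject("j", "foo");
BasicDBList conditions = new BasicDBList();
conditions.add(hiveFilter);
conditions.add(tableFilter);
DBObject combined = new BasicDBObject("$and", conditions);
// combined serializes to: { "$and" : [ { "i" : { "$gt" : 10 } }, { "j" : "foo" } ] }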