use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
the class MongoCollectionSplitter method createRangeQuerySplit.
/**
 * Creates an instance of {@link MongoInputSplit} whose upper and lower
 * bounds are restricted by adding $gte/$lt clauses to the query
 * filter. This requires that the boundaries are not compound keys, that
 * both boundaries (when present) refer to the same key, and that
 * the query does not contain any keys used in the split key.
 *
 * <p>If both bounds are {@code null}, the returned split carries a plain copy
 * of {@code query}. If only one bound holds a key (the other being
 * {@code null} or having no fields), that side of the range is left open.
 *
 * @param chunkLowerBound the lower bound of the chunk (min), applied with $gte; may be null
 * @param chunkUpperBound the upper bound of the chunk (max), applied with $lt; may be null
 * @param query a query filtering the documents within the split
 * @return a MongoInputSplit from a range query
 * @throws IllegalArgumentException if the query conflicts with the chunk bounds,
 *         if either of the bounds is a compound key, or if the two bounds
 *         use different keys.
 */
public MongoInputSplit createRangeQuerySplit(final BasicDBObject chunkLowerBound, final BasicDBObject chunkUpperBound, final BSONObject query) {
    // A split without boundaries: the query is copied through unchanged.
    if (chunkLowerBound == null && chunkUpperBound == null) {
        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        MongoInputSplit split = new MongoInputSplit(getConfiguration());
        split.setQuery(splitQuery);
        return split;
    }

    // The boundaries are not empty, so try to build a split using $gte/$lt.
    // First check that the split contains no compound keys.
    // e.g. this is valid: { _id : "foo" }
    // but this is not {_id : "foo", name : "bar"}
    // A compound key on EITHER side is rejected; silently dropping one bound
    // would produce a split covering more documents than the chunk holds.
    boolean lowerIsCompound = chunkLowerBound != null && chunkLowerBound.keySet().size() > 1;
    boolean upperIsCompound = chunkUpperBound != null && chunkUpperBound.keySet().size() > 1;
    if (lowerIsCompound || upperIsCompound) {
        throw new IllegalArgumentException("Range query is enabled but one or more split boundaries contains a compound key:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    Entry<String, Object> minKey = chunkLowerBound != null && chunkLowerBound.keySet().size() == 1 ? chunkLowerBound.entrySet().iterator().next() : null;
    Entry<String, Object> maxKey = chunkUpperBound != null && chunkUpperBound.keySet().size() == 1 ? chunkUpperBound.entrySet().iterator().next() : null;

    // Neither bound yielded a usable single key (e.g. both have no fields).
    if (minKey == null && maxKey == null) {
        throw new IllegalArgumentException("Range query is enabled but one or more split boundaries contains a compound key:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    // Both clauses are attached to a single field, so the bounds must agree on it.
    if (minKey != null && maxKey != null && !minKey.getKey().equals(maxKey.getKey())) {
        throw new IllegalArgumentException("Range query is enabled but split boundaries use different keys:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    // Now check that the lower and upper bounds don't have any keys
    // which overlap with the query.
    if (minKey != null && query.containsField(minKey.getKey()) || maxKey != null && query.containsField(maxKey.getKey())) {
        throw new IllegalArgumentException("Range query is enabled but split key conflicts with query filter:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound + "\nquery: " + query);
    }

    // Build { key : { $gte : min, $lt : max } }, omitting whichever side is open.
    String key = minKey != null ? minKey.getKey() : maxKey.getKey();
    BasicDBObject rangeObj = new BasicDBObject();
    if (minKey != null) {
        rangeObj.put("$gte", minKey.getValue());
    }
    if (maxKey != null) {
        rangeObj.put("$lt", maxKey.getValue());
    }

    DBObject splitQuery = new BasicDBObject();
    splitQuery.putAll(query);
    splitQuery.put(key, rangeObj);
    MongoInputSplit split = new MongoInputSplit(getConfiguration());
    split.setQuery(splitQuery);
    return split;
}
use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
the class MongoRecordWriter method write.
/**
 * Serializes one key/value pair to the output stream, prefixed by an int
 * type marker.
 *
 * <p>A {@code MongoUpdateWritable} value is written directly under the
 * {@code MONGO_UPDATE_WRITABLE} marker. Any other value is folded into a
 * fresh document whose {@code _id} is derived from the key, and that
 * document is written under the {@code BSON_WRITABLE} marker.
 *
 * @param key the record key, used as the document's {@code _id}
 * @param value the record value
 * @throws IOException declared for the writes to the underlying stream
 */
@Override
public void write(final K key, final V value) throws IOException {
    // Updates carry their own wire format; nothing to assemble.
    if (value instanceof MongoUpdateWritable) {
        outputStream.writeInt(MongoWritableTypes.MONGO_UPDATE_WRITABLE);
        ((MongoUpdateWritable) value).write(outputStream);
        return;
    }

    final DBObject document = new BasicDBObject();

    // Resolve the _id from the key, unwrapping or converting as needed.
    final Object id;
    if (key instanceof BSONWritable) {
        id = ((BSONWritable) key).getDoc();
    } else if (key instanceof BSONObject) {
        id = key;
    } else {
        id = BSONWritable.toBSON(key);
    }
    document.put("_id", id);

    // Merge the value's fields in after _id, so the value is applied last.
    if (value instanceof BSONWritable) {
        final BSONWritable wrapped = (BSONWritable) value;
        document.putAll(wrapped.getDoc());
    } else if (value instanceof MongoOutput) {
        final MongoOutput appender = (MongoOutput) value;
        appender.appendAsValue(document);
    } else if (value instanceof BSONObject) {
        document.putAll((BSONObject) value);
    } else if (value instanceof Map) {
        document.putAll((Map) value);
    } else {
        // Anything else is stored under a single "value" field.
        document.put("value", BSONWritable.toBSON(value));
    }

    outputStream.writeInt(MongoWritableTypes.BSON_WRITABLE);
    bsonWritable.setDoc(document);
    bsonWritable.write(outputStream);
}
use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
the class BookstoreTest method tagsIndex.
/**
 * Runs the bookstore job over the inventory BSON input, writing to the
 * {@code mongo_hadoop.bookstore_tags} collection, then checks that the
 * document with {@code _id} "history" exists and lists 8 books.
 */
@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    try {
        DBCollection collection = mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        MapReduceJob job = new MapReduceJob(BookstoreConfig.class.getName()).jar(JAR_PATH).inputUris(INVENTORY_BSON).outputUri(uri).param("mapred.input.dir", INVENTORY_BSON.toString());
        // Hadoop 1.x is configured through the mapred-package classes;
        // every other version only needs the input format set.
        if (!HADOOP_VERSION.startsWith("1.")) {
            job.inputFormat(BSONFileInputFormat.class);
        } else {
            job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
            job.mapredOutputFormat(MongoOutputFormat.class);
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(false);
        DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
        assertNotNull(object);
        List books = (List) object.get("books");
        // assertEquals takes (message, expected, actual); keeping that order
        // makes a failure report "expected 8 but was N" rather than the reverse.
        Assert.assertEquals("Should find only 8 books", 8, books.size());
    } finally {
        // Release the client's connections even when an assertion fails.
        mongoClient.close();
    }
}
use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
the class TagsReducer method reduce.
/**
 * Collects the documents of every value received for {@code key} into a
 * list, then writes {@code reduceResult} configured with a query on
 * {@code _id} equal to the key's string form and a {@code $set} modifier
 * that assigns the list to the "books" field.
 *
 * @param key the reduce key; its string form becomes the {@code _id} to match
 * @param values the values grouped under the key
 * @param context the context the result is written to, with a null key
 */
@Override
protected void reduce(final Text key, final Iterable<BSONWritable> values, final Context context) throws IOException, InterruptedException {
    final BasicDBObject idFilter = new BasicDBObject("_id", key.toString());

    // Unwrap each writable to its underlying document.
    final ArrayList<BSONObject> collected = new ArrayList<BSONObject>();
    for (final BSONWritable writable : values) {
        collected.add(writable.getDoc());
    }

    // { $set : { books : [...] } }
    final BasicBSONObject booksField = new BasicBSONObject("books", collected);
    final BasicBSONObject setModifier = new BasicBSONObject("$set", booksField);

    reduceResult.setQuery(idFilter);
    reduceResult.setModifiers(setModifier);
    context.write(null, reduceResult);
}
use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
the class MongoPaginatingSplitterTest method testQuery.
/**
 * Checks that the paginating splitter, with range queries enabled and a
 * minimum of 5000 documents per split, yields the 7 expected split ranges
 * when the input query is an {@code $or} of {@code value < 25000} and
 * {@code value >= 31000}.
 */
@Test
public void testQuery() throws SplitFailedException {
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, uri);
    MongoConfigUtil.setRangeQueryEnabled(conf, true);
    MongoConfigUtil.setInputSplitMinDocs(conf, 5000);

    // { $or : [ { value : { $lt : 25000 } }, { value : { $gte : 31000 } } ] }
    BasicDBObject belowGap = new BasicDBObject("value", new BasicDBObject("$lt", 25000));
    BasicDBObject aboveGap = new BasicDBObject("value", new BasicDBObject("$gte", 31000));
    DBObject query = new BasicDBObject("$or", new BasicDBObject[] { belowGap, aboveGap });
    MongoConfigUtil.setQuery(conf, query);

    MongoPaginatingSplitter splitter = new MongoPaginatingSplitter(conf);
    List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(7, splits.size());

    // Expected {min, max} per split, in order; null marks an open end.
    final Integer[][] expectedRanges = {
        { null, 5000 },
        { 5000, 10000 },
        { 10000, 15000 },
        { 15000, 20000 },
        { 20000, 31000 },
        { 31000, 36000 },
        { 36000, null },
    };
    for (int i = 0; i < expectedRanges.length; i++) {
        assertSplitRange((MongoInputSplit) splits.get(i), expectedRanges[i][0], expectedRanges[i][1]);
    }

    // 6000 documents excluded by query.
    assertSplitsCount(collection.count() - 6000, splits);
}
Aggregations