use of org.bson.BSONObject in project mongo-hadoop by mongodb.
the class MongoCollectionSplitter method createRangeQuerySplit.
/**
 * Creates an instance of {@link MongoInputSplit} whose upper and lower
 * bounds are restricted by adding $gte/$lt clauses to the query
 * filter. This requires that the boundaries are not compound keys, and that
 * the query does not contain any keys used in the split key.
 *
 * @param chunkLowerBound the lower bound of the chunk (min)
 * @param chunkUpperBound the upper bound of the chunk (max)
 * @param query a query filtering the documents within the split
 * @return a MongoInputSplit from a range query
 * @throws IllegalArgumentException if the query conflicts with the chunk
 *         bounds, or either of the bounds is a compound key.
 */
public MongoInputSplit createRangeQuerySplit(final BasicDBObject chunkLowerBound,
                                             final BasicDBObject chunkUpperBound,
                                             final BSONObject query) {
    // If there are no boundaries at all, just return a split without boundaries.
    if (chunkLowerBound == null && chunkUpperBound == null) {
        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        MongoInputSplit split = new MongoInputSplit(getConfiguration());
        split.setQuery(splitQuery);
        return split;
    }

    // The boundaries are not empty, so try to build a split using $gte/$lt.
    // First check that the split contains no compound keys.
    // e.g. this is valid:   { _id : "foo" }
    // but this is not:      { _id : "foo", name : "bar" }
    Entry<String, Object> minKey = chunkLowerBound != null && chunkLowerBound.keySet().size() == 1
        ? chunkLowerBound.entrySet().iterator().next() : null;
    Entry<String, Object> maxKey = chunkUpperBound != null && chunkUpperBound.keySet().size() == 1
        ? chunkUpperBound.entrySet().iterator().next() : null;
    if (minKey == null && maxKey == null) {
        throw new IllegalArgumentException(
            "Range query is enabled but one or more split boundaries contains a compound key:\n"
            + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    // Then check that the bounds do not use any keys which overlap with the query.
    if ((minKey != null && query.containsField(minKey.getKey()))
        || (maxKey != null && query.containsField(maxKey.getKey()))) {
        throw new IllegalArgumentException(
            "Range query is enabled but split key conflicts with query filter:\n"
            + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound + "\nquery: " + query);
    }

    String key = null;
    BasicDBObject rangeObj = new BasicDBObject();
    if (minKey != null) {
        key = minKey.getKey();
        rangeObj.put("$gte", minKey.getValue());
    }
    if (maxKey != null) {
        key = maxKey.getKey();
        rangeObj.put("$lt", maxKey.getValue());
    }
    DBObject splitQuery = new BasicDBObject();
    splitQuery.putAll(query);
    splitQuery.put(key, rangeObj);
    MongoInputSplit split = new MongoInputSplit(getConfiguration());
    split.setQuery(splitQuery);
    return split;
}
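For illustration, the sketch below reproduces the filter this method builds for a hypothetical chunk bounded by { _id : 100 } and { _id : 200 } with a user query of { active : true }. The bound values and filter are invented, and the snippet only mimics the $gte/$lt construction shown above rather than calling the splitter itself.

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import org.bson.BSONObject;

public class RangeQuerySplitSketch {
    public static void main(String[] args) {
        // Hypothetical chunk bounds on a single (non-compound) split key.
        BasicDBObject lower = new BasicDBObject("_id", 100);
        BasicDBObject upper = new BasicDBObject("_id", 200);
        // A user-supplied filter that does not touch the split key.
        BSONObject query = new BasicDBObject("active", true);

        // Mirror what createRangeQuerySplit does internally: keep the
        // original filter and add a $gte/$lt range on the split key.
        BasicDBObject range = new BasicDBObject("$gte", lower.get("_id"))
                .append("$lt", upper.get("_id"));
        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        splitQuery.put("_id", range);

        // Prints something like:
        // { "active" : true , "_id" : { "$gte" : 100 , "$lt" : 200}}
        System.out.println(splitQuery);
    }
}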
use of org.bson.BSONObject in project mongo-hadoop by mongodb.
the class BSONFileRecordWriter method writeSplitData.
private void writeSplitData(final int docSize, final boolean force) throws IOException {
    // If no split file is being written, bail out now.
    if (this.splitsFile == null) {
        return;
    }

    // If this document pushes us past the split size threshold (or the write
    // is forced), record the current split in the metadata file.
    if (force || currentSplitLen + docSize >= this.splitSize) {
        BSONObject splitObj = BasicDBObjectBuilder.start()
                                                  .add("s", currentSplitStart)
                                                  .add("l", currentSplitLen)
                                                  .get();
        byte[] encodedObj = this.bsonEnc.encode(splitObj);
        this.splitsFile.write(encodedObj, 0, encodedObj.length);
        // Reset the split length and start for the next split.
        this.currentSplitLen = 0;
        this.currentSplitStart = bytesWritten - docSize;
    } else {
        // Split hasn't hit the threshold yet; just add the document's size.
        this.currentSplitLen += docSize;
    }
}
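Each split entry written above is just a small BSON document with an "s" (start offset) and "l" (length) field, appended to the splits file one after another. As a rough sketch of that round trip using the plain org.bson encoder and decoder (the offset and length values here are invented):

import org.bson.BSONObject;
import org.bson.BasicBSONDecoder;
import org.bson.BasicBSONEncoder;
import org.bson.BasicBSONObject;

public class SplitMetadataSketch {
    public static void main(String[] args) {
        // Encode a split entry shaped like the one writeSplitData produces:
        // "s" is the split's byte offset, "l" is its length in bytes.
        BSONObject splitObj = new BasicBSONObject("s", 0L).append("l", 16777216L);
        byte[] encoded = new BasicBSONEncoder().encode(splitObj);

        // The splits file is a sequence of such documents, so a single
        // entry can be decoded straight back.
        BSONObject decoded = new BasicBSONDecoder().readObject(encoded);
        System.out.println(decoded); // { "s" : 0 , "l" : 16777216}
    }
}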
use of org.bson.BSONObject in project mongo-hadoop by mongodb.
the class MongoRecordWriter method write.
@Override
public void write(final K key, final V value) throws IOException {
    if (value instanceof MongoUpdateWritable) {
        // Updates are serialized directly with their own type tag.
        outputStream.writeInt(MongoWritableTypes.MONGO_UPDATE_WRITABLE);
        ((MongoUpdateWritable) value).write(outputStream);
    } else {
        // Otherwise build a document: the key becomes _id...
        DBObject o = new BasicDBObject();
        if (key instanceof BSONWritable) {
            o.put("_id", ((BSONWritable) key).getDoc());
        } else if (key instanceof BSONObject) {
            o.put("_id", key);
        } else {
            o.put("_id", BSONWritable.toBSON(key));
        }
        // ...and the value supplies the remaining fields.
        if (value instanceof BSONWritable) {
            o.putAll(((BSONWritable) value).getDoc());
        } else if (value instanceof MongoOutput) {
            ((MongoOutput) value).appendAsValue(o);
        } else if (value instanceof BSONObject) {
            o.putAll((BSONObject) value);
        } else if (value instanceof Map) {
            o.putAll((Map) value);
        } else {
            o.put("value", BSONWritable.toBSON(value));
        }
        outputStream.writeInt(MongoWritableTypes.BSON_WRITABLE);
        bsonWritable.setDoc(o);
        bsonWritable.write(outputStream);
    }
}
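To see the two code paths above from the caller's side, here is a minimal sketch of the kinds of values a job might hand to this writer: a plain document, which write() wraps in a BSONWritable with the key folded in as _id, versus a MongoUpdateWritable, which is serialized as-is. The field names and values are illustrative, and building the update via its setters simply mirrors the TagsReducer snippet further down this page.

import com.mongodb.BasicDBObject;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.io.MongoUpdateWritable;

public class RecordWriterValueSketch {
    public static void main(String[] args) {
        // A plain document value: handled by the BSON_WRITABLE branch.
        BSONWritable doc = new BSONWritable(
            new BasicDBObject("title", "Example Book").append("year", 2014));

        // An update value: handled by the MONGO_UPDATE_WRITABLE branch.
        MongoUpdateWritable update = new MongoUpdateWritable();
        update.setQuery(new BasicDBObject("_id", "example-key"));
        update.setModifiers(new BasicDBObject("$inc", new BasicDBObject("count", 1)));

        System.out.println(doc.getDoc());
    }
}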
use of org.bson.BSONObject in project mongo-hadoop by mongodb.
the class TagsReducer method reduce.
@Override
protected void reduce(final Text key, final Iterable<BSONWritable> values, final Context context)
    throws IOException, InterruptedException {
    BasicDBObject query = new BasicDBObject("_id", key.toString());
    ArrayList<BSONObject> books = new ArrayList<BSONObject>();
    for (BSONWritable val : values) {
        books.add(val.getDoc());
    }
    BasicBSONObject update = new BasicBSONObject("$set", new BasicBSONObject("books", books));
    reduceResult.setQuery(query);
    reduceResult.setModifiers(update);
    context.write(null, reduceResult);
}
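The update emitted for each tag therefore has a fixed shape: a query on _id and a $set of the collected books array. A standalone sketch of that shape, with an invented tag and book titles:

import java.util.ArrayList;
import java.util.List;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;

public class TagsReducerOutputSketch {
    public static void main(String[] args) {
        // For a tag such as "databases", the reducer collects every book
        // document seen for that key...
        List<BSONObject> books = new ArrayList<BSONObject>();
        books.add(new BasicBSONObject("title", "MongoDB: The Definitive Guide"));
        books.add(new BasicBSONObject("title", "Hadoop: The Definitive Guide"));

        // ...and emits an update that matches the tag by _id and replaces
        // its "books" array via $set.
        BasicBSONObject query = new BasicBSONObject("_id", "databases");
        BasicBSONObject update =
            new BasicBSONObject("$set", new BasicBSONObject("books", books));

        System.out.println(query);
        System.out.println(update);
    }
}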
use of org.bson.BSONObject in project mongo-hadoop by mongodb.
the class BSONSplitterTest method testCreateFileSplitFromBSON.
@Test
public void testCreateFileSplitFromBSON() throws IOException {
    BSONObject splitSpec = new BasicBSONObject();
    splitSpec.put("s", 0L);
    splitSpec.put("l", file.getLen());
    BSONFileSplit splitResult = SPLITTER.createFileSplitFromBSON(splitSpec, fs, file);
    assertOneSplit(splitResult);
}