use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoConfigUtil method getInputSplitKey.
/**
 * Resolves the split key configured for input splits.
 *
 * <p>The JSON pattern string returned by {@code getInputSplitKeyPattern(conf)} is parsed and
 * cast to a {@link DBObject}. When the parse result is {@code null}, a key on {@code _id}
 * with value {@code 1} is returned instead.
 *
 * @param conf the configuration the split key pattern is read from
 * @return the parsed split key, or a new {@code BasicDBObject("_id", 1)} when parsing yields null
 * @throws IllegalArgumentException if reading, parsing, or casting the pattern throws any exception
 */
public static DBObject getInputSplitKey(final Configuration conf) {
    try {
        final DBObject parsedKey = (DBObject) JSON.parse(getInputSplitKeyPattern(conf));
        return parsedKey != null ? parsedKey : new BasicDBObject("_id", 1);
    } catch (final Exception e) {
        throw new IllegalArgumentException("Provided JSON String is not representable/parsable as a DBObject.", e);
    }
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoCollectionSplitter method createRangeQuerySplit.
/**
 * Creates an instance of {@link MongoInputSplit} whose upper and lower
 * bounds are restricted by adding $gte/$lt clauses to the query
 * filter. This requires that the boundaries are not compound keys, that
 * both boundaries (when both are given) use the same key, and that
 * the query does not contain any keys used in the split key.
 *
 * <p>If both bounds are {@code null}, the returned split simply carries a
 * copy of {@code query} with no range restriction.
 *
 * @param chunkLowerBound the lower bound of the chunk (min), or null for no lower bound
 * @param chunkUpperBound the upper bound of the chunk (max), or null for no upper bound
 * @param query a query filtering the documents within the split
 * @return a MongoInputSplit from a range query
 * @throws IllegalArgumentException if the query conflicts with the chunk bounds, if either of
 *                                  the bounds is a compound key, if neither bound yields a
 *                                  usable key, or if the two bounds use different keys.
 */
public MongoInputSplit createRangeQuerySplit(final BasicDBObject chunkLowerBound, final BasicDBObject chunkUpperBound, final BSONObject query) {
    // If the boundaries are actually absent, just return
    // a split without boundaries.
    if (chunkLowerBound == null && chunkUpperBound == null) {
        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        MongoInputSplit split = new MongoInputSplit(getConfiguration());
        split.setQuery(splitQuery);
        return split;
    }

    // The boundaries are not empty, so try to build a split using $gte/$lt.

    // First check that the split contains no compound keys.
    // e.g. this is valid: { _id : "foo" }
    // but this is not {_id : "foo", name : "bar"}
    final boolean lowerIsCompound = chunkLowerBound != null && chunkLowerBound.keySet().size() > 1;
    final boolean upperIsCompound = chunkUpperBound != null && chunkUpperBound.keySet().size() > 1;
    Entry<String, Object> minKey = chunkLowerBound != null && chunkLowerBound.keySet().size() == 1 ? chunkLowerBound.entrySet().iterator().next() : null;
    Entry<String, Object> maxKey = chunkUpperBound != null && chunkUpperBound.keySet().size() == 1 ? chunkUpperBound.entrySet().iterator().next() : null;

    // Reject the split as soon as EITHER bound is compound. Silently dropping a compound
    // bound while keeping the other one would yield a half-open range that no longer
    // matches the chunk. Also reject the case where no bound yields a key at all.
    if (lowerIsCompound || upperIsCompound || (minKey == null && maxKey == null)) {
        throw new IllegalArgumentException("Range query is enabled but one or more split boundaries contains a compound key:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    // Both clauses are attached to a single field, so the bounds must agree on its name;
    // otherwise the $gte value of one field would be applied to a different field.
    if (minKey != null && maxKey != null && !minKey.getKey().equals(maxKey.getKey())) {
        throw new IllegalArgumentException("Range query is enabled but the split boundaries use different keys:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    // Now, check that the lower and upper bounds don't have any keys
    // which overlap with the query.
    if (minKey != null && query.containsField(minKey.getKey()) || maxKey != null && query.containsField(maxKey.getKey())) {
        throw new IllegalArgumentException("Range query is enabled but split key conflicts with query filter:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound + "\nquery: " + query);
    }

    // At least one of minKey/maxKey is non-null here, and if both are they share a name.
    final String key = minKey != null ? minKey.getKey() : maxKey.getKey();
    BasicDBObject rangeObj = new BasicDBObject();
    if (minKey != null) {
        rangeObj.put("$gte", minKey.getValue());
    }
    if (maxKey != null) {
        rangeObj.put("$lt", maxKey.getValue());
    }

    // Combine the caller's filter with the range restriction on the split key.
    DBObject splitQuery = new BasicDBObject();
    splitQuery.putAll(query);
    splitQuery.put(key, rangeObj);
    MongoInputSplit split = new MongoInputSplit(getConfiguration());
    split.setQuery(splitQuery);
    return split;
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MultiMongoCollectionSplitter method calculateSplits.
/**
 * Calculates input splits across several collections by building one splitter per input
 * and concatenating the splits each of them produces.
 *
 * <p>Two configuration styles are handled:
 * <ul>
 *   <li>If {@code MongoConfigUtil.getMongoURIs} returns any input URIs, each URI gets a copy of
 *       this splitter's configuration and a splitter chosen by
 *       {@code MongoSplitterFactory.getSplitterByStats}.</li>
 *   <li>Otherwise the value stored under {@code MULTI_COLLECTION_CONF_KEY} must be a list of
 *       objects, each of which is turned into its own configuration. A splitter class named in
 *       that configuration is instantiated; if none is named, one is chosen by
 *       {@code MongoSplitterFactory.getSplitterByStats}.</li>
 * </ul>
 *
 * @return the splits of all per-collection splitters, in splitter order
 * @throws SplitFailedException declared by the overridden method and by the delegate splitters
 * @throws IllegalArgumentException if the multi-URI configuration is not a list of objects, or
 *                                  if an entry names {@code MultiMongoCollectionSplitter} as
 *                                  its splitter class
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    List<MongoClientURI> inputURIs = MongoConfigUtil.getMongoURIs(this.getConfiguration(), MongoConfigUtil.INPUT_URI);
    List<InputSplit> returnVal = new LinkedList<InputSplit>();
    List<MongoSplitter> splitters = new LinkedList<MongoSplitter>();

    // For each input, build a splitter; the splitter implementation may differ per input.
    if (!inputURIs.isEmpty()) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Using global split settings for multiple URIs specified.");
        }
        // Input URIs were given directly, so there are no per-collection options:
        // every URI works on a copy of this splitter's configuration instead.
        for (MongoClientURI uri : inputURIs) {
            MongoCollectionSplitter splitter;
            Configuration confForThisUri = new Configuration(getConfiguration());
            MongoConfigUtil.setInputURI(confForThisUri, uri);
            // Blank out the splitter class on the copy before asking the factory to choose
            // (presumably so the per-URI lookup does not resolve to this class again).
            confForThisUri.set(MongoConfigUtil.MONGO_SPLITTER_CLASS, "");
            splitter = MongoSplitterFactory.getSplitterByStats(uri, confForThisUri);
            splitters.add(splitter);
        }
    } else {
        //Otherwise the user has set options per-collection.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Loading multiple input URIs from JSON stored in " + MULTI_COLLECTION_CONF_KEY);
        }
        DBObject multiUriConfig = MongoConfigUtil.getDBObject(this.getConfiguration(), MULTI_COLLECTION_CONF_KEY);
        if (!(multiUriConfig instanceof List)) {
            throw new IllegalArgumentException("Invalid JSON format in multi uri config key: Must be an array where each element " + "is an object describing the URI and config options for each split.");
        }
        for (Object obj : (List) multiUriConfig) {
            Map<String, Object> configMap;
            MongoClientURI inputURI;
            Configuration confForThisUri;
            try {
                configMap = (Map<String, Object>) obj;
                if (LOG.isDebugEnabled()) {
                    LOG.debug("building config from " + configMap.toString());
                }
                confForThisUri = MongoConfigUtil.buildConfiguration(configMap);
                inputURI = MongoConfigUtil.getInputURI(confForThisUri);
            } catch (ClassCastException e) {
                // Keep the original exception as the cause so the offending cast stays visible.
                throw new IllegalArgumentException("Invalid JSON format in multi uri config key: each config item must be an " + "object with keys/values describing options for each URI.", e);
            }
            MongoSplitter splitter;
            Class<? extends MongoSplitter> splitterClass = MongoConfigUtil.getSplitterClass(confForThisUri);
            if (splitterClass != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Using custom Splitter class for namespace: %s.%s; hosts: %s", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                //Make sure that the custom class isn't this one
                if (splitterClass == MultiMongoCollectionSplitter.class) {
                    throw new IllegalArgumentException("Can't nest uses of MultiMongoCollectionSplitter");
                }
                //All clear.
                MongoCollectionSplitter collectionSplitter;
                collectionSplitter = (MongoCollectionSplitter) ReflectionUtils.newInstance(splitterClass, confForThisUri);
                //Since we use no-arg constructor, need to inject
                //configuration and input URI.
                collectionSplitter.setConfiguration(confForThisUri);
                splitter = collectionSplitter;
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Fetching collection stats on namespace: %s.%s; hosts: %s to choose splitter implementation.", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                //No class was specified, so choose one by looking at
                //collection stats.
                splitter = MongoSplitterFactory.getSplitterByStats(inputURI, confForThisUri);
            }
            splitters.add(splitter);
        }
    }

    // Now we have a splitter for every input; run each of them and
    // compile the results into one big list.
    for (MongoSplitter splitter : splitters) {
        returnVal.addAll(splitter.calculateSplits());
    }
    return returnVal;
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoRecordWriter method write.
/**
 * Writes one key/value pair to {@code outputStream}, preceded by an int type marker.
 *
 * <p>A {@code MongoUpdateWritable} value is written as-is after the
 * {@code MONGO_UPDATE_WRITABLE} marker. Any other value is folded into a new document whose
 * {@code _id} is derived from the key, and that document is written through
 * {@code bsonWritable} after the {@code BSON_WRITABLE} marker.
 *
 * @param key   the record key; becomes the document's {@code _id} unless the value is an update
 * @param value the record value
 * @throws IOException declared for the writes to the underlying stream
 */
@Override
public void write(final K key, final V value) throws IOException {
    // Updates bypass document construction entirely.
    if (value instanceof MongoUpdateWritable) {
        outputStream.writeInt(MongoWritableTypes.MONGO_UPDATE_WRITABLE);
        ((MongoUpdateWritable) value).write(outputStream);
        return;
    }

    final DBObject doc = new BasicDBObject();

    // Derive the _id from the key: unwrap a BSONWritable, keep a BSONObject as it is,
    // and convert anything else via BSONWritable.toBSON.
    final Object id;
    if (key instanceof BSONWritable) {
        id = ((BSONWritable) key).getDoc();
    } else if (key instanceof BSONObject) {
        id = key;
    } else {
        id = BSONWritable.toBSON(key);
    }
    doc.put("_id", id);

    // Merge the value's fields into the document. This happens after _id is set, and the
    // order of the type checks is significant, so it is kept exactly as is.
    if (value instanceof BSONWritable) {
        doc.putAll(((BSONWritable) value).getDoc());
    } else if (value instanceof MongoOutput) {
        ((MongoOutput) value).appendAsValue(doc);
    } else if (value instanceof BSONObject) {
        doc.putAll((BSONObject) value);
    } else if (value instanceof Map) {
        doc.putAll((Map) value);
    } else {
        // No field structure to merge: store the converted value under "value".
        doc.put("value", BSONWritable.toBSON(value));
    }

    outputStream.writeInt(MongoWritableTypes.BSON_WRITABLE);
    bsonWritable.setDoc(doc);
    bsonWritable.write(outputStream);
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class BookstoreTest method tagsIndex.
/**
 * Runs the bookstore job over the inventory BSON file, writing to the
 * {@code mongo_hadoop.bookstore_tags} collection, and checks that the resulting
 * document with {@code _id} "history" lists 8 books.
 */
@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    try {
        DBCollection collection = mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        MapReduceJob job = new MapReduceJob(BookstoreConfig.class.getName()).jar(JAR_PATH).inputUris(INVENTORY_BSON).outputUri(uri).param("mapred.input.dir", INVENTORY_BSON.toString());
        // Hadoop 1.x goes through the old "mapred" API classes; later versions use the new API.
        if (!HADOOP_VERSION.startsWith("1.")) {
            job.inputFormat(BSONFileInputFormat.class);
        } else {
            job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
            job.mapredOutputFormat(MongoOutputFormat.class);
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(false);
        DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
        assertNotNull(object);
        List books = (List) object.get("books");
        // JUnit's argument order is (message, expected, actual); passing them the other way
        // round makes a failure report the wrong value as "expected".
        Assert.assertEquals("Should find only 8 books", 8, books.size());
    } finally {
        // Release the connection even when the job or an assertion fails.
        mongoClient.close();
    }
}
Aggregations