use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoConfigUtil method getDBObject.
/**
 * Reads the setting stored under {@code key} and parses it as JSON into a DBObject.
 *
 * @param conf the Configuration to read the setting from
 * @param key  the key of the setting holding the JSON string
 * @return the parsed object, or a new empty BasicDBObject when parsing yields {@code null}
 * @throws IllegalArgumentException if reading, parsing or casting the value raises any exception
 */
public static DBObject getDBObject(final Configuration conf, final String key) {
    try {
        final DBObject parsed = (DBObject) JSON.parse(conf.get(key));
        // A null parse result is replaced by an empty document so callers never see null.
        return parsed != null ? parsed : new BasicDBObject();
    } catch (final Exception e) {
        throw new IllegalArgumentException("Provided JSON String is not representable/parseable as a DBObject.", e);
    }
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoConfigUtil method setJSON.
/**
 * Stores a JSON string as the value of a setting. The string is parsed and
 * cast to a DBObject first, then handed to {@code setDBObject}.
 *
 * @param conf  the Configuration
 * @param key   the key for the setting
 * @param value the JSON string value
 * @throws IllegalArgumentException if parsing, casting or storing the value raises any exception
 */
public static void setJSON(final Configuration conf, final String key, final String value) {
    try {
        setDBObject(conf, key, (DBObject) JSON.parse(value));
    } catch (final Exception e) {
        // The failure is logged here and also rethrown with the original cause attached.
        LOG.error("Cannot parse JSON...", e);
        throw new IllegalArgumentException("Provided JSON String is not representable/parseable as a DBObject.", e);
    }
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoConfigUtil method getInputSplitKey.
/**
 * Parses the input split key pattern of the given configuration into a DBObject.
 *
 * @param conf the Configuration holding the split key pattern
 * @return the parsed split key, or {@code { "_id" : 1 }} when parsing yields {@code null}
 * @throws IllegalArgumentException if reading, parsing or casting the pattern raises any exception
 */
public static DBObject getInputSplitKey(final Configuration conf) {
    try {
        final DBObject splitKey = (DBObject) JSON.parse(getInputSplitKeyPattern(conf));
        if (splitKey != null) {
            return splitKey;
        }
        // Nothing parsed: fall back to splitting on _id.
        return new BasicDBObject("_id", 1);
    } catch (final Exception e) {
        throw new IllegalArgumentException("Provided JSON String is not representable/parsable as a DBObject.", e);
    }
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MongoCollectionSplitter method createRangeQuerySplit.
/**
 * Creates an instance of {@link MongoInputSplit} whose upper and lower
 * bounds are restricted by adding $gte/$lt clauses to the query
 * filter. This requires that the boundaries are not compound keys, and that
 * the query does not contain any keys used in the split key.
 *
 * <p>Either bound may be {@code null}, in which case the split is open on
 * that side. When both are {@code null} the split uses the query unchanged.
 *
 * @param chunkLowerBound the lower bound of the chunk (min), added as {@code $gte}
 * @param chunkUpperBound the upper bound of the chunk (max), added as {@code $lt}
 * @param query a query filtering the documents within the split
 * @return a MongoInputSplit from a range query
 * @throws IllegalArgumentException if the query conflicts with the chunk bounds, or if either of the bounds is a compound key.
 */
public MongoInputSplit createRangeQuerySplit(final BasicDBObject chunkLowerBound, final BasicDBObject chunkUpperBound, final BSONObject query) {
    // A split without boundaries: the query alone defines the split.
    if (chunkLowerBound == null && chunkUpperBound == null) {
        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        MongoInputSplit split = new MongoInputSplit(getConfiguration());
        split.setQuery(splitQuery);
        return split;
    }

    // The boundaries are not empty, so try to build a split using $gte/$lt.
    // First check that the split contains no compound keys.
    // e.g. this is valid: { _id : "foo" }
    // but this is not {_id : "foo", name : "bar"}
    final boolean lowerIsCompound = chunkLowerBound != null && chunkLowerBound.keySet().size() > 1;
    final boolean upperIsCompound = chunkUpperBound != null && chunkUpperBound.keySet().size() > 1;
    Entry<String, Object> minKey = chunkLowerBound != null && chunkLowerBound.keySet().size() == 1 ? chunkLowerBound.entrySet().iterator().next() : null;
    Entry<String, Object> maxKey = chunkUpperBound != null && chunkUpperBound.keySet().size() == 1 ? chunkUpperBound.entrySet().iterator().next() : null;

    // Reject a compound bound even when the other bound is usable: dropping it
    // silently would leave the range open on that side and widen the split.
    if (lowerIsCompound || upperIsCompound || (minKey == null && maxKey == null)) {
        throw new IllegalArgumentException("Range query is enabled but one or more split boundaries contains a compound key:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound);
    }

    // Check that neither split key is already used as a field in the query,
    // since the range clause would overwrite that field of the filter.
    if (minKey != null && query.containsField(minKey.getKey()) || maxKey != null && query.containsField(maxKey.getKey())) {
        throw new IllegalArgumentException("Range query is enabled but split key conflicts with query filter:\n" + "min: " + chunkLowerBound + "\nmax: " + chunkUpperBound + "\nquery: " + query);
    }

    String key = null;
    BasicDBObject rangeObj = new BasicDBObject();
    if (minKey != null) {
        key = minKey.getKey();
        rangeObj.put("$gte", minKey.getValue());
    }
    if (maxKey != null) {
        // When both bounds are present the upper bound's field name is the one used.
        key = maxKey.getKey();
        rangeObj.put("$lt", maxKey.getValue());
    }

    DBObject splitQuery = new BasicDBObject();
    splitQuery.putAll(query);
    splitQuery.put(key, rangeObj);
    MongoInputSplit split = new MongoInputSplit(getConfiguration());
    split.setQuery(splitQuery);
    return split;
}
use of com.mongodb.DBObject in project mongo-hadoop by mongodb.
the class MultiMongoCollectionSplitter method calculateSplits.
/**
 * Builds one splitter per input collection and concatenates the splits they produce.
 *
 * <p>If input URIs are found under {@code MongoConfigUtil.INPUT_URI}, each URI gets
 * a copy of this splitter's configuration and a splitter chosen by
 * {@code MongoSplitterFactory.getSplitterByStats}. Otherwise the per-collection
 * settings are read from the JSON stored under {@code MULTI_COLLECTION_CONF_KEY}.
 *
 * @return the splits of every splitter, in splitter order
 * @throws SplitFailedException declared by the overridden method; may come from the delegate splitters
 * @throws IllegalArgumentException if the multi-collection JSON is not an array of objects,
 *         or if a per-collection entry names MultiMongoCollectionSplitter as its splitter
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    List<MongoClientURI> inputURIs = MongoConfigUtil.getMongoURIs(this.getConfiguration(), MongoConfigUtil.INPUT_URI);
    List<InputSplit> returnVal = new LinkedList<InputSplit>();
    List<MongoSplitter> splitters = new LinkedList<MongoSplitter>();

    // For each input URI that is specified, pick a splitter implementation.
    if (!inputURIs.isEmpty()) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Using global split settings for multiple URIs specified.");
        }
        // Every URI shares the options of this splitter's configuration. The
        // splitter class setting is blanked in each copy and the splitter is
        // chosen from collection stats instead.
        for (MongoClientURI uri : inputURIs) {
            MongoCollectionSplitter splitter;
            Configuration confForThisUri = new Configuration(getConfiguration());
            MongoConfigUtil.setInputURI(confForThisUri, uri);
            confForThisUri.set(MongoConfigUtil.MONGO_SPLITTER_CLASS, "");
            splitter = MongoSplitterFactory.getSplitterByStats(uri, confForThisUri);
            splitters.add(splitter);
        }
    } else {
        // Otherwise the user has set options per-collection.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Loading multiple input URIs from JSON stored in " + MULTI_COLLECTION_CONF_KEY);
        }
        DBObject multiUriConfig = MongoConfigUtil.getDBObject(this.getConfiguration(), MULTI_COLLECTION_CONF_KEY);
        if (!(multiUriConfig instanceof List)) {
            throw new IllegalArgumentException("Invalid JSON format in multi uri config key: Must be an array where each element " + "is an object describing the URI and config options for each split.");
        }
        for (Object obj : (List<?>) multiUriConfig) {
            Map<String, Object> configMap;
            MongoClientURI inputURI;
            Configuration confForThisUri;
            try {
                configMap = (Map<String, Object>) obj;
                if (LOG.isDebugEnabled()) {
                    LOG.debug("building config from " + configMap.toString());
                }
                confForThisUri = MongoConfigUtil.buildConfiguration(configMap);
                inputURI = MongoConfigUtil.getInputURI(confForThisUri);
            } catch (ClassCastException e) {
                // Keep the original exception as the cause so the offending cast stays visible.
                throw new IllegalArgumentException("Invalid JSON format in multi uri config key: each config item must be an " + "object with keys/values describing options for each URI.", e);
            }
            MongoSplitter splitter;
            Class<? extends MongoSplitter> splitterClass = MongoConfigUtil.getSplitterClass(confForThisUri);
            if (splitterClass != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Using custom Splitter class for namespace: %s.%s; hosts: %s", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                // Make sure that the custom class isn't this one.
                if (splitterClass == MultiMongoCollectionSplitter.class) {
                    throw new IllegalArgumentException("Can't nest uses of MultiMongoCollectionSplitter");
                }
                // All clear.
                // NOTE(review): this cast fails with ClassCastException if the custom
                // class is a MongoSplitter that is not a MongoCollectionSplitter.
                MongoCollectionSplitter collectionSplitter;
                collectionSplitter = (MongoCollectionSplitter) ReflectionUtils.newInstance(splitterClass, confForThisUri);
                // Since we use no-arg constructor, need to inject
                // configuration and input URI.
                collectionSplitter.setConfiguration(confForThisUri);
                splitter = collectionSplitter;
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Fetching collection stats on namespace: %s.%s; hosts: %s to choose splitter implementation.", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                // No class was specified, so choose one by looking at
                // collection stats.
                splitter = MongoSplitterFactory.getSplitterByStats(inputURI, confForThisUri);
            }
            splitters.add(splitter);
        }
    }

    // Now we have splitters for all the input collections.
    // Compile them into one big ol' list.
    for (MongoSplitter splitter : splitters) {
        returnVal.addAll(splitter.calculateSplits());
    }
    return returnVal;
}
Aggregations