Use of com.mongodb.MongoClientURI in project mongo-hadoop by mongodb.
The class MongoSplitterFactory, method getSplitterByStats:
public static MongoCollectionSplitter getSplitterByStats(final MongoClientURI uri, final Configuration config) {
    /* Looks at the collection in mongo.input.uri
     * and chooses an implementation based on what's in there. */
    MongoCollectionSplitter returnVal;
    // Split creation is disabled: use one big split for the whole collection.
    if (!MongoConfigUtil.createInputSplits(config)) {
        returnVal = new SingleMongoSplitter(config);
    } else {
        MongoClientURI authURI = MongoConfigUtil.getAuthURI(config);
        CommandResult stats;
        DBCollection coll = null;
        CommandResult buildInfo;
        try {
            if (authURI != null) {
                coll = MongoConfigUtil.getCollectionWithAuth(uri, authURI);
                stats = coll.getStats();
                LOG.info("Retrieved Collection stats:" + stats);
            } else {
                coll = MongoConfigUtil.getCollection(uri);
                stats = coll.getStats();
            }
            buildInfo = coll.getDB().command("buildinfo");
        } finally {
            if (coll != null) {
                MongoConfigUtil.close(coll.getDB().getMongo());
            }
        }
        if (!stats.getBoolean("ok", false)) {
            throw new RuntimeException("Unable to calculate input splits from collection stats: "
                                       + stats.getString("errmsg"));
        }
        if (!stats.getBoolean("sharded", false)) {
            // Prefer SampleSplitter when the server supports the $sample
            // aggregation operator (MongoDB 3.2+).
            List versionArray = (List) buildInfo.get("versionArray");
            boolean sampleOperatorSupported =
                ((Integer) versionArray.get(0) > 3
                 || ((Integer) versionArray.get(0) == 3 && (Integer) versionArray.get(1) >= 2));
            if (sampleOperatorSupported) {
                returnVal = new SampleSplitter(config);
            } else {
                returnVal = new StandaloneMongoSplitter(config);
            }
        } else {
            // Collection is sharded.
            if (MongoConfigUtil.isShardChunkedSplittingEnabled(config)) {
                // Creates one split per chunk.
                returnVal = new ShardChunkMongoSplitter(config);
            } else if (MongoConfigUtil.canReadSplitsFromShards(config)) {
                // Creates one split per shard, but ignores chunk bounds.
                // Reads from shards directly (bypassing mongos).
                // Not usually recommended.
                returnVal = new ShardMongoSplitter(config);
            } else {
                // Not configured to use chunks or shards, so treat this the
                // same as an unsharded collection.
                returnVal = new StandaloneMongoSplitter(config);
            }
        }
    }
    return returnVal;
}
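The factory chooses a splitter from server statistics alone, so driving it needs only a Hadoop Configuration and an input URI. A minimal sketch of a direct call (the namespace below is a hypothetical placeholder, not from the project):

import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.splitter.MongoCollectionSplitter;
import com.mongodb.hadoop.splitter.MongoSplitterFactory;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;

public class SplitterByStatsExample {
    public static void main(final String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical namespace; point this at a running mongod or mongos.
        MongoClientURI uri =
            new MongoClientURI("mongodb://localhost:27017/mongo_hadoop.example");
        MongoConfigUtil.setInputURI(conf, uri);
        // The factory runs collStats and buildinfo on the server and picks
        // SingleMongoSplitter, SampleSplitter, StandaloneMongoSplitter, or a
        // shard-aware splitter, as shown above.
        MongoCollectionSplitter splitter =
            MongoSplitterFactory.getSplitterByStats(uri, conf);
        System.out.println("Chosen splitter: " + splitter.getClass().getName());
    }
}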
Use of com.mongodb.MongoClientURI in project mongo-hadoop by mongodb.
The class MultiMongoCollectionSplitter, method calculateSplits:
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    List<MongoClientURI> inputURIs =
        MongoConfigUtil.getMongoURIs(this.getConfiguration(), MongoConfigUtil.INPUT_URI);
    List<InputSplit> returnVal = new LinkedList<InputSplit>();
    List<MongoSplitter> splitters = new LinkedList<MongoSplitter>();
    // For each input URI, choose an appropriate splitter implementation.
    if (inputURIs.size() > 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Using global split settings for multiple URIs specified.");
        }
        // No per-collection options were given, so each splitter reads the
        // global split settings from the configuration instead.
        for (MongoClientURI uri : inputURIs) {
            MongoCollectionSplitter splitter;
            Configuration confForThisUri = new Configuration(getConfiguration());
            MongoConfigUtil.setInputURI(confForThisUri, uri);
            // Clear the splitter class so it is chosen from collection stats.
            confForThisUri.set(MongoConfigUtil.MONGO_SPLITTER_CLASS, "");
            splitter = MongoSplitterFactory.getSplitterByStats(uri, confForThisUri);
            splitters.add(splitter);
        }
    } else {
        // Otherwise the user has set options per-collection.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Loading multiple input URIs from JSON stored in " + MULTI_COLLECTION_CONF_KEY);
        }
        DBObject multiUriConfig =
            MongoConfigUtil.getDBObject(this.getConfiguration(), MULTI_COLLECTION_CONF_KEY);
        if (!(multiUriConfig instanceof List)) {
            throw new IllegalArgumentException("Invalid JSON format in multi uri config key: Must be an array where each element "
                                               + "is an object describing the URI and config options for each split.");
        }
        for (Object obj : (List) multiUriConfig) {
            Map<String, Object> configMap;
            MongoClientURI inputURI;
            Configuration confForThisUri;
            try {
                configMap = (Map<String, Object>) obj;
                if (LOG.isDebugEnabled()) {
                    LOG.debug("building config from " + configMap.toString());
                }
                confForThisUri = MongoConfigUtil.buildConfiguration(configMap);
                inputURI = MongoConfigUtil.getInputURI(confForThisUri);
            } catch (ClassCastException e) {
                throw new IllegalArgumentException("Invalid JSON format in multi uri config key: each config item must be an "
                                                   + "object with keys/values describing options for each URI.");
            }
            MongoSplitter splitter;
            Class<? extends MongoSplitter> splitterClass =
                MongoConfigUtil.getSplitterClass(confForThisUri);
            if (splitterClass != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Using custom Splitter class for namespace: %s.%s; hosts: %s",
                                     inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                // Make sure that the custom class isn't this one.
                if (splitterClass == MultiMongoCollectionSplitter.class) {
                    throw new IllegalArgumentException("Can't nest uses of MultiMongoCollectionSplitter");
                }
                // All clear.
                MongoCollectionSplitter collectionSplitter;
                collectionSplitter =
                    (MongoCollectionSplitter) ReflectionUtils.newInstance(splitterClass, confForThisUri);
                // Since we use the no-arg constructor, we need to inject the
                // configuration and input URI.
                collectionSplitter.setConfiguration(confForThisUri);
                splitter = collectionSplitter;
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(format("Fetching collection stats on namespace: %s.%s; hosts: %s to choose splitter implementation.",
                                     inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
                }
                // No class was specified, so choose one by looking at
                // collection stats.
                splitter = MongoSplitterFactory.getSplitterByStats(inputURI, confForThisUri);
            }
            splitters.add(splitter);
        }
    }
    // Calculate the splits from each splitter and compile them into one big list.
    for (MongoSplitter splitter : splitters) {
        returnVal.addAll(splitter.calculateSplits());
    }
    return returnVal;
}
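The per-collection branch above expects a JSON array under MULTI_COLLECTION_CONF_KEY, one object of config key/value pairs per input collection. A hedged sketch of building such a configuration, assuming MULTI_COLLECTION_CONF_KEY and MONGO_SPLITTER_CLASS are the public constants referenced in the method above (the URIs are placeholders):

import com.mongodb.hadoop.splitter.MongoSplitter;
import com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;

public class MultiCollectionConfigExample {
    public static void main(final String[] args) {
        Configuration conf = new Configuration();
        // Select MultiMongoCollectionSplitter via the same key the method
        // above clears in its global-settings branch.
        conf.setClass(MongoConfigUtil.MONGO_SPLITTER_CLASS,
                      MultiMongoCollectionSplitter.class, MongoSplitter.class);
        // One JSON object per collection; "mongo.input.uri" is the key behind
        // MongoConfigUtil.INPUT_URI. Any other config option can be set per entry.
        conf.set(MultiMongoCollectionSplitter.MULTI_COLLECTION_CONF_KEY,
            "[{\"mongo.input.uri\": \"mongodb://localhost:27017/db.collectionA\"},"
            + " {\"mongo.input.uri\": \"mongodb://localhost:27017/db.collectionB\"}]");
    }
}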
Use of com.mongodb.MongoClientURI in project mongo-hadoop by mongodb.
The class ShardMongoSplitter, method calculateSplits:
// Treat each shard as one split.
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    final ArrayList<InputSplit> returnVal = new ArrayList<InputSplit>();
    MongoClientURI inputURI = MongoConfigUtil.getInputURI(getConfiguration());
    Map<String, List<String>> shardsMap;
    try {
        shardsMap = getShardsMap();
        for (Entry<String, List<String>> entry : shardsMap.entrySet()) {
            List<String> shardHosts = entry.getValue();
            MongoInputSplit chunkSplit = createSplitFromBounds(null, null);
            chunkSplit.setInputURI(rewriteURI(inputURI, shardHosts));
            returnVal.add(chunkSplit);
        }
    } finally {
        // getShardsMap() creates a client to a config server. Close it now.
        MongoConfigUtil.close(getConfigDB().getMongo());
    }
    if (MongoConfigUtil.isFilterEmptySplitsEnabled(getConfiguration())) {
        return filterEmptySplits(returnVal);
    }
    return returnVal;
}
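ShardMongoSplitter is only selected when chunk splitting is off and reading from shards is on (see getSplitterByStats above). A minimal configuration sketch, assuming SPLITS_USE_CHUNKS and SPLITS_USE_SHARDS are the MongoConfigUtil key constants behind the isShardChunkedSplittingEnabled and canReadSplitsFromShards checks:

import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;

public class ShardSplitConfigExample {
    public static void main(final String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical sharded namespace behind a mongos router.
        MongoConfigUtil.setInputURI(conf, "mongodb://mongos-host:27017/db.sharded_coll");
        // Turn off chunk-based splitting and allow reading from shards
        // directly; this combination selects ShardMongoSplitter.
        conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, false);
        conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, true);
    }
}

As the comment in getSplitterByStats notes, this bypasses mongos and is not usually recommended.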
Use of com.mongodb.MongoClientURI in project mongo-hadoop by mongodb.
The class BookstoreTest, method tagsIndex:
@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    MongoClientURI uri =
        authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    DBCollection collection =
        mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());
    MapReduceJob job =
        new MapReduceJob(BookstoreConfig.class.getName())
            .jar(JAR_PATH)
            .inputUris(INVENTORY_BSON)
            .outputUri(uri)
            .param("mapred.input.dir", INVENTORY_BSON.toString());
    if (!HADOOP_VERSION.startsWith("1.")) {
        job.inputFormat(BSONFileInputFormat.class);
    } else {
        job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
        job.mapredOutputFormat(MongoOutputFormat.class);
        job.outputCommitter(MongoOutputCommitter.class);
    }
    job.execute(false);
    DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
    assertNotNull(object);
    List books = (List) object.get("books");
    // JUnit's assertEquals takes (message, expected, actual).
    Assert.assertEquals("Should find only 8 books", 8, books.size());
}
Use of com.mongodb.MongoClientURI in project mongo-hadoop by mongodb.
The class MongoPaginatingSplitterTest, method setUp:
@Before
public void setUp() {
    uri = new MongoClientURI("mongodb://localhost:27017/mongo_hadoop.pag_split_test");
    MongoClient client = new MongoClient("localhost", 27017);
    collection = client.getDatabase("mongo_hadoop").getCollection("pag_split_test");
    collection.drop();
    // Insert 40,000 small documents so there is something to paginate over.
    for (int i = 0; i < 40000; ++i) {
        collection.insertOne(new Document("_id", i).append("value", i));
    }
}
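Nothing above closes the client or removes the inserted data. A minimal cleanup sketch, assuming the MongoClient is promoted from a setUp local to a client field so the test can reach it:

@After
public void tearDown() {
    // Drop the 40,000 test documents and release the client connection.
    collection.drop();
    client.close();
}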