use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.
the class StandaloneMongoSplitter method calculateSplits.
/**
 * Calculates input splits by running the {@code splitVector} command on the input
 * collection's namespace and turning the returned boundary keys into ranged splits.
 *
 * <p>If the first attempt is not successful (the server does not recognize
 * {@code splitVector}, or the result is not {@code ok}), and the collection stats name a
 * {@code primary} shard, the command is retried directly against that shard's host(s).
 *
 * @return the calculated splits; passed through {@code filterEmptySplits} when
 *         empty-split filtering is enabled in the configuration
 * @throws SplitFailedException if the command reports an error, or if no usable
 *         {@code splitVector} result could be obtained
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    final DBObject splitKey = MongoConfigUtil.getInputSplitKey(getConfiguration());
    final DBObject splitKeyMax = MongoConfigUtil.getMaxSplitKey(getConfiguration());
    final DBObject splitKeyMin = MongoConfigUtil.getMinSplitKey(getConfiguration());
    final int splitSize = MongoConfigUtil.getSplitSize(getConfiguration());
    final MongoClientURI inputURI;
    DBCollection inputCollection = null;
    final ArrayList<InputSplit> returnVal;
    try {
        inputURI = MongoConfigUtil.getInputURI(getConfiguration());
        MongoClientURI authURI = MongoConfigUtil.getAuthURI(getConfiguration());
        if (authURI != null) {
            inputCollection = MongoConfigUtil.getCollectionWithAuth(inputURI, authURI);
        } else {
            inputCollection = MongoConfigUtil.getCollection(inputURI);
        }
        returnVal = new ArrayList<InputSplit>();
        final String ns = inputCollection.getFullName();
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Running splitVector on namespace: %s.%s; hosts: %s", inputURI.getDatabase(), inputURI.getCollection(), inputURI.getHosts()));
        }
        final DBObject cmd = BasicDBObjectBuilder.start("splitVector", ns).add("keyPattern", splitKey).add("min", splitKeyMin).add("max", splitKeyMax).add("force", false).add("maxChunkSize", splitSize).get();
        CommandResult data;
        boolean ok = true;
        try {
            data = inputCollection.getDB().getSisterDB(inputURI.getDatabase()).command(cmd, ReadPreference.primary());
        } catch (final MongoException e) {
            // 2.0 servers throw exceptions rather than info in a CommandResult
            data = null;
            LOG.info(e.getMessage(), e);
            // Guard against a null message so the original exception is rethrown
            // instead of being masked by a NullPointerException.
            final String message = e.getMessage();
            if (message != null && message.contains("unrecognized command: splitVector")) {
                ok = false;
            } else {
                throw e;
            }
        }
        if (data != null) {
            if (data.containsField("$err")) {
                throw new SplitFailedException("Error calculating splits: " + data);
            } else if (!Double.valueOf(1.0).equals(data.get("ok"))) {
                // Null-safe comparison: a result without an "ok" field counts as a failure.
                ok = false;
            }
        }
        if (!ok) {
            // Fall back to asking the primary shard named in the collection stats, if any.
            final CommandResult stats = inputCollection.getStats();
            if (stats.containsField("primary")) {
                final DBCursor shards = inputCollection.getDB().getSisterDB("config").getCollection("shards").find(new BasicDBObject("_id", stats.getString("primary")));
                try {
                    if (shards.hasNext()) {
                        final DBObject shard = shards.next();
                        // Strip the leading "<shard id>/" prefix from the host string.
                        final String host = ((String) shard.get("host")).replace(shard.get("_id") + "/", "");
                        final MongoClientURI shardHost;
                        if (authURI != null) {
                            shardHost = new MongoClientURIBuilder(authURI).host(host).build();
                        } else {
                            shardHost = new MongoClientURIBuilder(inputURI).host(host).build();
                        }
                        MongoClient shardClient = null;
                        try {
                            shardClient = new MongoClient(shardHost);
                            data = shardClient.getDB(shardHost.getDatabase()).command(cmd, ReadPreference.primary());
                        } catch (final Exception e) {
                            // Best effort: the failure is logged and the previous value of
                            // data (possibly null) is validated below.
                            LOG.error(e.getMessage(), e);
                        } finally {
                            if (shardClient != null) {
                                shardClient.close();
                            }
                        }
                    }
                } finally {
                    shards.close();
                }
            }
            if (data != null && !Double.valueOf(1.0).equals(data.get("ok"))) {
                throw new SplitFailedException("Unable to calculate input splits: " + data.get("errmsg"));
            }
        }
        // data is still null when splitVector was unrecognized and the shard fallback
        // either did not apply or failed; report that instead of dereferencing null.
        if (data == null) {
            throw new SplitFailedException("Unable to calculate input splits: the splitVector command returned no result");
        }
        // Comes in a format where "min" and "max" are implicit
        // and each entry is just a boundary key; not ranged
        final BasicDBList splitData = (BasicDBList) data.get("splitKeys");
        if (splitData == null) {
            throw new SplitFailedException("Unable to calculate input splits: no 'splitKeys' in result: " + data);
        }
        if (splitData.size() == 0) {
            LOG.warn("WARNING: No Input Splits were calculated by the split code. Proceeding with a *single* split. Data may be too" + " small, try lowering 'mongo.input.split_size' if this is undesirable.");
        }
        // Lower boundary of the first min split
        BasicDBObject lastKey = null;
        // If splitKeyMin was given, use it as first boundary.
        if (!splitKeyMin.toMap().isEmpty()) {
            lastKey = new BasicDBObject(splitKeyMin.toMap());
        }
        // Each boundary key closes the current split and opens the next one.
        for (final Object aSplitData : splitData) {
            final BasicDBObject currentKey = (BasicDBObject) aSplitData;
            returnVal.add(createSplitFromBounds(lastKey, currentKey));
            lastKey = currentKey;
        }
        BasicDBObject maxKey = null;
        // If splitKeyMax was given, use it as last boundary.
        if (!splitKeyMax.toMap().isEmpty()) {
            maxKey = new BasicDBObject(splitKeyMax.toMap());
        }
        // Last max split
        final MongoInputSplit lastSplit = createSplitFromBounds(lastKey, maxKey);
        returnVal.add(lastSplit);
    } finally {
        // Always release the client that backs the input collection.
        if (inputCollection != null) {
            MongoConfigUtil.close(inputCollection.getDB().getMongo());
        }
    }
    if (MongoConfigUtil.isFilterEmptySplitsEnabled(getConfiguration())) {
        return filterEmptySplits(returnVal);
    }
    return returnVal;
}
use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.
the class MongoSplitterFactory method getSplitterByStats.
/**
 * Chooses a splitter implementation by inspecting the collection named by {@code uri}.
 *
 * <p>When input splits are disabled a {@code SingleMongoSplitter} is returned without
 * contacting the server. Otherwise the collection stats and the server's
 * {@code buildinfo} are read, and the choice depends on whether the collection is
 * sharded, on the server version, and on the shard-related configuration flags.
 *
 * @param uri    the input collection URI
 * @param config the job configuration
 * @return the splitter to use for this collection
 * @throws RuntimeException if the collection stats result is not {@code ok}
 */
public static MongoCollectionSplitter getSplitterByStats(final MongoClientURI uri, final Configuration config) {
    // Splitting disabled: a single splitter handles the whole collection.
    if (!MongoConfigUtil.createInputSplits(config)) {
        return new SingleMongoSplitter(config);
    }

    final MongoClientURI authURI = MongoConfigUtil.getAuthURI(config);
    final CommandResult stats;
    final CommandResult buildInfo;
    DBCollection coll = null;
    try {
        if (authURI == null) {
            coll = MongoConfigUtil.getCollection(uri);
            stats = coll.getStats();
        } else {
            coll = MongoConfigUtil.getCollectionWithAuth(uri, authURI);
            stats = coll.getStats();
            LOG.info("Retrieved Collection stats:" + stats);
        }
        buildInfo = coll.getDB().command("buildinfo");
    } finally {
        // The connection is only needed for the two commands above.
        if (coll != null) {
            MongoConfigUtil.close(coll.getDB().getMongo());
        }
    }

    if (!stats.getBoolean("ok", false)) {
        throw new RuntimeException("Unable to calculate input splits from collection stats: " + stats.getString("errmsg"));
    }

    if (stats.getBoolean("sharded", false)) {
        if (MongoConfigUtil.isShardChunkedSplittingEnabled(config)) {
            // One split per chunk.
            return new ShardChunkMongoSplitter(config);
        }
        if (MongoConfigUtil.canReadSplitsFromShards(config)) {
            // One split per shard, ignoring chunk bounds and reading from the
            // shards directly (bypassing mongos). Not usually recommended.
            return new ShardMongoSplitter(config);
        }
        // Neither chunk nor shard splitting is configured, so handle the
        // collection the same way as an unsharded one.
        return new StandaloneMongoSplitter(config);
    }

    // Unsharded: prefer SampleSplitter on servers at version 3.2 or later.
    final List versionArray = (List) buildInfo.get("versionArray");
    final int major = (Integer) versionArray.get(0);
    final boolean sampleOperatorSupported = major > 3 || (major == 3 && (Integer) versionArray.get(1) >= 2);
    return sampleOperatorSupported ? new SampleSplitter(config) : new StandaloneMongoSplitter(config);
}
use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.
the class ShardChunkMongoSplitter method calculateSplits.
/**
 * Generates one split per chunk listed in the config database's {@code chunks} collection.
 *
 * @return the splits built by {@code calculateSplitsFromChunks}
 * @throws SplitFailedException if the shards map cannot be obtained
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    final DBCollection chunks = getConfigDB().getCollection("chunks");
    final Map<String, List<String>> shardHosts;
    try {
        shardHosts = getShardsMap();
    } catch (Exception cause) {
        // Without the shards map the splits cannot be built, so abort the splitting.
        throw new SplitFailedException("Couldn't get shards information from config server", cause);
    }
    return calculateSplitsFromChunks(chunks.find().toArray(), shardHosts);
}
use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.
the class BookstoreTest method tagsIndex.
/**
 * Runs the bookstore job over the inventory BSON input, writing to the
 * {@code mongo_hadoop.bookstore_tags} collection, and checks that the
 * {@code history} document lists exactly 8 books.
 */
@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    try {
        DBCollection collection = mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        MapReduceJob job = new MapReduceJob(BookstoreConfig.class.getName()).jar(JAR_PATH).inputUris(INVENTORY_BSON).outputUri(uri).param("mapred.input.dir", INVENTORY_BSON.toString());
        // The input/output format wiring depends on the Hadoop major version.
        if (!HADOOP_VERSION.startsWith("1.")) {
            job.inputFormat(BSONFileInputFormat.class);
        } else {
            job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
            job.mapredOutputFormat(MongoOutputFormat.class);
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(false);
        DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
        assertNotNull(object);
        List<?> books = (List<?>) object.get("books");
        // JUnit expects (message, expected, actual); this order keeps failure output accurate.
        Assert.assertEquals("Should find only 8 books", 8, books.size());
    } finally {
        // Release the client's connections even when the job or an assertion fails.
        mongoClient.close();
    }
}
use of com.mongodb.DBCollection in project mongo-hadoop by mongodb.
the class MongoInputSplit method getCursor.
/**
 * Returns the cursor for this split, creating and configuring it on the first call.
 *
 * <p>The cursor is built from this split's query, fields and sort, then given the
 * no-timeout option, {@code $min}/{@code $max} bounds, skip and limit when those are set.
 * Later calls return the same cursor.
 *
 * @return the cursor for this split
 */
public DBCursor getCursor() {
    // Already created: hand back the cached cursor.
    if (this.cursor != null) {
        return this.cursor;
    }

    final DBCollection source = this.authURI == null
        ? MongoConfigUtil.getCollection(this.inputURI)
        : MongoConfigUtil.getCollectionWithAuth(this.inputURI, this.authURI);

    this.cursor = source.find(this.query, this.fields).sort(this.sort);

    if (this.notimeout) {
        this.cursor.setOptions(Bytes.QUERYOPTION_NOTIMEOUT);
    }
    if (this.min != null) {
        this.cursor.addSpecial("$min", this.min);
    }
    if (this.max != null) {
        this.cursor.addSpecial("$max", this.max);
    }
    if (this.skip != null) {
        this.cursor = this.cursor.skip(this.skip);
    }
    if (this.limit != null) {
        this.cursor = this.cursor.limit(this.limit);
    }
    return this.cursor;
}
Aggregations