Search in sources :

Example 1 with MapReduceJob

use of com.mongodb.hadoop.testutils.MapReduceJob in project mongo-hadoop by mongodb.

Example from the class BookstoreTest, method tagsIndex.

@Test
public void tagsIndex() throws URISyntaxException, UnknownHostException, IllegalAccessException {
    // Build the output URI for the bookstore_tags collection (authCheck adds
    // credentials when the test cluster requires them).
    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", "bookstore_tags")).build();
    MongoClient mongoClient = new MongoClient(uri);
    try {
        DBCollection collection = mongoClient.getDB(uri.getDatabase()).getCollection(uri.getCollection());

        // Configure a MapReduce job that reads the inventory BSON dump and
        // writes its output to the collection above.
        MapReduceJob job = new MapReduceJob(BookstoreConfig.class.getName())
            .jar(JAR_PATH)
            .inputUris(INVENTORY_BSON)
            .outputUri(uri)
            .param("mapred.input.dir", INVENTORY_BSON.toString());
        if (!HADOOP_VERSION.startsWith("1.")) {
            job.inputFormat(BSONFileInputFormat.class);
        } else {
            // Hadoop 1.x uses the old mapred.* API classes.
            job.mapredInputFormat(com.mongodb.hadoop.mapred.BSONFileInputFormat.class);
            job.mapredOutputFormat(MongoOutputFormat.class);
            job.outputCommitter(MongoOutputCommitter.class);
        }
        job.execute(false);

        // The job should have produced a document keyed by the "history" tag
        // whose "books" array holds exactly 8 entries.
        DBObject object = collection.findOne(new BasicDBObject("_id", "history"));
        assertNotNull(object);
        List books = (List) object.get("books");
        // JUnit's assertEquals signature is (message, expected, actual):
        // the expected constant must come before the observed value, otherwise
        // failure messages report the values backwards.
        Assert.assertEquals("Should find only 8 books", 8, books.size());
    } finally {
        // Always release the client so the test does not leak connections.
        mongoClient.close();
    }
}
Also used : MongoClient(com.mongodb.MongoClient) DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) MongoClientURI(com.mongodb.MongoClientURI) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) List(java.util.List) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) Test(org.junit.Test) BaseHadoopTest(com.mongodb.hadoop.testutils.BaseHadoopTest)

Example 2 with MapReduceJob

use of com.mongodb.hadoop.testutils.MapReduceJob in project mongo-hadoop by mongodb.

Example from the class TestSharded, method testRangeQueries.

@Test
public void testRangeQueries() {
    // Resolve the output collection and start from a clean slate.
    DBCollection results = getMongos()
        .getDB(getOutputUri().getDatabase())
        .getCollection(getOutputUri().getCollection());
    results.drop();

    // Build a treasury-yield job with range-query based splitting enabled.
    MapReduceJob rangeJob = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .inputUris(getInputUri())
        .outputUri(getOutputUri())
        .param(SPLITS_USE_RANGEQUERY, "true");
    if (isHadoopV1()) {
        rangeJob.outputCommitter(MongoOutputCommitter.class);
    }
    rangeJob.execute(isRunTestInVm());
    compareResults(results, getReference());

    // Re-run the same job with an input query that conflicts with the
    // range-query splitter; the run is expected to fail.
    results.drop();
    rangeJob.param(INPUT_QUERY, "{\"_id\":{\"$gt\":{\"$date\":1182470400000}}}").execute(isRunTestInVm());
    // Make sure that this fails when rangequery is used with a query that conflicts
    assertFalse("This collection shouldn't exist because of the failure",
                getMongos().getDB("mongo_hadoop").getCollectionNames().contains("yield_historical.out"));
}
Also used : DBCollection(com.mongodb.DBCollection) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) TreasuryYieldXMLConfig(com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfig) Test(org.junit.Test)

Example 3 with MapReduceJob

use of com.mongodb.hadoop.testutils.MapReduceJob in project mongo-hadoop by mongodb.

Example from the class TestSharded, method testDirectAccess.

// NOTE(review): every sibling test method in this class carries @Test, but this
// one did not — under JUnit 4 it would be silently skipped. Annotation added to
// match the rest of the class; confirm against the original source.
@Test
public void testDirectAccess() {
    DBCollection collection = getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out");
    collection.drop();
    // HADOOP61 - simulate a failed migration by having some docs from one chunk
    // also exist on another shard who does not own that chunk(duplicates)
    DB config = getMongos().getDB("config");
    DBObject chunk = config.getCollection("chunks").findOne(new BasicDBObject("shard", "sh01"));
    // Select all input documents that fall inside that chunk's [min, max) range.
    DBObject query = new BasicDBObject("_id", new BasicDBObject("$gte", ((DBObject) chunk.get("min")).get("_id")).append("$lt", ((DBObject) chunk.get("max")).get("_id")));
    List<DBObject> data = toList(getMongos().getDB("mongo_hadoop").getCollection("yield_historical.in").find(query));
    // Write the duplicates directly to a shard, bypassing mongos, so the shard
    // holds documents for a chunk it does not own.
    DBCollection destination = getShard().getDB("mongo_hadoop").getCollection("yield_historical.in");
    for (DBObject doc : data) {
        destination.insert(doc, WriteConcern.UNACKNOWLEDGED);
    }
    // Read directly from the shards (chunk-awareness off): results must still
    // match the reference despite the injected duplicates.
    MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName()).jar(JOBJAR_PATH).param(SPLITS_SLAVE_OK, "true").param(SPLITS_USE_SHARDS, "true").param(SPLITS_USE_CHUNKS, "false").inputUris(new MongoClientURIBuilder(getInputUri()).readPreference(ReadPreference.secondary()).build());
    if (isHadoopV1()) {
        job.outputCommitter(MongoOutputCommitter.class);
    }
    job.execute(isRunTestInVm());
    compareResults(collection, getReference());
    collection.drop();
    // Same direct-access run, but with chunk-aware splitting enabled.
    MapReduceJob jobWithChunks = new MapReduceJob(TreasuryYieldXMLConfig.class.getName()).jar(JOBJAR_PATH).inputUris(new MongoClientURIBuilder(getInputUri()).readPreference(ReadPreference.secondary()).build()).param(SPLITS_SLAVE_OK, "true").param(SPLITS_USE_SHARDS, "true").param(SPLITS_USE_CHUNKS, "true");
    if (isHadoopV1()) {
        jobWithChunks.outputCommitter(MongoOutputCommitter.class);
    }
    jobWithChunks.execute(isRunTestInVm());
    compareResults(collection, getReference());
}
Also used : DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) DB(com.mongodb.DB)

Example 4 with MapReduceJob

use of com.mongodb.hadoop.testutils.MapReduceJob in project mongo-hadoop by mongodb.

Example from the class TestSharded, method testMultiMongos.

@Test
public void testMultiMongos() {
    MongoClientURI outputUri = getOutputUri();

    // Spread input reads across two mongos routers.
    MapReduceJob multiMongosJob = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .param(INPUT_MONGOS_HOSTS, "localhost:27017 localhost:27018")
        .inputUris(getInputUri())
        .outputUri(outputUri);
    if (isHadoopV1()) {
        multiMongosJob.outputCommitter(MongoOutputCommitter.class);
    }
    multiMongosJob.execute(isRunTestInVm());

    // Output must match the reference data regardless of which mongos served each split.
    DBCollection output = getMongos().getDB(outputUri.getDatabase()).getCollection(outputUri.getCollection());
    compareResults(output, getReference());
}
Also used : MongoClientURI(com.mongodb.MongoClientURI) MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) TreasuryYieldXMLConfig(com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfig) Test(org.junit.Test)

Example 5 with MapReduceJob

use of com.mongodb.hadoop.testutils.MapReduceJob in project mongo-hadoop by mongodb.

Example from the class TestStandalone, method testBasicInputSource.

@Test
public void testBasicInputSource() {
    LOG.info("testing basic input source");

    // Plain treasury-yield job reading from the standard input URI, with
    // cursor timeouts disabled for the input collection scan.
    MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .param("mongo.input.notimeout", "true")
        .inputUris(getInputUri())
        .outputUri(getOutputUri());
    if (isHadoopV1()) {
        job.outputCommitter(MongoOutputCommitter.class);
    }
    job.execute(isRunTestInVm());

    // Verify the output collection against the reference dataset.
    compareResults(
        getClient(getInputUri()).getDB(getOutputUri().getDatabase()).getCollection(getOutputUri().getCollection()),
        getReference());
}
Also used : MapReduceJob(com.mongodb.hadoop.testutils.MapReduceJob) TreasuryYieldXMLConfig(com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfig) Test(org.junit.Test)

Aggregations

MapReduceJob (com.mongodb.hadoop.testutils.MapReduceJob)9 Test (org.junit.Test)8 DBCollection (com.mongodb.DBCollection)4 TreasuryYieldXMLConfig (com.mongodb.hadoop.examples.treasury.TreasuryYieldXMLConfig)4 BasicDBObject (com.mongodb.BasicDBObject)2 DBObject (com.mongodb.DBObject)2 MongoClientURI (com.mongodb.MongoClientURI)2 MultiMongoCollectionSplitter (com.mongodb.hadoop.splitter.MultiMongoCollectionSplitter)2 MongoClientURIBuilder (com.mongodb.hadoop.util.MongoClientURIBuilder)2 DB (com.mongodb.DB)1 MongoClient (com.mongodb.MongoClient)1 BaseHadoopTest (com.mongodb.hadoop.testutils.BaseHadoopTest)1 List (java.util.List)1