Search in sources :

Example 11 with MongoInputSplit

use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.

the class HiveMongoInputFormat method getRecordReader.

/**
 * Creates a record reader over the {@code MongoInputSplit} wrapped by the
 * given Hive split, after pushing the Hive projection and the combined
 * Hive/table filter down onto that split.
 *
 * @param split the input split; cast to {@code MongoHiveInputSplit}
 * @param conf the job configuration the column mapping, projection and
 *             filters are read from
 * @param reporter not used by this method
 * @return a {@code MongoRecordReader} reading from the delegate split
 * @throws IOException declared by the overridden method
 */
@Override
public RecordReader<BSONWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf conf, final Reporter reporter) throws IOException {
    // The incoming split is the Hive wrapper around the real Mongo split.
    final MongoHiveInputSplit hiveSplit = (MongoHiveInputSplit) split;

    // Hive column name -> MongoDB field name.
    final Map<String, String> columnNames = columnMapping(conf);

    // Projection requested by Hive, if any.
    final DBObject projection = getProjection(conf, columnNames);
    final MongoInputSplit mongoSplit = (MongoInputSplit) hiveSplit.getDelegate();
    if (projection != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB projection : " + projection);
        }
        mongoSplit.setFields(projection);
    }

    // Start from the filter requested by Hive, then merge in the
    // table-level query when one is configured.
    DBObject query = getFilter(conf, columnNames);
    if (conf.get(MongoConfigUtil.INPUT_QUERY) != null) {
        final DBObject tableQuery = MongoConfigUtil.getQuery(conf);
        if (query != null) {
            // Both filters exist: wrap them in $and so neither one
            // overwrites keys of the other.
            final BasicDBList clauses = new BasicDBList();
            clauses.add(query);
            clauses.add(tableQuery);
            query = new BasicDBObject("$and", clauses);
        } else {
            query = tableQuery;
        }
    }

    if (query != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB query: " + query);
        }
        mongoSplit.setQuery(query);
    }

    // The reader works directly on the delegate MongoInputSplit.
    return new MongoRecordReader(mongoSplit);
}
Also used : BasicDBList(com.mongodb.BasicDBList) BasicDBObject(com.mongodb.BasicDBObject) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) MongoRecordReader(com.mongodb.hadoop.mapred.input.MongoRecordReader) DBObject(com.mongodb.DBObject)

Example 12 with MongoInputSplit

use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.

the class MongoPaginatingSplitterTest method testNoQuery.

/**
 * With range queries enabled, a minimum of 5000 documents per split and no
 * input query configured, the paginating splitter is expected to yield 8
 * splits whose boundaries are consecutive multiples of 5000, open-ended at
 * both extremes.
 */
@Test
public void testNoQuery() throws SplitFailedException {
    final Configuration config = new Configuration();
    MongoConfigUtil.setInputURI(config, uri);
    MongoConfigUtil.setRangeQueryEnabled(config, true);
    MongoConfigUtil.setInputSplitMinDocs(config, 5000);

    final List<InputSplit> splits = new MongoPaginatingSplitter(config).calculateSplits();
    assertEquals(8, splits.size());

    final int lastIndex = splits.size() - 1;
    for (int index = 0; index <= lastIndex; index++) {
        // The first split has no lower bound and the last has no upper bound.
        Integer lowerBound = null;
        if (index != 0) {
            lowerBound = index * 5000;
        }
        Integer upperBound = null;
        if (index != lastIndex) {
            upperBound = (index + 1) * 5000;
        }
        assertSplitRange((MongoInputSplit) splits.get(index), lowerBound, upperBound);
    }

    // Together the splits must cover every document in the collection.
    assertSplitsCount(collection.count(), splits);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputSplit(org.apache.hadoop.mapreduce.InputSplit) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) Test(org.junit.Test)

Example 13 with MongoInputSplit

use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.

the class MongoRecordReaderTest method testGetCurrentKey.

/**
 * Checks which value {@code MongoRecordReader#getCurrentKey()} reports for
 * three key-field settings: the default, a nested field
 * ({@code address.street}) and an element inside an array
 * ({@code colors.1}).
 *
 * <p>Connects to a MongoDB server at localhost:27017 and drops and
 * re-creates the test collection.
 */
@Test
public void testGetCurrentKey() throws Exception {
    MongoClient client = new MongoClient("localhost", 27017);
    try {
        MongoClientURI uri = new MongoClientURIBuilder().collection("mongo_hadoop", "mongo_record_reader_test").build();
        DBCollection collection = client.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        collection.drop();

        // Plain adds instead of double-brace initialization, which would
        // create a needless anonymous BasicDBList subclass.
        BasicDBList colors = new BasicDBList();
        colors.add(new BasicBSONObject("red", 255));
        colors.add(new BasicBSONObject("blue", 255));
        colors.add(new BasicBSONObject("green", 0));
        collection.insert(new BasicDBObject("_id", 0).append("address", new BasicDBObject("street", "foo street")).append("colors", colors));

        // Default case: "_id" is used as inputKey.
        MongoInputSplit split = new MongoInputSplit();
        split.setInputURI(uri);
        MongoRecordReader reader = new MongoRecordReader(split);
        assertTrue(reader.nextKeyValue());
        // JUnit's contract is assertEquals(expected, actual).
        assertEquals(0, reader.getCurrentKey());

        // Use a nested field as inputKey.
        split = new MongoInputSplit();
        split.setInputURI(uri);
        split.setKeyField("address.street");
        reader = new MongoRecordReader(split);
        assertTrue(reader.nextKeyValue());
        assertEquals("foo street", reader.getCurrentKey());

        // Use a key within an array as the inputKey.
        split = new MongoInputSplit();
        split.setInputURI(uri);
        split.setKeyField("colors.1");
        reader = new MongoRecordReader(split);
        assertTrue(reader.nextKeyValue());
        assertEquals(new BasicBSONObject("blue", 255), reader.getCurrentKey());
    } finally {
        // Release the connection opened for the fixture setup even when an
        // assertion above fails.
        client.close();
    }
}
Also used : MongoClient(com.mongodb.MongoClient) DBCollection(com.mongodb.DBCollection) BasicDBList(com.mongodb.BasicDBList) BasicBSONObject(org.bson.BasicBSONObject) BasicDBObject(com.mongodb.BasicDBObject) MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) MongoClientURIBuilder(com.mongodb.hadoop.util.MongoClientURIBuilder) MongoRecordReader(com.mongodb.hadoop.input.MongoRecordReader) MongoClientURI(com.mongodb.MongoClientURI) Test(org.junit.Test)

Example 14 with MongoInputSplit

use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.

the class MongoSplitterTestUtils method assertSplitsCount.

/**
 * Assert that the documents counted across every split in the list add up
 * to the expected total.
 * @param expected the expected count
 * @param splits a list of MongoInputSplits
 */
public static void assertSplitsCount(final long expected, final List<InputSplit> splits) {
    int documentsSeen = 0;
    final Iterator<InputSplit> remaining = splits.iterator();
    while (remaining.hasNext()) {
        // The cursors of the incoming splits have already been closed, so
        // work on a copy of each MongoInputSplit.
        final MongoInputSplit copy = new MongoInputSplit((MongoInputSplit) remaining.next());
        // Query doesn't play nice with min/max, so the cursor is iterated
        // and counted with itcount() instead.
        final int documentsInSplit = copy.getCursor().itcount();
        documentsSeen += documentsInSplit;
    }
    assertEquals(expected, documentsSeen);
}
Also used : MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Example 15 with MongoInputSplit

use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.

the class SampleSplitterTest method testCalculateSplits.

/**
 * With a split size of 1, the splitter under test is expected to produce 12
 * splits: the first with an empty min bound, the last with an empty max
 * bound, and strictly ascending {@code _id} max bounds in between.
 * Skipped when {@code isSampleOperatorSupported(uri)} is false.
 */
@Test
public void testCalculateSplits() throws SplitFailedException {
    assumeTrue(isSampleOperatorSupported(uri));

    final Configuration config = new Configuration();
    MongoConfigUtil.setInputURI(config, uri.getURI());
    MongoConfigUtil.setSplitSize(config, 1);
    splitter.setConfiguration(config);

    final List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(12, splits.size());

    // The outermost splits are open-ended.
    final MongoInputSplit first = (MongoInputSplit) splits.get(0);
    assertTrue(first.getMin().toMap().isEmpty());
    final MongoInputSplit last = (MongoInputSplit) splits.get(11);
    assertTrue(last.getMax().toMap().isEmpty());

    // Upper bounds must increase from split to split. The last split is
    // left out of the walk because its max bound is empty.
    int previousMax = (Integer) first.getMax().get("_id");
    for (InputSplit candidate : splits.subList(1, splits.size() - 1)) {
        final MongoInputSplit inner = (MongoInputSplit) candidate;
        final int currentMax = (Integer) inner.getMax().get("_id");
        assertTrue(currentMax > previousMax);
        previousMax = currentMax;
    }
}
Also used : MongoInputSplit(com.mongodb.hadoop.input.MongoInputSplit) Configuration(org.apache.hadoop.conf.Configuration) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test) BaseHadoopTest(com.mongodb.hadoop.testutils.BaseHadoopTest)

Aggregations

MongoInputSplit (com.mongodb.hadoop.input.MongoInputSplit) 21, Test (org.junit.Test) 13, BasicDBObject (com.mongodb.BasicDBObject) 12, Configuration (org.apache.hadoop.conf.Configuration) 12, InputSplit (org.apache.hadoop.mapreduce.InputSplit) 11, DBObject (com.mongodb.DBObject) 7, MongoClientURI (com.mongodb.MongoClientURI) 5, BasicDBList (com.mongodb.BasicDBList) 3, BaseHadoopTest (com.mongodb.hadoop.testutils.BaseHadoopTest) 3, DBCollection (com.mongodb.DBCollection) 2, MongoClient (com.mongodb.MongoClient) 2, MongoClientURIBuilder (com.mongodb.hadoop.util.MongoClientURIBuilder) 2, ArrayList (java.util.ArrayList) 2, List (java.util.List) 2, BSONObject (org.bson.BSONObject) 2, CommandResult (com.mongodb.CommandResult) 1, DBCursor (com.mongodb.DBCursor) 1, MongoException (com.mongodb.MongoException) 1, MongoRecordReader (com.mongodb.hadoop.input.MongoRecordReader) 1, MongoRecordReader (com.mongodb.hadoop.mapred.input.MongoRecordReader) 1