Use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.
From class HiveMongoInputFormat, method getRecordReader.
@Override
public RecordReader<BSONWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf conf, final Reporter reporter) throws IOException {
    // The framework hands us a MongoHiveInputSplit; unwrap the underlying
    // MongoInputSplit, which is what actually drives the read.
    MongoHiveInputSplit hiveSplit = (MongoHiveInputSplit) split;
    MongoInputSplit inputSplit = (MongoInputSplit) hiveSplit.getDelegate();
    // Resolve the Hive column name -> MongoDB field name mapping.
    Map<String, String> mapping = columnMapping(conf);
    // Push the Hive projection, if any, down onto the delegate split.
    DBObject projection = getProjection(conf, mapping);
    if (projection != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB projection : " + projection);
        }
        inputSplit.setFields(projection);
    }
    // Build the query: the filter derived from Hive, combined with any
    // table-level query configured via INPUT_QUERY.
    DBObject query = getFilter(conf, mapping);
    if (conf.get(MongoConfigUtil.INPUT_QUERY) != null) {
        DBObject tableQuery = MongoConfigUtil.getQuery(conf);
        if (query == null) {
            query = tableQuery;
        } else {
            // Combine with an $and clause so neither filter overwrites
            // keys from the other.
            BasicDBList clauses = new BasicDBList();
            clauses.add(query);
            clauses.add(tableQuery);
            query = new BasicDBObject("$and", clauses);
        }
    }
    if (query != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB query: " + query);
        }
        inputSplit.setQuery(query);
    }
    // Wrap the configured MongoInputSplit in a MongoRecordReader.
    return new MongoRecordReader(inputSplit);
}
Use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.
From class MongoPaginatingSplitterTest, method testNoQuery.
@Test
public void testNoQuery() throws SplitFailedException {
    // Configure a paginating splitter with range queries enabled and a
    // minimum of 5000 documents per split.
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, uri);
    MongoConfigUtil.setRangeQueryEnabled(conf, true);
    MongoConfigUtil.setInputSplitMinDocs(conf, 5000);
    List<InputSplit> splits = new MongoPaginatingSplitter(conf).calculateSplits();
    // Expect eight contiguous 5000-document ranges.
    assertEquals(8, splits.size());
    int index = 0;
    for (InputSplit split : splits) {
        // The first split is unbounded below; the last is unbounded above.
        Integer lower = index == 0 ? null : index * 5000;
        Integer upper = index == splits.size() - 1 ? null : (index + 1) * 5000;
        assertSplitRange((MongoInputSplit) split, lower, upper);
        ++index;
    }
    // The splits together must cover every document exactly once.
    assertSplitsCount(collection.count(), splits);
}
Use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.
From class MongoRecordReaderTest, method testGetCurrentKey.
@Test
public void testGetCurrentKey() throws Exception {
    // Verify that MongoRecordReader honors the split's keyField setting,
    // including nested fields and array indices.
    MongoClient client = new MongoClient("localhost", 27017);
    // Close the client when done so the test does not leak connections.
    try {
        MongoClientURI uri = new MongoClientURIBuilder().collection("mongo_hadoop", "mongo_record_reader_test").build();
        DBCollection collection = client.getDB(uri.getDatabase()).getCollection(uri.getCollection());
        collection.drop();
        BasicDBList colors = new BasicDBList() {
            {
                add(new BasicBSONObject("red", 255));
                add(new BasicBSONObject("blue", 255));
                add(new BasicBSONObject("green", 0));
            }
        };
        collection.insert(new BasicDBObject("_id", 0).append("address", new BasicDBObject("street", "foo street")).append("colors", colors));
        // Default case: "_id" is used as inputKey.
        MongoInputSplit split = new MongoInputSplit();
        split.setInputURI(uri);
        MongoRecordReader reader = new MongoRecordReader(split);
        assertTrue(reader.nextKeyValue());
        assertEquals(reader.getCurrentKey(), 0);
        // Use a nested field (dotted path) as inputKey.
        split = new MongoInputSplit();
        split.setInputURI(uri);
        split.setKeyField("address.street");
        reader = new MongoRecordReader(split);
        assertTrue(reader.nextKeyValue());
        assertEquals(reader.getCurrentKey(), "foo street");
        // Use a key within an array (index 1) as the inputKey.
        split = new MongoInputSplit();
        split.setInputURI(uri);
        split.setKeyField("colors.1");
        reader = new MongoRecordReader(split);
        assertTrue(reader.nextKeyValue());
        assertEquals(reader.getCurrentKey(), new BasicBSONObject("blue", 255));
    } finally {
        client.close();
    }
}
Use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.
From class MongoSplitterTestUtils, method assertSplitsCount.
/**
 * Assert that a list of splits has the expected overall document count.
 *
 * @param expected the expected total number of documents across all splits
 * @param splits a list of MongoInputSplits
 */
public static void assertSplitsCount(final long expected, final List<InputSplit> splits) {
    // Accumulate into a long so the total cannot overflow when compared
    // against the long expected value.
    long splitTotal = 0;
    for (InputSplit split : splits) {
        // Cursors have been closed; create a copy of the MongoInputSplit
        // to obtain a fresh cursor.
        MongoInputSplit mis = new MongoInputSplit((MongoInputSplit) split);
        // Query doesn't play nice with min/max, so use itcount for test.
        splitTotal += mis.getCursor().itcount();
    }
    assertEquals(expected, splitTotal);
}
Use of com.mongodb.hadoop.input.MongoInputSplit in project mongo-hadoop by mongodb.
From class SampleSplitterTest, method testCalculateSplits.
@Test
public void testCalculateSplits() throws SplitFailedException {
    // Sample-based splitting requires server support; skip otherwise.
    assumeTrue(isSampleOperatorSupported(uri));
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, uri.getURI());
    MongoConfigUtil.setSplitSize(conf, 1);
    splitter.setConfiguration(conf);
    List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(12, splits.size());
    MongoInputSplit first = (MongoInputSplit) splits.get(0);
    MongoInputSplit last = (MongoInputSplit) splits.get(11);
    // The first split has no lower bound; the last has no upper bound.
    assertTrue(first.getMin().toMap().isEmpty());
    assertTrue(last.getMax().toMap().isEmpty());
    // Upper bounds of the interior splits must be strictly ascending.
    int previousKey = (Integer) first.getMax().get("_id");
    for (int i = 1; i < splits.size() - 1; i++) {
        int currentKey = (Integer) ((MongoInputSplit) splits.get(i)).getMax().get("_id");
        assertTrue(currentKey > previousKey);
        previousKey = currentKey;
    }
}
Aggregations