Example usage of org.apache.hadoop.mapreduce.InputSplit in the mongo-hadoop project (mongodb): the testQuery method of the MongoPaginatingSplitterTest class.
/**
 * Verifies range-query-enabled splitting: with a minimum of 5000 documents
 * per split and a query that excludes values in [25000, 31000), the splitter
 * should produce seven splits whose boundaries skip the excluded gap.
 */
@Test
public void testQuery() throws SplitFailedException {
    Configuration conf = new Configuration();
    MongoConfigUtil.setInputURI(conf, uri);
    MongoConfigUtil.setRangeQueryEnabled(conf, true);
    MongoConfigUtil.setInputSplitMinDocs(conf, 5000);
    // Match documents below 25000 or at/above 31000; the gap in between is
    // excluded and must not appear inside any split range.
    BasicDBObject lowerBranch = new BasicDBObject("value", new BasicDBObject("$lt", 25000));
    BasicDBObject upperBranch = new BasicDBObject("value", new BasicDBObject("$gte", 31000));
    DBObject query = new BasicDBObject("$or", new BasicDBObject[] { lowerBranch, upperBranch });
    MongoConfigUtil.setQuery(conf, query);
    MongoPaginatingSplitter splitter = new MongoPaginatingSplitter(conf);
    List<InputSplit> splits = splitter.calculateSplits();
    // Expected (min, max) bounds per split; null means unbounded on that side.
    Integer[][] expectedBounds = {
        { null, 5000 },
        { 5000, 10000 },
        { 10000, 15000 },
        { 15000, 20000 },
        { 20000, 31000 },
        { 31000, 36000 },
        { 36000, null },
    };
    assertEquals(expectedBounds.length, splits.size());
    for (int i = 0; i < expectedBounds.length; i++) {
        assertSplitRange((MongoInputSplit) splits.get(i), expectedBounds[i][0], expectedBounds[i][1]);
    }
    // 6000 documents excluded by query.
    assertSplitsCount(collection.count() - 6000, splits);
}
Example usage of org.apache.hadoop.mapreduce.InputSplit in the mongo-hadoop project (mongodb): the testAllOnOneSplit method of the SampleSplitterTest class.
/**
 * When the configured split size is large enough to hold every document in
 * the collection, the splitter should yield exactly one split that is
 * unbounded on both ends (empty min and max documents).
 */
@Test
public void testAllOnOneSplit() throws SplitFailedException {
    assumeTrue(isSampleOperatorSupported(uri));
    Configuration config = new Configuration();
    MongoConfigUtil.setInputURI(config, uri.getURI());
    // Split size is enough to encapsulate all documents.
    MongoConfigUtil.setSplitSize(config, 12);
    splitter.setConfiguration(config);
    List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(1, splits.size());
    MongoInputSplit onlySplit = (MongoInputSplit) splits.get(0);
    // An unbounded split carries empty bound documents on both sides.
    assertTrue(onlySplit.getMin().toMap().isEmpty());
    assertTrue(onlySplit.getMax().toMap().isEmpty());
}
Example usage of org.apache.hadoop.mapreduce.InputSplit in the mongo-hadoop project (mongodb): the testAlternateSplitKey method of the SampleSplitterTest class.
/**
 * Splitting on an alternate key ("i") should produce twelve splits whose
 * upper bounds strictly increase, with the first split unbounded below and
 * the last split unbounded above.
 */
@Test
public void testAlternateSplitKey() throws SplitFailedException {
    assumeTrue(isSampleOperatorSupported(uri));
    Configuration config = new Configuration();
    MongoConfigUtil.setInputURI(config, uri.getURI());
    MongoConfigUtil.setSplitSize(config, 1);
    MongoConfigUtil.setInputSplitKeyPattern(config, "{\"i\": 1}");
    splitter.setConfiguration(config);
    List<InputSplit> splits = splitter.calculateSplits();
    assertEquals(12, splits.size());
    MongoInputSplit head = (MongoInputSplit) splits.get(0);
    assertTrue(head.getMin().toMap().isEmpty());
    MongoInputSplit tail = (MongoInputSplit) splits.get(11);
    assertTrue(tail.getMax().toMap().isEmpty());
    // Ranges for splits are ascending. The final split is skipped here
    // because its max document is empty (unbounded) and has no "i" key.
    int previousKey = (Integer) head.getMax().get("i");
    for (int index = 1; index < splits.size() - 1; index++) {
        MongoInputSplit current = (MongoInputSplit) splits.get(index);
        int currentKey = (Integer) current.getMax().get("i");
        assertTrue(currentKey > previousKey);
        previousKey = currentKey;
    }
}
Example usage of org.apache.hadoop.mapreduce.InputSplit in the mongo-hadoop project (mongodb): the unshardedCollectionMinMax method of the StandaloneMongoSplitterTest class.
/**
 * Splitting an unsharded collection with explicit min/max split keys set
 * should produce no more splits than splitting the same collection without
 * any bounds.
 */
@Test
public void unshardedCollectionMinMax() throws UnknownHostException, SplitFailedException {
    Configuration config = new Configuration();
    MongoConfigUtil.setInputURI(config, uri);
    DBObject inputSplitKey = BasicDBObjectBuilder.start("value", 1).get();
    MongoConfigUtil.setInputSplitKey(config, inputSplitKey);
    MongoConfigUtil.setSplitSize(config, 1);
    // Construct the splitter only after the Configuration is fully populated.
    // The original built it before setInputURI, which worked solely because
    // the splitter reads its Configuration lazily; constructing it here is
    // consistent with the other StandaloneMongoSplitter tests and robust
    // against the splitter ever snapshotting its settings eagerly.
    StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
    List<InputSplit> regularSplits = splitter.calculateSplits();
    // Narrow the key range; the same splitter picks up the mutated config.
    MongoConfigUtil.setMinSplitKey(config, "{value:100}");
    MongoConfigUtil.setMaxSplitKey(config, "{value:39900}");
    List<InputSplit> inputSplits = splitter.calculateSplits();
    assertTrue("should be fewer splits with min/max set", regularSplits.size() >= inputSplits.size());
}
Example usage of org.apache.hadoop.mapreduce.InputSplit in the mongo-hadoop project (mongodb): the testFilterEmptySplits method of the StandaloneMongoSplitterTest class.
/**
 * With empty-split filtering enabled and a query that leaves one key range
 * with no matching documents, every returned split must be non-empty and the
 * splits together must cover exactly the documents matching the query.
 */
@Test
public void testFilterEmptySplits() throws SplitFailedException {
    Configuration conf = new Configuration();
    // Match documents with values below 20000 or above 35000; the range in
    // between yields an empty split that filtering should drop.
    BasicDBObject belowBranch = new BasicDBObject("value", new BasicDBObject("$lt", 20000));
    BasicDBObject aboveBranch = new BasicDBObject("value", new BasicDBObject("$gt", 35000));
    DBObject query = new BasicDBObject("$or", new BasicDBObject[] { belowBranch, aboveBranch });
    MongoConfigUtil.setInputURI(conf, uri);
    MongoConfigUtil.setEnableFilterEmptySplits(conf, true);
    MongoConfigUtil.setQuery(conf, query);
    // 1 MB per document results in 4 splits; the 3rd one is empty per
    // the above query.
    MongoConfigUtil.setSplitSize(conf, 1);
    StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(conf);
    List<InputSplit> splits = splitter.calculateSplits();
    // No splits are empty.
    for (InputSplit split : splits) {
        // Cursor is closed on the split, so copy it to create a new one.
        MongoInputSplit copied = new MongoInputSplit((MongoInputSplit) split);
        assertNotEquals(0, copied.getCursor().itcount());
    }
    assertSplitsCount(collection.count(query), splits);
}
Aggregations