Use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
The class TestSharded, method testDirectAccess:
public void testDirectAccess() {
    DBCollection collection = getMongos().getDB("mongo_hadoop").getCollection("yield_historical.out");
    collection.drop();

    // HADOOP61 - simulate a failed migration by having some docs from one chunk
    // also exist on another shard that does not own that chunk (duplicates)
    DB config = getMongos().getDB("config");
    DBObject chunk = config.getCollection("chunks").findOne(new BasicDBObject("shard", "sh01"));
    DBObject query = new BasicDBObject("_id",
        new BasicDBObject("$gte", ((DBObject) chunk.get("min")).get("_id"))
            .append("$lt", ((DBObject) chunk.get("max")).get("_id")));
    List<DBObject> data = toList(getMongos().getDB("mongo_hadoop").getCollection("yield_historical.in").find(query));
    DBCollection destination = getShard().getDB("mongo_hadoop").getCollection("yield_historical.in");
    for (DBObject doc : data) {
        destination.insert(doc, WriteConcern.UNACKNOWLEDGED);
    }

    MapReduceJob job = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .param(SPLITS_SLAVE_OK, "true")
        .param(SPLITS_USE_SHARDS, "true")
        .param(SPLITS_USE_CHUNKS, "false")
        .inputUris(new MongoClientURIBuilder(getInputUri()).readPreference(ReadPreference.secondary()).build());
    if (isHadoopV1()) {
        job.outputCommitter(MongoOutputCommitter.class);
    }
    job.execute(isRunTestInVm());
    compareResults(collection, getReference());

    collection.drop();
    MapReduceJob jobWithChunks = new MapReduceJob(TreasuryYieldXMLConfig.class.getName())
        .jar(JOBJAR_PATH)
        .inputUris(new MongoClientURIBuilder(getInputUri()).readPreference(ReadPreference.secondary()).build())
        .param(SPLITS_SLAVE_OK, "true")
        .param(SPLITS_USE_SHARDS, "true")
        .param(SPLITS_USE_CHUNKS, "true");
    if (isHadoopV1()) {
        jobWithChunks.outputCommitter(MongoOutputCommitter.class);
    }
    jobWithChunks.execute(isRunTestInVm());
    compareResults(collection, getReference());
}
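For illustration, the range query built from the chunk's min and max bounds above has the shape {_id: {$gte: <min _id>, $lt: <max _id>}}. A minimal standalone sketch of that construction, using hypothetical boundary values rather than ones read from config.chunks:

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;

public class ChunkRangeQuerySketch {
    public static void main(String[] args) {
        // Hypothetical chunk boundaries; the test reads the real ones from config.chunks.
        Object minId = 0;
        Object maxId = 500;
        DBObject query = new BasicDBObject("_id",
            new BasicDBObject("$gte", minId).append("$lt", maxId));
        // Prints something like: { "_id" : { "$gte" : 0 , "$lt" : 500}}
        System.out.println(query);
    }
}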
Use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
The class HiveMappingTest, method queryBasedHiveTable:
@Test
public void queryBasedHiveTable() throws SQLException {
    String tableName = "filtered";
    DBCollection collection = getCollection(tableName);
    collection.drop();
    dropTable(tableName);

    int size = 1000;
    for (int i = 0; i < size; i++) {
        collection.insert(new BasicDBObject("_id", i)
                              .append("intField", i % 10)
                              .append("booleanField", i % 2 == 0)
                              .append("stringField", "" + (i % 2 == 0)));
    }

    MongoClientURI uri = authCheck(new MongoClientURIBuilder().collection("mongo_hadoop", collection.getName())).build();
    ColumnMapping map = new ColumnMapping()
                            .map("id", "_id", "INT")
                            .map("ints", "intField", "INT")
                            .map("booleans", "booleanField", "BOOLEAN")
                            .map("strings", "stringField", "STRING");
    HiveTableBuilder builder = new HiveTableBuilder()
                                   .mapping(map)
                                   .name(tableName)
                                   .uri(uri)
                                   .tableProperty(MongoConfigUtil.INPUT_QUERY, "{_id : {\"$gte\" : 900 }}");
    execute(builder.toString());

    assertEquals(format("Should find %d items", size), collection.count(), size);
    Results execute = query(format("SELECT * from %s where id=1", tableName));
    assertTrue(execute.size() == 0);
    int expected = size - 900;
    assertEquals(format("Should find only %d items", expected),
                 query("SELECT count(*) as count from " + tableName).iterator().next().get(0),
                 "" + expected);
}
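The INPUT_QUERY table property above is passed as a JSON string; on the driver side the same filter can be expressed with BasicDBObject. A minimal sketch of that equivalence, assuming the legacy MongoDB Java driver used throughout these examples (where com.mongodb.util.JSON is available):

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.util.JSON;

public class InputQuerySketch {
    public static void main(String[] args) {
        // The JSON filter passed via MongoConfigUtil.INPUT_QUERY in the test above.
        DBObject parsed = (DBObject) JSON.parse("{_id : {\"$gte\" : 900 }}");
        // The same filter built programmatically with BasicDBObject.
        DBObject built = new BasicDBObject("_id", new BasicDBObject("$gte", 900));
        System.out.println(parsed); // prints something like { "_id" : { "$gte" : 900}}
        System.out.println(built);  // the same document, built directly
    }
}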
Use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
The class HiveMongoInputFormat, method getRecordReader:
@Override
public RecordReader<BSONWritable, BSONWritable> getRecordReader(final InputSplit split, final JobConf conf, final Reporter reporter)
    throws IOException {
    // split is of type 'MongoHiveInputSplit'
    MongoHiveInputSplit mhis = (MongoHiveInputSplit) split;

    // Get column name mapping.
    Map<String, String> colToMongoNames = columnMapping(conf);

    // Add projection from Hive.
    DBObject mongoProjection = getProjection(conf, colToMongoNames);
    MongoInputSplit delegate = (MongoInputSplit) mhis.getDelegate();
    if (mongoProjection != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB projection : " + mongoProjection);
        }
        delegate.setFields(mongoProjection);
    }

    // Filter from Hive.
    DBObject filter = getFilter(conf, colToMongoNames);

    // Combine with the filter from the table, if there is one.
    if (conf.get(MongoConfigUtil.INPUT_QUERY) != null) {
        DBObject tableFilter = MongoConfigUtil.getQuery(conf);
        if (null == filter) {
            filter = tableFilter;
        } else {
            BasicDBList conditions = new BasicDBList();
            conditions.add(filter);
            conditions.add(tableFilter);
            // Use an $and clause so we don't overwrite any of the table filter.
            filter = new BasicDBObject("$and", conditions);
        }
    }

    if (filter != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Adding MongoDB query: " + filter);
        }
        delegate.setQuery(filter);
    }

    // Return a MongoRecordReader. The delegate is of type 'MongoInputSplit'.
    return new MongoRecordReader(delegate);
}
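When both a Hive-pushed predicate and an INPUT_QUERY table filter are present, the method wraps them in an $and so neither condition overwrites the other. A minimal standalone sketch of that combination using only the driver types; the two example filters are hypothetical:

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;

public class CombineFiltersSketch {
    public static void main(String[] args) {
        // Hypothetical filter pushed down from a Hive WHERE clause.
        DBObject hiveFilter = new BasicDBObject("intField", new BasicDBObject("$lt", 5));
        // Hypothetical filter supplied through the table's INPUT_QUERY property.
        DBObject tableFilter = new BasicDBObject("_id", new BasicDBObject("$gte", 900));

        BasicDBList conditions = new BasicDBList();
        conditions.add(hiveFilter);
        conditions.add(tableFilter);
        DBObject combined = new BasicDBObject("$and", conditions);

        // Prints something like:
        // { "$and" : [ { "intField" : { "$lt" : 5}} , { "_id" : { "$gte" : 900}}]}
        System.out.println(combined);
    }
}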
Use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
The class HiveMongoInputFormatTest, method testTranslateCompareOp:
@Test
public void testTranslateCompareOp() {
    // WHERE i >= 20
    GenericUDFOPEqualOrGreaterThan gte = new GenericUDFOPEqualOrGreaterThan();
    ExprNodeDesc[] children = {
        new ExprNodeColumnDesc(new SimpleMockColumnInfo("i")),
        new ExprNodeConstantDesc(20)
    };
    ExprNodeGenericFuncDesc expr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, gte, Arrays.asList(children));
    Assert.assertEquals(new BasicDBObject("mongo_i", new BasicDBObject("$gte", 20)), filterForExpr(expr));
}
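The expected value in the assertion is the BasicDBObject form of the query {mongo_i: {$gte: 20}}, with the Hive column i mapped to the MongoDB field mongo_i. A small sketch of just that expected document, relying on BasicDBObject's value-based equality, which is what Assert.assertEquals uses here:

import com.mongodb.BasicDBObject;

public class ExpectedFilterSketch {
    public static void main(String[] args) {
        // The document the test expects filterForExpr(expr) to produce for "WHERE i >= 20".
        BasicDBObject expected = new BasicDBObject("mongo_i", new BasicDBObject("$gte", 20));
        BasicDBObject same = new BasicDBObject("mongo_i", new BasicDBObject("$gte", 20));
        System.out.println(expected.equals(same)); // true: equal contents compare equal
        System.out.println(expected);              // prints something like { "mongo_i" : { "$gte" : 20}}
    }
}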
Use of com.mongodb.BasicDBObject in project mongo-hadoop by mongodb.
The class BSONLoader, method readField:
/**
 * Convert an object from a MongoDB document into a type that Pig can
 * understand, based on the expectations of the given schema.
 * @param obj object from a MongoDB document
 * @param field the schema describing this field
 * @return an object appropriate for Pig
 * @throws IOException
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected static Object readField(final Object obj, final ResourceFieldSchema field) throws IOException {
    if (obj == null) {
        return null;
    }
    try {
        if (field == null) {
            return obj;
        }
        switch (field.getType()) {
            case DataType.INTEGER:
                return Integer.parseInt(obj.toString());
            case DataType.LONG:
                return Long.parseLong(obj.toString());
            case DataType.FLOAT:
                return Float.parseFloat(obj.toString());
            case DataType.DOUBLE:
                return Double.parseDouble(obj.toString());
            case DataType.BYTEARRAY:
                return BSONLoader.convertBSONtoPigType(obj);
            case DataType.CHARARRAY:
                return obj.toString();
            case DataType.DATETIME:
                return new DateTime(obj);
            case DataType.TUPLE:
                ResourceSchema s = field.getSchema();
                ResourceFieldSchema[] fs = s.getFields();
                Tuple t = tupleFactory.newTuple(fs.length);
                BasicDBObject val = (BasicDBObject) obj;
                for (int j = 0; j < fs.length; j++) {
                    t.set(j, readField(val.get(fs[j].getName()), fs[j]));
                }
                return t;
            case DataType.BAG:
                s = field.getSchema();
                fs = s.getFields();
                s = fs[0].getSchema();
                fs = s.getFields();
                DataBag bag = bagFactory.newDefaultBag();
                BasicDBList vals = (BasicDBList) obj;
                for (Object val1 : vals) {
                    t = tupleFactory.newTuple(fs.length);
                    for (int k = 0; k < fs.length; k++) {
                        t.set(k, readField(((BasicDBObject) val1).get(fs[k].getName()), fs[k]));
                    }
                    bag.add(t);
                }
                return bag;
            case DataType.MAP:
                s = field.getSchema();
                fs = s != null ? s.getFields() : null;
                Map outputMap = new HashMap();
                if (obj instanceof BSONObject) {
                    BasicBSONObject inputMap = (BasicBSONObject) obj;
                    for (String key : inputMap.keySet()) {
                        if (fs != null) {
                            outputMap.put(key, readField(inputMap.get(key), fs[0]));
                        } else {
                            outputMap.put(key, readField(inputMap.get(key), null));
                        }
                    }
                } else if (obj instanceof DBRef) {
                    DBRef ref = (DBRef) obj;
                    outputMap.put("$ref", ref.getCollectionName());
                    outputMap.put("$id", ref.getId().toString());
                }
                return outputMap;
            default:
                LOG.info("No explicit conversion for field " + field.getName() + "; using default BSON-to-Pig conversion");
                return BSONLoader.convertBSONtoPigType(obj);
        }
    } catch (Exception e) {
        String fieldName = field.getName() == null ? "" : field.getName();
        String type = DataType.genTypeToNameMap().get(field.getType());
        LOG.warn("Type " + type + " for field " + fieldName + " cannot be applied to " + obj.getClass().toString());
        return null;
    }
}
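A short sketch of the kind of nested document the TUPLE, BAG, and MAP branches above expect, built with BasicDBObject and BasicDBList; the field names are hypothetical and only the driver types are used, not the Pig loader itself:

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;

public class NestedDocumentSketch {
    public static void main(String[] args) {
        // A sub-document: the TUPLE branch unpacks this field by field against the schema.
        BasicDBObject address = new BasicDBObject("city", "NYC").append("zip", "10001");

        // An array of sub-documents: the BAG branch turns each element into a tuple.
        BasicDBList scores = new BasicDBList();
        scores.add(new BasicDBObject("subject", "math").append("score", 90));
        scores.add(new BasicDBObject("subject", "art").append("score", 85));

        // The top-level document; a free-form sub-document corresponds to the MAP branch.
        BasicDBObject doc = new BasicDBObject("name", "alice")
                                .append("address", address)
                                .append("scores", scores);
        System.out.println(doc);
    }
}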