Use of org.bson.BSONObject in project mongo-hadoop by mongodb.
The class BSONSerDe, method serializeStruct.
/**
 * Turn a Hive struct into a BasicBSONObject.
 * @param obj the Hive struct
 * @param structOI an {@code ObjectInspector} for the struct
 * @param ext the dotted field-path prefix for this struct, or the empty string at the top level
 * @return a BasicBSONObject representing the Hive struct, or an ObjectId if the struct has the
 *         shape of an ObjectId
 */
private Object serializeStruct(final Object obj, final StructObjectInspector structOI, final String ext) {
    if (ext.length() > 0 && isObjectIdStruct(obj, structOI)) {
        // A nested struct recognized as ObjectId-shaped is serialized as a real ObjectId,
        // built from its OID field.
        String objectIdString = "";
        for (StructField s : structOI.getAllStructFieldRefs()) {
            if (s.getFieldName().equals(OID)) {
                objectIdString = structOI.getStructFieldData(obj, s).toString();
                break;
            }
        }
        return new ObjectId(objectIdString);
    } else {
        BasicBSONObject bsonObject = new BasicBSONObject();
        // fields is the list of all variable names and information within the struct obj
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
            StructField field = fields.get(i);
            String fieldName, hiveMapping;
            // get the corresponding MongoDB field
            if (ext.length() == 0) {
                fieldName = columnNames.get(i);
                hiveMapping = fieldName;
            } else {
                fieldName = field.getFieldName();
                hiveMapping = ext + "." + fieldName;
            }
            ObjectInspector fieldOI = field.getFieldObjectInspector();
            Object fieldObj = structOI.getStructFieldData(obj, field);
            if (hiveToMongo != null && hiveToMongo.containsKey(hiveMapping)) {
                String mongoMapping = hiveToMongo.get(hiveMapping);
                int lastDotPos = mongoMapping.lastIndexOf(".");
                String lastMapping = lastDotPos == -1 ? mongoMapping : mongoMapping.substring(lastDotPos + 1);
                bsonObject.put(lastMapping, serializeObject(fieldObj, fieldOI, hiveMapping));
            } else {
                bsonObject.put(fieldName, serializeObject(fieldObj, fieldOI, hiveMapping));
            }
        }
        return bsonObject;
    }
}
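As a rough illustration of the two shapes this method can produce (the values below are invented, not taken from the project): a struct that passes the isObjectIdStruct check collapses to a real org.bson.types.ObjectId built from its OID field, while any other struct becomes a nested BasicBSONObject keyed by the Hive field names (or by the last segment of a configured hiveToMongo mapping).

import org.bson.BasicBSONObject;
import org.bson.types.ObjectId;

public class SerializeStructShapes {
    public static void main(final String[] args) {
        // ObjectId-shaped struct: serializeStruct returns a real ObjectId (hex string invented here).
        Object idValue = new ObjectId("5f1d7a6c8e4b2a0012345678");

        // Any other struct: serializeStruct returns a nested BasicBSONObject.
        BasicBSONObject address = new BasicBSONObject("city", "NYC").append("zip", "10001");

        BasicBSONObject doc = new BasicBSONObject("_id", idValue).append("address", address);
        System.out.println(doc);
    }
}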
Use of org.bson.BSONObject in project mongo-hadoop by mongodb.
The class TreasuryYieldMapper, method map.
@Override
@SuppressWarnings("deprecation")
public void map(final Object key, final BSONWritable value,
                final OutputCollector<IntWritable, DoubleWritable> output,
                final Reporter reporter) throws IOException {
    BSONObject pValue = value.getDoc();
    // Date.getYear() is deprecated and returns the year minus 1900, hence the correction.
    keyInt.set(((Date) pValue.get("_id")).getYear() + 1900);
    valueDouble.set(((Number) pValue.get("bc10Year")).doubleValue());
    output.collect(keyInt, valueDouble);
}
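For context, a minimal sketch of the input document shape this mapper assumes (the values are invented): each document carries a java.util.Date under "_id" and a numeric 10-year yield under "bc10Year", so the sample below would produce the output pair (1990, 7.94).

import java.util.Calendar;
import java.util.GregorianCalendar;

import org.bson.BSONObject;
import org.bson.BasicBSONObject;

public class TreasuryYieldInputSketch {
    public static void main(final String[] args) {
        // A made-up document of the shape the mapper expects.
        BSONObject sample = new BasicBSONObject("_id",
                new GregorianCalendar(1990, Calendar.JANUARY, 4).getTime())
                .append("bc10Year", 7.94);
        System.out.println(sample);
    }
}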
Use of org.bson.BSONObject in project mongo-hadoop by mongodb.
The class MongoLoader, method pushProjection.
@Override
public RequiredFieldResponse pushProjection(final RequiredFieldList requiredFieldList) throws FrontendException {
    // Without a schema we cannot map aliases to fields, so decline the projection.
    if (null == schema) {
        return new RequiredFieldResponse(false);
    }
    BSONObject projection = new BasicBSONObject();
    boolean needId = false;
    for (RequiredField field : requiredFieldList.getFields()) {
        String fieldName = field.getAlias();
        if (idAlias != null && idAlias.equals(fieldName)) {
            fieldName = "_id";
            needId = true;
        }
        List<RequiredField> subFields = field.getSubFields();
        if (subFields != null && !subFields.isEmpty()) {
            // Pig is limited to populating at most one subfield level deep.
            for (RequiredField subField : subFields) {
                projection.put(fieldName + "." + subField.getAlias(), true);
            }
        } else {
            projection.put(fieldName, true);
        }
    }
    // Turn off _id unless asked for.
    if (!needId) {
        projection.put("_id", false);
    }
    LOG.debug("projection: " + projection);
    // Store the projection to be retrieved later and written into the job configuration.
    getUDFProperties().setProperty(MongoConfigUtil.INPUT_FIELDS, JSON.serialize(projection));
    // Return a response indicating that we can honor the projection.
    return new RequiredFieldResponse(true);
}
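Roughly, for a hypothetical Pig script that only reads name and address.city (and never references the id alias), the method would build and store a projection like the one below; a small sketch, assuming the same JSON.serialize call used above.

import org.bson.BSONObject;
import org.bson.BasicBSONObject;

import com.mongodb.util.JSON;

public class ProjectionSketch {
    public static void main(final String[] args) {
        BSONObject projection = new BasicBSONObject();
        projection.put("name", true);
        projection.put("address.city", true);
        projection.put("_id", false); // _id is switched off because it was not asked for
        // The serialized form is what pushProjection stores under MongoConfigUtil.INPUT_FIELDS.
        System.out.println(JSON.serialize(projection));
    }
}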
Use of org.bson.BSONObject in project mongo-hadoop by mongodb.
The class DataframeExample, method run.
public void run() {
    JavaSparkContext sc = new JavaSparkContext(new SparkConf());

    // Set configuration options for the MongoDB Hadoop Connector.
    Configuration mongodbConfig = new Configuration();
    // MongoInputFormat allows us to read from a live MongoDB instance.
    // We could also use BSONFileInputFormat to read BSON snapshots.
    mongodbConfig.set("mongo.job.input.format", "com.mongodb.hadoop.MongoInputFormat");
    // MongoDB connection string naming a collection to use.
    // If using BSON, use "mapred.input.dir" to configure the directory
    // where BSON files are located instead.
    mongodbConfig.set("mongo.input.uri", "mongodb://localhost:27017/enron_mail.messages");

    // Create an RDD backed by the MongoDB collection.
    JavaPairRDD<Object, BSONObject> documents = sc.newAPIHadoopRDD(
        mongodbConfig,            // Configuration
        MongoInputFormat.class,   // InputFormat: read from a live cluster.
        Object.class,             // Key class
        BSONObject.class);        // Value class

    JavaRDD<Message> messages = documents.map(new Function<Tuple2<Object, BSONObject>, Message>() {
        public Message call(final Tuple2<Object, BSONObject> tuple) {
            Message m = new Message();
            BSONObject header = (BSONObject) tuple._2().get("headers");
            m.setTo((String) header.get("To"));
            m.setxFrom((String) header.get("From"));
            m.setMessageID((String) header.get("Message-ID"));
            m.setBody((String) tuple._2().get("body"));
            return m;
        }
    });

    SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
    DataFrame messagesSchema = sqlContext.createDataFrame(messages, Message.class);
    messagesSchema.registerTempTable("messages");
    DataFrame ericsMessages = sqlContext.sql("SELECT to, body FROM messages WHERE to = \"eric.bass@enron.com\"");
    ericsMessages.show();
    messagesSchema.printSchema();
}
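The example maps each document onto a Message JavaBean and lets Spark derive the DataFrame schema from the bean's getters. A minimal sketch of that bean, assuming only the fields the mapping above actually sets (the real class in the examples module may carry more):

import java.io.Serializable;

// Sketch of the Message bean assumed above; Spark's createDataFrame reads the schema
// from the getters, and the class must be Serializable to be shipped to executors.
public class Message implements Serializable {
    private String to;
    private String xFrom;
    private String messageID;
    private String body;

    public String getTo() { return to; }
    public void setTo(final String to) { this.to = to; }

    public String getxFrom() { return xFrom; }
    public void setxFrom(final String xFrom) { this.xFrom = xFrom; }

    public String getMessageID() { return messageID; }
    public void setMessageID(final String messageID) { this.messageID = messageID; }

    public String getBody() { return body; }
    public void setBody(final String body) { this.body = body; }
}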
Use of org.bson.BSONObject in project mongo-hadoop by mongodb.
The class EnronMailReducer, method reduce.
@Override
public void reduce(final MailPair key, final Iterator<IntWritable> values,
                   final OutputCollector<BSONWritable, IntWritable> output,
                   final Reporter reporter) throws IOException {
    int sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
    }
    // Emit one document per (from, to) pair, with the message count as the value.
    BSONObject outDoc = BasicDBObjectBuilder.start().add("f", key.getFrom()).add("t", key.getTo()).get();
    reduceResult.setDoc(outDoc);
    intw.set(sum);
    output.collect(reduceResult, intw);
}
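The method writes through two reusable instance fields, reduceResult and intw; a minimal sketch of how those members are presumably declared (only the field names are taken from the snippet, the rest is assumed):

import org.apache.hadoop.io.IntWritable;

import com.mongodb.hadoop.io.BSONWritable;

// Assumed surrounding state for the reducer above; only the field names come from the snippet.
public class EnronMailReducerStateSketch {
    private final BSONWritable reduceResult = new BSONWritable();
    private final IntWritable intw = new IntWritable();
}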