Example 86 with BSONObject

use of org.bson.BSONObject in project mongo-hadoop by mongodb.

Class BSONSerDe, method serializeStruct.

/**
 * Turn a Hive struct into a BasicBSONObject, or into an ObjectId when the
 * struct represents a MongoDB ObjectId.
 * @param obj the Hive struct
 * @param structOI an {@code ObjectInspector} for the struct
 * @param ext the dotted field-name prefix of the struct (empty for the top-level row)
 * @return an ObjectId if the struct is an ObjectId struct, otherwise a BasicBSONObject
 */
private Object serializeStruct(final Object obj, final StructObjectInspector structOI, final String ext) {
    if (ext.length() > 0 && isObjectIdStruct(obj, structOI)) {
        String objectIdString = "";
        for (StructField s : structOI.getAllStructFieldRefs()) {
            if (s.getFieldName().equals(OID)) {
                objectIdString = structOI.getStructFieldData(obj, s).toString();
                break;
            }
        }
        return new ObjectId(objectIdString);
    } else {
        BasicBSONObject bsonObject = new BasicBSONObject();
        // fields is the list of all variable names and information within the struct obj
        List<? extends StructField> fields = structOI.getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
            StructField field = fields.get(i);
            String fieldName, hiveMapping;
            // get corresponding mongoDB field
            if (ext.length() == 0) {
                fieldName = columnNames.get(i);
                hiveMapping = fieldName;
            } else {
                fieldName = field.getFieldName();
                hiveMapping = ext + "." + fieldName;
            }
            ObjectInspector fieldOI = field.getFieldObjectInspector();
            Object fieldObj = structOI.getStructFieldData(obj, field);
            if (hiveToMongo != null && hiveToMongo.containsKey(hiveMapping)) {
                String mongoMapping = hiveToMongo.get(hiveMapping);
                int lastDotPos = mongoMapping.lastIndexOf(".");
                String lastMapping = lastDotPos == -1 ? mongoMapping : mongoMapping.substring(lastDotPos + 1);
                bsonObject.put(lastMapping, serializeObject(fieldObj, fieldOI, hiveMapping));
            } else {
                bsonObject.put(fieldName, serializeObject(fieldObj, fieldOI, hiveMapping));
            }
        }
        return bsonObject;
    }
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ObjectId(org.bson.types.ObjectId) BSONObject(org.bson.BSONObject)
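
The hiveToMongo remapping above keeps only the last segment of the mapped MongoDB path, because the parent segments are already handled by the enclosing structs. A minimal standalone sketch of that segment extraction (the mapping string here is illustrative, not taken from the connector):

public class LastSegmentSketch {
    public static void main(final String[] args) {
        // Hypothetical mapping: Hive column "address.zip" mapped to BSON path "addr.zipcode".
        String mongoMapping = "addr.zipcode";
        int lastDotPos = mongoMapping.lastIndexOf(".");
        String lastMapping = lastDotPos == -1 ? mongoMapping : mongoMapping.substring(lastDotPos + 1);
        // Only the final path segment becomes the key inside the parent BSON object.
        System.out.println(lastMapping); // prints "zipcode"
    }
}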

Example 87 with BSONObject

use of org.bson.BSONObject in project mongo-hadoop by mongodb.

Class TreasuryYieldMapper, method map.

@Override
@SuppressWarnings("deprecation")
public void map(final Object key, final BSONWritable value, final OutputCollector<IntWritable, DoubleWritable> output, final Reporter reporter) throws IOException {
    BSONObject pValue = value.getDoc();
    // Date#getYear() is deprecated and returns years since 1900, hence the suppression above.
    keyInt.set(((Date) pValue.get("_id")).getYear() + 1900);
    valueDouble.set(((Number) pValue.get("bc10Year")).doubleValue());
    output.collect(keyInt, valueDouble);
}
Also used : BSONObject(org.bson.BSONObject) Date(java.util.Date)
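
The mapper assumes each input document carries a Date-valued _id and a numeric bc10Year field. A minimal sketch of the same extraction against a hand-built document (the sample values are illustrative):

import java.util.Date;

import org.bson.BSONObject;
import org.bson.BasicBSONObject;

public class TreasuryDocSketch {
    @SuppressWarnings("deprecation")
    public static void main(final String[] args) {
        BSONObject doc = new BasicBSONObject();
        // Deprecated Date constructor: year is offset from 1900, month is zero-based.
        doc.put("_id", new Date(90, 0, 2));
        doc.put("bc10Year", 7.94);
        // getYear() also returns years since 1900, matching the +1900 in the mapper.
        int year = ((Date) doc.get("_id")).getYear() + 1900;
        double yield = ((Number) doc.get("bc10Year")).doubleValue();
        System.out.println(year + " -> " + yield); // 1990 -> 7.94
    }
}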

Example 88 with BSONObject

use of org.bson.BSONObject in project mongo-hadoop by mongodb.

Class MongoLoader, method pushProjection.

@Override
public RequiredFieldResponse pushProjection(final RequiredFieldList requiredFieldList) throws FrontendException {
    // Without a schema we cannot resolve field aliases, so decline to push the projection.
    if (null == schema) {
        return new RequiredFieldResponse(false);
    }
    BSONObject projection = new BasicBSONObject();
    boolean needId = false;
    for (RequiredField field : requiredFieldList.getFields()) {
        String fieldName = field.getAlias();
        if (idAlias != null && idAlias.equals(fieldName)) {
            fieldName = "_id";
            needId = true;
        }
        List<RequiredField> subFields = field.getSubFields();
        if (subFields != null && !subFields.isEmpty()) {
            // Pig is limited to populating at most one subfield level deep.
            for (RequiredField subField : subFields) {
                projection.put(fieldName + "." + subField.getAlias(), true);
            }
        } else {
            projection.put(fieldName, true);
        }
    }
    // Turn off _id unless asked for.
    if (!needId) {
        projection.put("_id", false);
    }
    LOG.debug("projection: " + projection);
    // Store projection to be retrieved later and stored into the job
    // configuration.
    getUDFProperties().setProperty(MongoConfigUtil.INPUT_FIELDS, JSON.serialize(projection));
    // Return a response indicating that we can honor the projection.
    return new RequiredFieldResponse(true);
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject)
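
For a Pig script that only references a top-level field plus one subfield, the method above builds a projection document like the following. This is a minimal sketch of the resulting BSON with illustrative field names; the actual keys depend on the script's schema:

import org.bson.BSONObject;
import org.bson.BasicBSONObject;

public class ProjectionSketch {
    public static void main(final String[] args) {
        BSONObject projection = new BasicBSONObject();
        // Top-level required field.
        projection.put("name", true);
        // Pig pushes at most one subfield level, so nested fields use dotted keys.
        projection.put("address.zip", true);
        // _id was not requested, so it is explicitly excluded.
        projection.put("_id", false);
        System.out.println(projection);
    }
}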

Example 89 with BSONObject

use of org.bson.BSONObject in project mongo-hadoop by mongodb.

Class DataframeExample, method run.

public void run() {
    JavaSparkContext sc = new JavaSparkContext(new SparkConf());
    // Set configuration options for the MongoDB Hadoop Connector.
    Configuration mongodbConfig = new Configuration();
    // MongoInputFormat allows us to read from a live MongoDB instance.
    // We could also use BSONFileInputFormat to read BSON snapshots.
    mongodbConfig.set("mongo.job.input.format", "com.mongodb.hadoop.MongoInputFormat");
    // MongoDB connection string naming a collection to use.
    // If using BSON, use "mapred.input.dir" to configure the directory
    // where BSON files are located instead.
    mongodbConfig.set("mongo.input.uri", "mongodb://localhost:27017/enron_mail.messages");
    // Create an RDD backed by the MongoDB collection.
    JavaPairRDD<Object, BSONObject> documents = sc.newAPIHadoopRDD(
        mongodbConfig,          // Configuration
        MongoInputFormat.class, // InputFormat: read from a live cluster.
        Object.class,           // Key class
        BSONObject.class        // Value class
    );
    JavaRDD<Message> messages = documents.map(new Function<Tuple2<Object, BSONObject>, Message>() {

        public Message call(final Tuple2<Object, BSONObject> tuple) {
            Message m = new Message();
            BSONObject header = (BSONObject) tuple._2().get("headers");
            m.setTo((String) header.get("To"));
            m.setxFrom((String) header.get("From"));
            m.setMessageID((String) header.get("Message-ID"));
            m.setBody((String) tuple._2().get("body"));
            return m;
        }
    });
    SQLContext sqlContext = new SQLContext(sc);
    DataFrame messagesSchema = sqlContext.createDataFrame(messages, Message.class);
    messagesSchema.registerTempTable("messages");
    DataFrame ericsMessages = sqlContext.sql("SELECT to, body FROM messages WHERE to = \"eric.bass@enron.com\"");
    ericsMessages.show();
    messagesSchema.printSchema();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) BSONObject(org.bson.BSONObject) DataFrame(org.apache.spark.sql.DataFrame) Tuple2(scala.Tuple2) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) SparkConf(org.apache.spark.SparkConf) SQLContext(org.apache.spark.sql.SQLContext)
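
createDataFrame infers the schema from a JavaBean, so the Message class referenced above needs standard getters and setters for the fields populated in the map function. A minimal sketch of such a bean (the project's actual Message class may carry additional fields):

import java.io.Serializable;

public class Message implements Serializable {
    private String to;
    private String xFrom;
    private String messageID;
    private String body;

    public String getTo() { return to; }
    public void setTo(final String to) { this.to = to; }

    public String getxFrom() { return xFrom; }
    public void setxFrom(final String xFrom) { this.xFrom = xFrom; }

    public String getMessageID() { return messageID; }
    public void setMessageID(final String messageID) { this.messageID = messageID; }

    public String getBody() { return body; }
    public void setBody(final String body) { this.body = body; }
}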

Example 90 with BSONObject

use of org.bson.BSONObject in project mongo-hadoop by mongodb.

Class EnronMailReducer, method reduce.

@Override
public void reduce(final MailPair key, final Iterator<IntWritable> values, final OutputCollector<BSONWritable, IntWritable> output, final Reporter reporter) throws IOException {
    int sum = 0;
    while (values.hasNext()) {
        sum += values.next().get();
    }
    BSONObject outDoc = BasicDBObjectBuilder.start().add("f", key.getFrom()).add("t", key.getTo()).get();
    reduceResult.setDoc(outDoc);
    intw.set(sum);
    output.collect(reduceResult, intw);
}
Also used : BSONObject(org.bson.BSONObject)
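
Each output key is a BSON document naming the sender and recipient, while the message count travels in the IntWritable value. A minimal sketch of that document shape built with the same BasicDBObjectBuilder call (the addresses are illustrative):

import com.mongodb.BasicDBObjectBuilder;

import org.bson.BSONObject;

public class EnronOutputDocSketch {
    public static void main(final String[] args) {
        // BasicDBObjectBuilder.get() returns a DBObject, which implements BSONObject.
        BSONObject outDoc = BasicDBObjectBuilder.start()
                .add("f", "alice@example.com")
                .add("t", "bob@example.com")
                .get();
        System.out.println(outDoc);
    }
}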

Aggregations

BSONObject (org.bson.BSONObject): 101
BasicBSONObject (org.bson.BasicBSONObject): 49
Test (org.junit.Test): 34
BasicDBObject (com.mongodb.BasicDBObject): 19
SerializableString (com.fasterxml.jackson.core.SerializableString): 14
SerializedString (com.fasterxml.jackson.core.io.SerializedString): 14
LinkedHashMap (java.util.LinkedHashMap): 14
ByteArrayInputStream (java.io.ByteArrayInputStream): 13
IOException (java.io.IOException): 11
DBObject (com.mongodb.DBObject): 10
ArrayList (java.util.ArrayList): 9
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 7
Map (java.util.Map): 6
BasicBSONDecoder (org.bson.BasicBSONDecoder): 6
BSONDecoder (org.bson.BSONDecoder): 5
BSONEncoder (org.bson.BSONEncoder): 5
BasicBSONEncoder (org.bson.BasicBSONEncoder): 5
LazyBSONObject (org.bson.LazyBSONObject): 5
SQLExpr (com.alibaba.druid.sql.ast.SQLExpr): 4
BSONFileSplit (com.mongodb.hadoop.input.BSONFileSplit): 4