Search in sources:

Example 11 with BasicBSONObject

use of org.bson.BasicBSONObject in project mongo-hadoop by mongodb.

the class BSONSerDe method serializeMap.

/**
 * Converts a Hive Map into a BSON document with one field per map entry.
 * Each key becomes a field name via {@code toString()}; each value is serialized with
 * {@code serializeObject} using the map's value inspector.
 * @param obj the Hive Map.
 * @param mapOI an {@code ObjectInspector} for the Hive Map.
 * @param ext the field name, handed unchanged to {@code serializeObject} for every value
 * @return a {@code BasicBSONObject} representing the Hive Map
 */
private Object serializeMap(final Object obj, final MapObjectInspector mapOI, final String ext) {
    // Every value in the map has the same type, so a single inspector covers all entries.
    final ObjectInspector valueInspector = mapOI.getMapValueObjectInspector();
    final BasicBSONObject document = new BasicBSONObject();
    for (Entry<?, ?> mapEntry : mapOI.getMap(obj).entrySet()) {
        document.put(mapEntry.getKey().toString(), serializeObject(mapEntry.getValue(), valueInspector, ext));
    }
    return document;
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject)

Example 12 with BasicBSONObject

use of org.bson.BasicBSONObject in project mongo-hadoop by mongodb.

the class BSONSerDe method initialize.

/**
 * Finds out the information of the table, including the column names and types.
 * <p>
 * Reads the column names and column types from the table properties, registers the
 * user-supplied Hive-to-MongoDB field mappings when the {@code MONGO_COLS} property is
 * present, and builds the struct type info and object inspector for the row.
 *
 * @param conf the Hadoop configuration; not read by this method
 * @param tblProps the table properties holding the column names, the column types and,
 *        optionally, the field mappings
 * @throws SerDeException if the column names or column types property is missing, if the
 *         field mappings do not parse to a JSON object, or if the number of column names
 *         differs from the number of column types
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // Read both mandatory properties up front so that a missing one is reported as a
    // SerDeException instead of surfacing later as a NullPointerException.
    String colNamesStr = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    if (colNamesStr == null) {
        throw new SerDeException("Table property '" + serdeConstants.LIST_COLUMNS + "' is not set");
    }
    String colTypesStr = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (colTypesStr == null) {
        throw new SerDeException("Table property '" + serdeConstants.LIST_COLUMN_TYPES + "' is not set");
    }
    // regex used to split column names between commas
    String splitCols = "\\s*,\\s*";
    // Get the table column names
    columnNames = Arrays.asList(colNamesStr.split(splitCols));
    // Get mappings specified by the user
    if (tblProps.containsKey(MONGO_COLS)) {
        String mongoFieldsStr = tblProps.getProperty(MONGO_COLS);
        Object parsedRules = JSON.parse(mongoFieldsStr);
        // Only a JSON object can describe field mappings; reject arrays, scalars and null
        // explicitly rather than failing with a ClassCastException.
        if (!(parsedRules instanceof BasicBSONObject)) {
            throw new SerDeException("Table property '" + MONGO_COLS
                    + "' must be a JSON object, but was: " + mongoFieldsStr);
        }
        Map<String, String> rules = ((BasicBSONObject) parsedRules).toMap();
        // register the hive field mappings to mongo field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }
    // Get the table column types
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }
    // Get the structure and object inspector
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
    // Create the BSONWritable instance for future use.
    bsonWritable = new BSONWritable();
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 13 with BasicBSONObject

use of org.bson.BasicBSONObject in project mongo-hadoop by mongodb.

the class BSONLoader method readField.

/**
 * Convert an object from a MongoDB document into a type that Pig can
 * understand, based on the expectations of the given schema.
 * <p>
 * Conversion is best-effort: if anything goes wrong while converting, a warning is
 * logged and {@code null} is returned instead of propagating the failure.
 * @param obj object from a MongoDB document; {@code null} is returned as {@code null}
 * @param field the schema describing this field; when {@code null}, {@code obj} is
 *        returned unchanged
 * @return an object appropriate for Pig, or {@code null} if {@code obj} is {@code null}
 *         or the conversion fails
 * @throws IOException declared in the signature; conversion failures in the body are
 *         caught and reported as a {@code null} result rather than thrown
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected static Object readField(final Object obj, final ResourceFieldSchema field) throws IOException {
    if (obj == null) {
        return null;
    }
    try {
        if (field == null) {
            // No schema to honour: hand the raw value back.
            return obj;
        }
        switch(field.getType()) {
            case DataType.INTEGER:
                return Integer.parseInt(obj.toString());
            case DataType.LONG:
                return Long.parseLong(obj.toString());
            case DataType.FLOAT:
                return Float.parseFloat(obj.toString());
            case DataType.DOUBLE:
                return Double.parseDouble(obj.toString());
            case DataType.BYTEARRAY:
                return BSONLoader.convertBSONtoPigType(obj);
            case DataType.CHARARRAY:
                return obj.toString();
            case DataType.DATETIME:
                return new DateTime(obj);
            case DataType.TUPLE:
                // One tuple slot per schema field, looked up by name in the document.
                ResourceSchema s = field.getSchema();
                ResourceFieldSchema[] fs = s.getFields();
                Tuple t = tupleFactory.newTuple(fs.length);
                BasicDBObject val = (BasicDBObject) obj;
                for (int j = 0; j < fs.length; j++) {
                    t.set(j, readField(val.get(fs[j].getName()), fs[j]));
                }
                return t;
            case DataType.BAG:
                // The bag schema wraps a single tuple schema; unwrap it to reach the
                // fields of the tuples stored in the bag.
                s = field.getSchema();
                fs = s.getFields();
                s = fs[0].getSchema();
                fs = s.getFields();
                DataBag bag = bagFactory.newDefaultBag();
                BasicDBList vals = (BasicDBList) obj;
                for (Object val1 : vals) {
                    t = tupleFactory.newTuple(fs.length);
                    for (int k = 0; k < fs.length; k++) {
                        t.set(k, readField(((BasicDBObject) val1).get(fs[k].getName()), fs[k]));
                    }
                    bag.add(t);
                }
                return bag;
            case DataType.MAP:
                s = field.getSchema();
                fs = s != null ? s.getFields() : null;
                Map outputMap = new HashMap();
                if (obj instanceof BSONObject) {
                    // Work through the BSONObject interface so that the cast agrees with
                    // the instanceof check; casting to BasicBSONObject here would fail for
                    // any other BSONObject implementation and turn the whole map into null.
                    BSONObject inputMap = (BSONObject) obj;
                    for (String key : inputMap.keySet()) {
                        // With a value schema, convert each value against it; otherwise
                        // pass the values through untouched.
                        outputMap.put(key, readField(inputMap.get(key), fs != null ? fs[0] : null));
                    }
                } else if (obj instanceof DBRef) {
                    DBRef ref = (DBRef) obj;
                    outputMap.put("$ref", ref.getCollectionName());
                    outputMap.put("$id", ref.getId().toString());
                }
                return outputMap;
            default:
                LOG.info("Using default BSON-to-Pig conversion for field " + field.getName());
                return BSONLoader.convertBSONtoPigType(obj);
        }
    } catch (Exception e) {
        // Deliberate best-effort: a value that does not fit the schema is reported and
        // replaced by null. field is non-null here, since the null-schema case returns
        // before anything that can throw.
        String fieldName = field.getName() == null ? "" : field.getName();
        String type = DataType.genTypeToNameMap().get(field.getType());
        LOG.warn("Type " + type + " for field " + fieldName + " can not be applied to " + obj.getClass().toString());
        return null;
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) DataBag(org.apache.pig.data.DataBag) HashMap(java.util.HashMap) BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject) DBRef(com.mongodb.DBRef) DateTime(org.joda.time.DateTime) ExecException(org.apache.pig.backend.executionengine.ExecException) IOException(java.io.IOException) BasicDBObject(com.mongodb.BasicDBObject) BasicDBList(com.mongodb.BasicDBList) BasicBSONObject(org.bson.BasicBSONObject) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) BasicBSONObject(org.bson.BasicBSONObject) BasicDBObject(com.mongodb.BasicDBObject) BSONObject(org.bson.BSONObject) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.apache.pig.data.Tuple)

Example 14 with BasicBSONObject

use of org.bson.BasicBSONObject in project mongo-hadoop by mongodb.

the class BSONSerDeTest method helpSerialize.

/**
 * Serializes a one-column row with the given SerDe and records the same column in the
 * supplied BSON object.
 * <p>
 * A struct inspector is created from {@code columnNames} and {@code inner}, the entry
 * {@code columnNames -> value} is put into {@code bObject}, and a one-element list holding
 * {@code value} is passed to {@code serde.serialize} as the row.
 *
 * @param columnNames the column name, also used as the key put into {@code bObject}
 * @param inner the object inspector for the column's value
 * @param bObject the BSON object that receives the {@code columnNames -> value} entry
 * @param value the column value to serialize
 * @param serde the SerDe under test
 * @return whatever {@code serde.serialize} returns for the row
 * @throws SerDeException declared for failures raised during serialization
 */
private Object helpSerialize(final String columnNames, final ObjectInspector inner, final BasicBSONObject bObject, final Object value, final BSONSerDe serde) throws SerDeException {
    final StructObjectInspector structInspector = createObjectInspector(columnNames, inner);
    // Mirror the column into the caller's BSON object.
    bObject.put(columnNames, value);
    // Hive models a struct as a list of its field values, so the row is a list.
    final ArrayList<Object> row = new ArrayList<Object>();
    row.add(value);
    final Object serialized = serde.serialize(row, structInspector);
    return serialized;
}
Also used : ArrayList(java.util.ArrayList) BasicBSONObject(org.bson.BasicBSONObject) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 15 with BasicBSONObject

use of org.bson.BasicBSONObject in project mongo-hadoop by mongodb.

the class BSONSerDeTest method testString.

/**
 * Round-trips a string column through {@code BSONSerDe}: the deserialized value must equal
 * the original string, and the serialized row must equal a {@code BSONWritable} wrapping
 * the BSON object filled in by {@code helpSerialize}.
 */
@Test
public void testString() throws SerDeException {
    final String column = "s";
    final String text = "value";
    final BSONSerDe serDe = new BSONSerDe();

    // Deserialization direction.
    final Object deserialized = helpDeserialize(serDe, column, "string", text);
    assertThat(text, equalTo(deserialized));

    // Serialization direction; helpSerialize fills in the expected document.
    final ObjectInspector stringInspector = PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class);
    final BasicBSONObject expectedDocument = new BasicBSONObject();
    final Object actual = helpSerialize(column, stringInspector, expectedDocument, text, serDe);
    assertThat(new BSONWritable(expectedDocument), equalTo(actual));
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BasicBSONObject(org.bson.BasicBSONObject) Test(org.junit.Test)

Aggregations

BasicBSONObject (org.bson.BasicBSONObject)88 Test (org.junit.Test)39 BSONObject (org.bson.BSONObject)37 ArrayList (java.util.ArrayList)15 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)14 BSONWritable (com.mongodb.hadoop.io.BSONWritable)13 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)13 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)13 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)13 BasicDBObject (com.mongodb.BasicDBObject)11 ObjectId (org.bson.types.ObjectId)11 IOException (java.io.IOException)8 ByteArrayInputStream (java.io.ByteArrayInputStream)6 DataBag (org.apache.pig.data.DataBag)6 Map (java.util.Map)5 Tuple (org.apache.pig.data.Tuple)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 Mongo (com.mongodb.Mongo)4 Date (java.util.Date)4 DoubleWritable (org.apache.hadoop.io.DoubleWritable)4