Search in sources :

Example 1 with PigBoxedBSONValue

Use of com.mongodb.hadoop.pig.udf.types.PigBoxedBSONValue in the mongo-hadoop project by mongodb.

Class BSONStorage, method getTypeForBSON.

/**
 * Returns an object more suited for BSON storage. Object {@code o} corresponds to a field value in Pig.
 *
 * <p>The Pig data type is taken from {@code field} when it is non-null and its type is not
 * {@code DataType.UNKNOWN}; otherwise it is inferred from {@code o} via {@code DataType.findType}.
 * Tuples with an inner schema become an insertion-ordered {@code Map} keyed by field name, bags with
 * an inner schema become a list of converted tuples, and maps are copied with their keys stringified
 * and their values converted recursively (without a schema).
 *
 * @param o        object representing the Pig value to convert to a BSON-like object; may be {@code null}
 * @param field    schema of the field {@code o} belongs to; may be {@code null}, in which case the
 *                 type is inferred from {@code o}
 * @param toIgnore name of a field to unwrap: a tuple whose inner schema has exactly one field with
 *                 this name is replaced by the converted value of that single field
 * @return an Object that can be stored as BSON, or {@code null} if {@code o} is {@code null}
 * @throws IOException propagated from nested conversions (presumably raised when a tuple field
 *                     cannot be read -- confirm against {@code Tuple.get})
 */
public static Object getTypeForBSON(final Object o, final ResourceFieldSchema field, final String toIgnore) throws IOException {
    if (null == o) {
        return null;
    }

    byte dataType;
    ResourceSchema fieldInnerSchema = null;
    if (null == field || DataType.UNKNOWN == field.getType()) {
        // No usable schema: fall back to inspecting the runtime value.
        dataType = DataType.findType(o);
    } else {
        dataType = field.getType();
        fieldInnerSchema = field.getSchema();
    }

    // A value declared as bytearray that is actually a Map is converted as a map.
    if (dataType == DataType.BYTEARRAY && o instanceof Map) {
        dataType = DataType.MAP;
    }

    switch (dataType) {
        case DataType.NULL:
            return null;

        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.CHARARRAY:
            return o;

        case DataType.BYTEARRAY:
            // Boxed BSON values carry the original object; hand it back untouched.
            if (o instanceof PigBoxedBSONValue) {
                return ((PigBoxedBSONValue) o).getObject();
            }
            return o.toString();

        case DataType.DATETIME:
            return ((DateTime) o).toDate();

        // Given a TUPLE, create a Map so BSONEncoder will eat it.
        case DataType.TUPLE: {
            // Without an inner schema, BasicBSONEncoder will consume it as an Iterable.
            if (fieldInnerSchema == null) {
                return o;
            }
            // If there was an inner schema, create a Map from the Tuple.
            final ResourceFieldSchema[] tupleFields = fieldInnerSchema.getFields();
            final Tuple tuple = (Tuple) o;
            // A single-field wrapper named toIgnore is unwrapped. The comparison is made from
            // toIgnore's side so that an unnamed field does not cause a NullPointerException.
            if (1 == tupleFields.length && toIgnore != null && toIgnore.equals(tupleFields[0].getName())) {
                return getTypeForBSON(tuple.get(0), tupleFields[0], toIgnore);
            }
            // If there is more than one field in the tuple or no fields
            // to ignore, treat the Tuple as a Map (insertion-ordered).
            final Map<String, Object> m = new LinkedHashMap<String, Object>();
            for (int j = 0; j < tupleFields.length; j++) {
                m.put(tupleFields[j].getName(), getTypeForBSON(tuple.get(j), tupleFields[j], toIgnore));
            }
            return m;
        }

        // Given a BAG, create an Array so BSONEncoder will eat it.
        case DataType.BAG: {
            // Without an inner schema, BasicBSONEncoder will consume it as an Iterable.
            if (null == fieldInnerSchema) {
                return o;
            }
            final ResourceFieldSchema[] bagFields = fieldInnerSchema.getFields();
            // The first inner field describes the bag's tuples. If the schema declares no fields,
            // convert each tuple without a schema instead of indexing past the end of the array.
            final ResourceFieldSchema tupleSchema = bagFields.length > 0 ? bagFields[0] : null;
            final ArrayList<Object> bagList = new ArrayList<Object>();
            for (Tuple t : (DataBag) o) {
                bagList.add(getTypeForBSON(t, tupleSchema, toIgnore));
            }
            return bagList;
        }

        case DataType.MAP: {
            // o is known to be non-null here (checked at method entry).
            final Map<?, ?> map = (Map<?, ?>) o;
            final Map<String, Object> out = new HashMap<String, Object>(map.size());
            // Iterate entries directly rather than looking each key up again.
            for (Map.Entry<?, ?> entry : map.entrySet()) {
                out.put(entry.getKey().toString(), getTypeForBSON(entry.getValue(), null, toIgnore));
            }
            return out;
        }

        default:
            return o;
    }
}
Also used : ResourceSchema(org.apache.pig.ResourceSchema) DataBag(org.apache.pig.data.DataBag) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap) PigBoxedBSONValue(com.mongodb.hadoop.pig.udf.types.PigBoxedBSONValue) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Tuple(org.apache.pig.data.Tuple)

Aggregations

PigBoxedBSONValue (com.mongodb.hadoop.pig.udf.types.PigBoxedBSONValue)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 ResourceSchema (org.apache.pig.ResourceSchema)1 ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema)1 DataBag (org.apache.pig.data.DataBag)1 Tuple (org.apache.pig.data.Tuple)1 DateTime (org.joda.time.DateTime)1