Search in sources :

Example 56 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in the mongo-hadoop project by MongoDB.

From the class BSONSerDe, method deserialize.

/**
 * Given a Writable object of BSON, turn it into a Hive table row.
 *
 * @param writable a {@link BSONWritable} wrapping the BSON document to deserialize
 * @return the reusable {@code row} list, holding one Hive-typed value per column
 *         (NULL for columns that could not be resolved or converted)
 * @throws SerDeException if {@code writable} is not a {@link BSONWritable}
 */
@Override
public //CHECKSTYLE:OFF
Object deserialize(final Writable writable) throws SerDeException {
    //CHECKSTYLE:ON
    BSONObject doc;
    // The row list is reused across records; drop leftovers from the previous call.
    row.clear();
    // Make sure it's a BSONWritable object
    if (writable instanceof BSONWritable) {
        doc = ((BSONWritable) writable).getDoc();
    } else {
        throw new SerDeException(format("%srequires a BSONWritable object, not%s", getClass(), writable.getClass()));
    }
    // For each field, cast it to a HIVE type and add to the current row
    Object value;
    List<String> structFieldNames = docTypeInfo.getAllStructFieldNames();
    for (String fieldName : structFieldNames) {
        try {
            TypeInfo fieldTypeInfo = docTypeInfo.getStructFieldTypeInfo(fieldName);
            // get the corresponding field name in MongoDB
            String mongoMapping;
            if (hiveToMongo == null) {
                mongoMapping = fieldName;
            } else {
                mongoMapping = hiveToMongo.containsKey(fieldName) ? hiveToMongo.get(fieldName) : fieldName;
            }
            value = deserializeField(getValue(doc, mongoMapping), fieldTypeInfo, fieldName);
        } catch (Exception e) {
            // A missing or unconvertible field becomes a NULL column. Log the
            // exception as the cause so the underlying failure is diagnosable
            // instead of being silently discarded.
            LOG.warn("Could not find the appropriate field for name " + fieldName, e);
            value = null;
        }
        row.add(value);
    }
    return row;
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject) BasicBSONObject(org.bson.BasicBSONObject) BSONObject(org.bson.BSONObject) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 57 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in the mongo-hadoop project by MongoDB.

From the class BSONSerDe, method initialize.

/**
 * Finds out the information of the table, including the column names and types.
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // Column names arrive as a comma-separated list; tolerate whitespace around commas.
    final String splitCols = "\\s*,\\s*";
    columnNames = Arrays.asList(tblProps.getProperty(serdeConstants.LIST_COLUMNS).split(splitCols));
    // Pick up any user-specified Hive-to-Mongo field mappings.
    if (tblProps.containsKey(MONGO_COLS)) {
        final String mappingJson = tblProps.getProperty(MONGO_COLS);
        final Map<String, String> rules = ((BasicBSONObject) JSON.parse(mappingJson)).toMap();
        // register the hive field mappings to mongo field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }
    // Parse the declared column types and make sure they line up with the names.
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }
    // Build the struct type describing a row, and its standard object inspector.
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
    // Reusable writable for later serialization calls.
    bsonWritable = new BSONWritable();
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 58 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in the mongo-hadoop project by MongoDB.

From the class BSONSerDe, method registerMappings.

/**
 * Takes in the object represented by JSON for Hive to Mongo/BSON mapping. Records these mappings and infers upper level mappings from
 * lower level declarations.
 *
 * @param rules map from a Hive field name (possibly a dotted path) to the Mongo field name
 * @throws SerDeException if two rules disagree on the mapping for the same key,
 *                        or a dotted key and its value differ in depth
 */
private void registerMappings(final Map<String, String> rules) throws SerDeException {
    // explode/infer shorter mappings
    for (Entry e : rules.entrySet()) {
        String key = (String) e.getKey();
        String value = (String) e.getValue();
        // Mappings are stored under the lower-cased key, so the ambiguity check
        // must also use the lower-cased key. Checking the raw key (as before)
        // let rules differing only in case slip past the check and silently
        // overwrite each other.
        String lowerKey = key.toLowerCase();
        if (hiveToMongo.containsKey(lowerKey) && !hiveToMongo.get(lowerKey).equals(value)) {
            throw new SerDeException("Ambiguous rule definition for " + key);
        } else {
            hiveToMongo.put(lowerKey, value);
        }
        if (key.contains(".")) {
            // Infer a mapping for every dotted prefix:
            // "a.b.c" -> "x.y.z" implies "a" -> "x" and "a.b" -> "x.y".
            String[] miniKeys = key.split("\\.");
            String[] miniValues = value.split("\\.");
            if (miniKeys.length != miniValues.length) {
                throw new SerDeException(key + " should be of same depth as " + value);
            }
            int i = 0;
            String curKey = "", curValue = "";
            while (i < miniKeys.length - 1) {
                curKey += miniKeys[i];
                curValue += miniValues[i];
                // Same case-consistency rule applies to each inferred prefix.
                String lowerCurKey = curKey.toLowerCase();
                if (hiveToMongo.containsKey(lowerCurKey) && !hiveToMongo.get(lowerCurKey).equals(curValue)) {
                    throw new SerDeException("Ambiguous rule definition for " + curKey);
                } else {
                    hiveToMongo.put(lowerCurKey, curValue);
                }
                curKey += ".";
                curValue += ".";
                i += 1;
            }
        }
    }
}
Also used : Entry(java.util.Map.Entry) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 59 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in the mongo-hadoop project by MongoDB.

From the class BSONSerDeTest, method testStruct.

/**
 * Round-trips a struct column: deserializing a BSON sub-document must yield
 * Hive's list representation of the struct, and serializing that list must
 * rebuild the original BSON document.
 */
@Test
public void testStruct() throws SerDeException {
    final String columnNames = "m";
    final String columnTypes = "struct<one:int,two:string>";
    final int oneValue = 10;
    final String twoValue = "key";

    final BasicBSONObject value = new BasicBSONObject();
    value.put("one", oneValue);
    value.put("two", twoValue);

    // Hive represents a struct as an ordered list of its field values.
    final ArrayList<Object> returned = new ArrayList<Object>();
    returned.add(oneValue);
    returned.add(twoValue);

    final BSONSerDe serde = new BSONSerDe();
    final Object result = helpDeserialize(serde, columnNames, columnTypes, value, true);
    assertThat(returned, equalTo(result));

    // Build the struct inspector from the inner field inspectors and names.
    final ArrayList<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
    fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(Integer.class));
    fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class));
    final ArrayList<String> fieldNames = new ArrayList<String>();
    fieldNames.add("one");
    fieldNames.add("two");
    final StructObjectInspector structInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
    // The overall row inspector wraps the inner struct inspector.
    final StructObjectInspector oi = createObjectInspector(columnNames, structInspector);

    // Expected BSON after serializing the list form back into a document.
    final BasicBSONObject expected = new BasicBSONObject();
    expected.put(columnNames, value);
    // Structs are passed into serialize as array/list inside Hive.
    final ArrayList<Object> hiveRow = new ArrayList<Object>();
    hiveRow.add(returned);
    final Object serialized = serde.serialize(hiveRow, oi);
    assertThat(new BSONWritable(expected), equalTo(serialized));
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) BasicBSONObject(org.bson.BasicBSONObject) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 60 with SerDeException

Use of org.apache.hadoop.hive.serde2.SerDeException in the mongo-hadoop project by MongoDB.

From the class BSONSerDeTest, method testString.

/**
 * Round-trips a plain string column through deserialize and serialize.
 */
@Test
public void testString() throws SerDeException {
    final String columnNames = "s";
    final String columnTypes = "string";
    final String value = "value";

    final BSONSerDe serde = new BSONSerDe();
    // Deserializing the raw string must hand it back unchanged.
    final Object result = helpDeserialize(serde, columnNames, columnTypes, value);
    assertThat(value, equalTo(result));

    // Serializing the value under a string inspector must produce the same document.
    final ObjectInspector stringInspector =
        PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class);
    final BasicBSONObject bObject = new BasicBSONObject();
    final Object serialized = helpSerialize(columnNames, stringInspector, bObject, value, serde);
    assertThat(new BSONWritable(bObject), equalTo(serialized));
}
Also used : BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BasicBSONObject(org.bson.BasicBSONObject) Test(org.junit.Test)

Aggregations

SerDeException (org.apache.hadoop.hive.serde2.SerDeException)124 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)108 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)100 ArrayList (java.util.ArrayList)98 Properties (java.util.Properties)59 Test (org.junit.Test)59 Configuration (org.apache.hadoop.conf.Configuration)52 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)52 Text (org.apache.hadoop.io.Text)50 IOException (java.io.IOException)37 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)33 Schema (org.apache.avro.Schema)31 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)31 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)28 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)28 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)24 Put (org.apache.hadoop.hbase.client.Put)22 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)22 IntWritable (org.apache.hadoop.io.IntWritable)22 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)21