Use of org.apache.hadoop.hive.serde2.SerDeException in project mongo-hadoop by mongodb.
Class BSONSerDe, method deserialize.
/**
 * Given a Writable object of BSON, turn it into a Hive table row.
 *
 * @param writable the input, which must be a {@code BSONWritable} wrapping the BSON document
 * @return the deserialized row: the shared {@code row} list, holding one (possibly null)
 *         value per declared Hive column, in column order
 * @throws SerDeException if {@code writable} is not a {@code BSONWritable}
 */
@Override
public //CHECKSTYLE:OFF
Object deserialize(final Writable writable) throws SerDeException {
    //CHECKSTYLE:ON
    BSONObject doc;
    // The row list is reused across calls; reset it before refilling.
    row.clear();
    // Make sure it's a BSONWritable object
    if (writable instanceof BSONWritable) {
        doc = ((BSONWritable) writable).getDoc();
    } else {
        // Fix: the original format string lacked spaces around the class names,
        // producing a garbled message like "class XrequiresY a BSONWritable...".
        throw new SerDeException(format("%s requires a BSONWritable object, not %s", getClass(), writable.getClass()));
    }
    // For each field, cast it to a HIVE type and add to the current row
    Object value;
    List<String> structFieldNames = docTypeInfo.getAllStructFieldNames();
    for (String fieldName : structFieldNames) {
        try {
            TypeInfo fieldTypeInfo = docTypeInfo.getStructFieldTypeInfo(fieldName);
            // get the corresponding field name in MongoDB
            String mongoMapping;
            if (hiveToMongo == null) {
                mongoMapping = fieldName;
            } else {
                mongoMapping = hiveToMongo.containsKey(fieldName) ? hiveToMongo.get(fieldName) : fieldName;
            }
            value = deserializeField(getValue(doc, mongoMapping), fieldTypeInfo, fieldName);
        } catch (Exception e) {
            // A missing/bad field yields a null column rather than failing the row.
            // Fix: include the exception as the log cause instead of swallowing it.
            LOG.warn("Could not find the appropriate field for name " + fieldName, e);
            value = null;
        }
        row.add(value);
    }
    return row;
}
Use of org.apache.hadoop.hive.serde2.SerDeException in project mongo-hadoop by mongodb.
Class BSONSerDe, method initialize.
/**
 * Reads the table definition from the table properties: column names, column
 * types, and any user-declared Hive-to-MongoDB field-name mappings.
 *
 * @param conf     the Hadoop configuration (unused here)
 * @param tblProps table properties carrying column metadata and mappings
 * @throws SerDeException if the column-name and column-type lists differ in size
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // Column names arrive comma-separated; tolerate whitespace around commas.
    final String namesProperty = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(namesProperty.split("\\s*,\\s*"));

    // Optional user-supplied field mappings, expressed as a JSON document.
    if (tblProps.containsKey(MONGO_COLS)) {
        final String mappingJson = tblProps.getProperty(MONGO_COLS);
        final Map<String, String> rules = ((BasicBSONObject) JSON.parse(mappingJson)).toMap();
        // register the hive field mappings to mongo field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }

    // Column types, parsed from the serde type string.
    final String typesProperty = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typesProperty);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }

    // Build the struct type info and its object inspector for the table row.
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);

    // Create the BSONWritable instance for future use.
    bsonWritable = new BSONWritable();
}
Use of org.apache.hadoop.hive.serde2.SerDeException in project mongo-hadoop by mongodb.
Class BSONSerDe, method registerMappings.
/**
 * Takes in the object represented by JSON for Hive to Mongo/BSON mapping. Records these mappings and infers upper level mappings from
 * lower level declarations.
 *
 * <p>Keys are stored lower-cased (Hive column names are case-insensitive);
 * the mapped Mongo field names keep their original case.</p>
 *
 * @param rules map of Hive field paths to Mongo field paths (dot-separated)
 * @throws SerDeException if two rules map the same key to different values, or
 *                        if a dotted key and its value differ in depth
 */
private void registerMappings(final Map<String, String> rules) throws SerDeException {
    // explode/infer shorter mappings
    for (Entry<String, String> e : rules.entrySet()) {
        String key = e.getKey();
        String value = e.getValue();
        // Fix: the map stores lower-cased keys, so the ambiguity check must
        // also use the lower-cased key; the original checked the raw-case key,
        // letting rules that differ only in case silently overwrite each other.
        String lowerKey = key.toLowerCase();
        if (hiveToMongo.containsKey(lowerKey) && !hiveToMongo.get(lowerKey).equals(value)) {
            throw new SerDeException("Ambiguous rule definition for " + key);
        } else {
            hiveToMongo.put(lowerKey, value);
        }
        if (key.contains(".")) {
            // split by "." and register every prefix mapping as well, so an
            // upper-level field inherits the mapping implied by its children
            String[] miniKeys = key.split("\\.");
            String[] miniValues = value.split("\\.");
            if (miniKeys.length != miniValues.length) {
                throw new SerDeException(key + " should be of same depth as " + value);
            }
            int i = 0;
            String curKey = "", curValue = "";
            while (i < miniKeys.length - 1) {
                curKey += miniKeys[i];
                curValue += miniValues[i];
                // Same fix as above: check against the lower-cased prefix key.
                String lowerCurKey = curKey.toLowerCase();
                if (hiveToMongo.containsKey(lowerCurKey) && !hiveToMongo.get(lowerCurKey).equals(curValue)) {
                    throw new SerDeException("Ambiguous rule definition for " + curKey);
                } else {
                    hiveToMongo.put(lowerCurKey, curValue);
                }
                curKey += ".";
                curValue += ".";
                i += 1;
            }
        }
    }
}
Use of org.apache.hadoop.hive.serde2.SerDeException in project mongo-hadoop by mongodb.
Class BSONSerDeTest, method testStruct.
@Test
public void testStruct() throws SerDeException {
    final String colName = "m";
    final String colType = "struct<one:int,two:string>";

    // Build the BSON document {one: 10, two: "key"} that backs the struct column.
    final int one = 10;
    final String two = "key";
    final BasicBSONObject structDoc = new BasicBSONObject();
    structDoc.put("one", one);
    structDoc.put("two", two);

    // Hive represents structs as ordered lists of their field values.
    final ArrayList<Object> expectedRow = new ArrayList<Object>();
    expectedRow.add(one);
    expectedRow.add(two);

    final BSONSerDe serde = new BSONSerDe();
    final Object deserialized = helpDeserialize(serde, colName, colType, structDoc, true);
    assertThat(expectedRow, equalTo(deserialized));

    // To serialize, build the struct's inspector from its field names and
    // the inspectors of its two primitive members.
    final ArrayList<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
    fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(Integer.class));
    fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class));
    final ArrayList<String> fieldNames = new ArrayList<String>();
    fieldNames.add("one");
    fieldNames.add("two");
    final StructObjectInspector innerInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);

    // Wrap the struct inspector in the top-level row inspector.
    final StructObjectInspector rowInspector = createObjectInspector(colName, innerInspector);

    // Serializing the row (a list containing the struct-as-list) should yield
    // a BSONWritable of {m: {one: 10, two: "key"}}.
    final BasicBSONObject expectedDoc = new BasicBSONObject();
    expectedDoc.put(colName, structDoc);
    final ArrayList<Object> rowToSerialize = new ArrayList<Object>();
    rowToSerialize.add(expectedRow);
    final Object serialized = serde.serialize(rowToSerialize, rowInspector);
    assertThat(new BSONWritable(expectedDoc), equalTo(serialized));
}
Use of org.apache.hadoop.hive.serde2.SerDeException in project mongo-hadoop by mongodb.
Class BSONSerDeTest, method testString.
@Test
public void testString() throws SerDeException {
    final String colName = "s";
    final String colType = "string";
    final String original = "value";

    // Round-trip one string column: deserializing {s: "value"} yields "value".
    final BSONSerDe serde = new BSONSerDe();
    final Object deserialized = helpDeserialize(serde, colName, colType, original);
    assertThat(original, equalTo(deserialized));

    // Serializing it back should reproduce the BSON document {s: "value"}.
    final ObjectInspector stringInspector =
        PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class);
    final BasicBSONObject expectedDoc = new BasicBSONObject();
    final Object serialized = helpSerialize(colName, stringInspector, expectedDoc, original, serde);
    assertThat(new BSONWritable(expectedDoc), equalTo(serialized));
}
Aggregations