Use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in the Apache Hive project.
Class TypeInfoToSchema, method createAvroMap.
/**
 * Builds the Avro map schema corresponding to a Hive map type.
 *
 * Only maps keyed by Hive strings are accepted; the value type is converted with
 * {@code createAvroSchema}.
 *
 * @param typeInfo the Hive type to convert; it is cast to {@code MapTypeInfo}
 * @return an Avro map schema wrapping the converted value schema
 * @throws UnsupportedOperationException if the map key type is anything other than a Hive string,
 *         including non-primitive key types
 */
private Schema createAvroMap(TypeInfo typeInfo) {
  MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
  TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
  // Check the runtime type before casting: a complex key (struct, list, ...) must be reported as
  // unsupported instead of escaping as a ClassCastException from the PrimitiveTypeInfo cast.
  boolean stringKey = keyTypeInfo instanceof PrimitiveTypeInfo
      && ((PrimitiveTypeInfo) keyTypeInfo).getPrimitiveCategory()
          == PrimitiveObjectInspector.PrimitiveCategory.STRING;
  if (!stringKey) {
    throw new UnsupportedOperationException("Key of Map can only be a String");
  }
  Schema valueSchema = createAvroSchema(mapTypeInfo.getMapValueTypeInfo());
  return Schema.createMap(valueSchema);
}
Use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in the Apache Hive project.
Class TestAccumuloSerDe, method testStructOfMapSerialization.
/**
 * Serializes and then lazily re-reads a Hive row whose first column (mapped to the Accumulo
 * row ID) is a struct of two string-to-string maps and whose second column is a plain string.
 *
 * Verifies that the serialized row ID bytes, the single column update, and both maps read back
 * through {@code LazyAccumuloRow} match what was written.
 */
@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
  List<String> hiveColumns = Arrays.asList("row", "col");
  List<String> rowIdFieldNames = Arrays.asList("map1", "map2");

  // Hive types: struct<map1:map<string,string>,map2:map<string,string>>, string
  TypeInfo stringMapType =
      TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
  TypeInfo rowIdType =
      TypeInfoFactory.getStructTypeInfo(rowIdFieldNames, Arrays.asList(stringMapType, stringMapType));
  List<TypeInfo> hiveTypes = Arrays.<TypeInfo>asList(rowIdType, TypeInfoFactory.stringTypeInfo);

  // No separator properties are set, so the serde falls back to its default separators.
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
  props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(hiveColumns));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(hiveTypes));

  AccumuloSerDeParameters accumuloParams =
      new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters lazyParams = accumuloParams.getSerDeParameters();
  byte[] separators = lazyParams.getSeparators();

  // Object inspectors, innermost first: string -> map<string,string> -> row ID struct -> row.
  TypeInfo stringType = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
  LazyStringObjectInspector stringInspector =
      (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(
          stringType,
          new byte[] { 0 },
          0,
          lazyParams.getNullSequence(),
          lazyParams.isEscaped(),
          lazyParams.getEscapeChar());
  LazyMapObjectInspector mapInspector =
      LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
          stringInspector,
          stringInspector,
          separators[3],
          separators[4],
          lazyParams.getNullSequence(),
          lazyParams.isEscaped(),
          lazyParams.getEscapeChar());
  LazySimpleStructObjectInspector rowIdInspector =
      (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
          rowIdFieldNames,
          Arrays.<ObjectInspector>asList(mapInspector, mapInspector),
          separators[2],
          lazyParams.getNullSequence(),
          lazyParams.isLastColumnTakesRest(),
          lazyParams.isEscaped(),
          lazyParams.getEscapeChar());
  LazySimpleStructObjectInspector rowInspector =
      (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
          hiveColumns,
          Arrays.asList(rowIdInspector, stringInspector),
          separators[1],
          lazyParams.getNullSequence(),
          lazyParams.isLastColumnTakesRest(),
          lazyParams.isEscaped(),
          lazyParams.getEscapeChar());

  AccumuloRowSerializer serializer =
      new AccumuloRowSerializer(
          0,
          lazyParams,
          accumuloParams.getColumnMappings(),
          new ColumnVisibility(),
          accumuloParams.getRowIdFactory());

  // Separator bytes in the literals below: \2 splits the Hive columns, \3 splits the two fields
  // of the row ID struct, \4 splits map entries and \5 splits each key from its value.
  String serializedRowId = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
  ByteArrayRef rowBytes = new ByteArrayRef();
  rowBytes.setData((serializedRowId + "\2foo").getBytes());
  LazyStruct lazyRow = (LazyStruct) LazyFactory.createLazyObject(rowInspector);
  lazyRow.init(rowBytes, 0, rowBytes.getData().length);

  // Write path: the struct becomes the row ID, the string column becomes the only update.
  Mutation mutation = serializer.serialize(lazyRow, rowInspector);
  Assert.assertArrayEquals(serializedRowId.getBytes(), mutation.getRow());
  Assert.assertEquals(1, mutation.getUpdates().size());
  ColumnUpdate onlyUpdate = mutation.getUpdates().get(0);
  Assert.assertEquals("cf", new String(onlyUpdate.getColumnFamily()));
  Assert.assertEquals("cq", new String(onlyUpdate.getColumnQualifier()));
  Assert.assertEquals("foo", new String(onlyUpdate.getValue()));

  // Read path: rebuild the row from the mutation's row ID and decode it lazily.
  AccumuloHiveRow hiveRow = new AccumuloHiveRow(new String(mutation.getRow()));
  hiveRow.add("cf", "cq", "foo".getBytes());
  LazyAccumuloRow lazyRead = new LazyAccumuloRow(rowInspector);
  lazyRead.init(hiveRow, accumuloParams.getColumnMappings(), accumuloParams.getRowIdFactory());

  List<Object> columnsRead = lazyRead.getFieldsAsList();
  Assert.assertEquals(2, columnsRead.size());
  Assert.assertEquals("foo", columnsRead.get(1).toString());

  LazyStruct rowIdRead = (LazyStruct) columnsRead.get(0);
  List<Object> rowIdFields = rowIdRead.getFieldsAsList();
  Assert.assertEquals(2, rowIdFields.size());

  // First map of the row ID struct.
  Map<String, String> expectedFirst = new HashMap<String, String>();
  expectedFirst.put("key10", "value10");
  expectedFirst.put("key11", "value11");
  Map<?, ?> firstRead = ((LazyMap) rowIdFields.get(0)).getMap();
  System.out.println("Actual map 1: " + firstRead);
  Map<String, String> firstAsStrings = new HashMap<String, String>();
  for (Entry<?, ?> pair : firstRead.entrySet()) {
    firstAsStrings.put(pair.getKey().toString(), pair.getValue().toString());
  }
  Assert.assertEquals(expectedFirst, firstAsStrings);

  // Second map of the row ID struct.
  Map<String, String> expectedSecond = new HashMap<String, String>();
  expectedSecond.put("key20", "value20");
  expectedSecond.put("key21", "value21");
  Map<?, ?> secondRead = ((LazyMap) rowIdFields.get(1)).getMap();
  System.out.println("Actual map 2: " + secondRead);
  Map<String, String> secondAsStrings = new HashMap<String, String>();
  for (Entry<?, ?> pair : secondRead.entrySet()) {
    secondAsStrings.put(pair.getKey().toString(), pair.getValue().toString());
  }
  Assert.assertEquals(expectedSecond, secondAsStrings);
}
Use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in the Apache Hive project.
Class ColumnMappings, method setHiveColumnDescription.
/**
 * Attaches the Hive column name and type to each column mapping and validates that the types
 * fit the kind of mapping.
 *
 * @param serdeName name used as the prefix of error messages
 * @param columnNames Hive column names, positionally aligned with the column mappings
 * @param columnTypes Hive column types, positionally aligned with {@code columnNames}
 * @throws SerDeException if the number of columns differs from the number of mappings, if a
 *         mapping with no qualifier (that is neither the row key nor the timestamp) is not a
 *         map with a primitive key, or if a timestamp mapping is neither timestamp nor bigint
 */
void setHiveColumnDescription(String serdeName, List<String> columnNames, List<TypeInfo> columnTypes) throws SerDeException {
  if (columnNames.size() != columnsMapping.length) {
    throw new SerDeException(serdeName + ": columns has " + columnNames.size() + " elements while hbase.columns.mapping has " + columnsMapping.length + " elements" + " (counting the key if implicit)");
  }

  for (int index = 0; index < columnNames.size(); index++) {
    ColumnMapping mapping = columnsMapping[index];
    mapping.columnName = columnNames.get(index);
    TypeInfo hiveType = columnTypes.get(index);
    mapping.columnType = hiveType;

    if (mapping.hbaseTimestamp) {
      // The timestamp column must be readable as either a timestamp or a long.
      boolean timestampCompatible =
          mapping.isCategory(PrimitiveCategory.TIMESTAMP) || mapping.isCategory(PrimitiveCategory.LONG);
      if (!timestampCompatible) {
        throw new SerDeException(serdeName + ": timestamp columns should be of " + "timestamp or bigint type, but is mapped to " + hiveType.getTypeName());
      }
      continue;
    }

    // Only mappings without a qualifier that are not the row key need the map check below.
    if (mapping.qualifierName != null || mapping.hbaseRowKey) {
      continue;
    }

    // Such a mapping must be a Map<key,?> where key extends LazyPrimitive<?, ?> and thus has
    // type Category.PRIMITIVE.
    boolean mapWithPrimitiveKey =
        hiveType.getCategory() == ObjectInspector.Category.MAP
            && ((MapTypeInfo) hiveType).getMapKeyTypeInfo().getCategory() == ObjectInspector.Category.PRIMITIVE;
    if (!mapWithPrimitiveKey) {
      throw new SerDeException(serdeName + ": hbase column family '" + mapping.familyName + "' should be mapped to Map<? extends LazyPrimitive<?, ?>,?>, that is " + "the Key for the map should be of primitive type, but is mapped to " + hiveType.getTypeName());
    }
  }
}
Use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in the Apache Hive project.
Class ColumnMappings, method parseColumnStorageTypes.
/**
 * Decides, for every column mapping, whether its data is stored in string or binary form and
 * records the decision in the mapping's {@code binaryStorage} list.
 *
 * A mapping spec may end in a storage suffix introduced by '#'. For a regular column the suffix
 * is a single specifier; for a map column it is {@code key:value}, one specifier for each side.
 * A specifier is '-' (use the table level default), a prefix of "string", or a prefix of
 * "binary", e.g. '-,b,s,-,s:b,...' across the columns. Binary storage is only ever recorded for
 * primitive types other than string; every other type records {@code false}. A non-map column
 * contributes one entry, a map column contributes two (key first, then value).
 *
 * @param hbaseTableDefaultStorageType - the specification associated with the table property
 *          hbase.table.default.storage.type; null or empty selects string storage
 * @throws SerDeException on parse error.
 */
void parseColumnStorageTypes(String hbaseTableDefaultStorageType) throws SerDeException {
  boolean tableBinaryStorage = false;
  boolean defaultSpecified = hbaseTableDefaultStorageType != null && !"".equals(hbaseTableDefaultStorageType);
  if (defaultSpecified) {
    if ("binary".equals(hbaseTableDefaultStorageType)) {
      tableBinaryStorage = true;
    } else if (!"string".equals(hbaseTableDefaultStorageType)) {
      throw new SerDeException("Error: " + HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE + " parameter must be specified as" + " 'string' or 'binary'; '" + hbaseTableDefaultStorageType + "' is not a valid specification for this table/serde property.");
    }
  }

  for (ColumnMapping colMap : columnsMapping) {
    TypeInfo colType = colMap.columnType;
    String mappingSpec = colMap.mappingSpec;
    // Everything after a single '#' is the storage suffix; any other shape means "no suffix".
    String[] specParts = mappingSpec.split("#");
    String[] storageInfo = specParts.length == 2 ? specParts[1].split(":") : null;

    if (storageInfo == null) {
      // No suffix: every slot behaves as if it had been given the '-' specifier.
      if (colType.getCategory() == ObjectInspector.Category.MAP) {
        MapTypeInfo mapType = (MapTypeInfo) colType;
        TypeInfo keyType = mapType.getMapKeyTypeInfo();
        TypeInfo valueType = mapType.getMapValueTypeInfo();
        colMap.binaryStorage.add(usesBinaryStorage(keyType, "-", tableBinaryStorage));
        colMap.binaryStorage.add(usesBinaryStorage(valueType, "-", tableBinaryStorage));
      } else {
        colMap.binaryStorage.add(usesBinaryStorage(colType, "-", tableBinaryStorage));
      }
      continue;
    }

    switch (storageInfo.length) {
    case 1: {
      // A single specifier is only legal for a non-map column.
      String storageOption = storageInfo[0];
      if (colType.getCategory() == ObjectInspector.Category.MAP || !isStorageSpecifier(storageOption)) {
        throw new SerDeException("Error: A column storage specification is one of the following:" + " '-', a prefix of 'string', or a prefix of 'binary'. " + storageOption + " is not a valid storage option specification for " + colMap.columnName);
      }
      colMap.binaryStorage.add(usesBinaryStorage(colType, storageOption, tableBinaryStorage));
      break;
    }
    case 2: {
      // A key:value pair of specifiers is only legal for a map column.
      String keyStorage = storageInfo[0];
      String valStorage = storageInfo[1];
      if (colType.getCategory() != ObjectInspector.Category.MAP || !isStorageSpecifier(keyStorage) || !isStorageSpecifier(valStorage)) {
        throw new SerDeException("Error: To specify a valid column storage type for a Map" + " column, use any two specifiers from '-', a prefix of 'string', " + " and a prefix of 'binary' separated by a ':'." + " Valid examples are '-:-', 's:b', etc. They specify the storage type for the" + " key and value parts of the Map<?,?> respectively." + " Invalid storage specification for column " + colMap.columnName + "; " + storageInfo[0] + ":" + storageInfo[1]);
      }
      MapTypeInfo mapType = (MapTypeInfo) colType;
      TypeInfo keyType = mapType.getMapKeyTypeInfo();
      TypeInfo valueType = mapType.getMapValueTypeInfo();
      colMap.binaryStorage.add(usesBinaryStorage(keyType, keyStorage, tableBinaryStorage));
      colMap.binaryStorage.add(usesBinaryStorage(valueType, valStorage, tableBinaryStorage));
      // Sanity check: a map column must end up with exactly a key entry and a value entry.
      if (colMap.binaryStorage.size() != 2) {
        throw new SerDeException("Error: In parsing the storage specification for column " + colMap.columnName);
      }
      break;
    }
    default:
      // error in storage specification
      throw new SerDeException("Error: " + HBaseSerDe.HBASE_COLUMNS_MAPPING + " storage specification " + mappingSpec + " is not valid for column: " + colMap.columnName);
    }
  }
}

/** Returns true when {@code type} is a primitive type other than string. */
private static boolean isNonStringPrimitive(TypeInfo type) {
  return type.getCategory() == ObjectInspector.Category.PRIMITIVE
      && !type.getTypeName().equals(serdeConstants.STRING_TYPE_NAME);
}

/** Returns true when {@code option} is '-', a prefix of "string", or a prefix of "binary". */
private static boolean isStorageSpecifier(String option) {
  return option.equals("-") || "string".startsWith(option) || "binary".startsWith(option);
}

/**
 * Resolves one storage specifier against one type: false unless the type is a non-string
 * primitive; otherwise the table default for '-', and true only for a prefix of "binary".
 */
private static boolean usesBinaryStorage(TypeInfo type, String option, boolean tableDefault) {
  if (!isNonStringPrimitive(type)) {
    return false;
  }
  if ("-".equals(option)) {
    return tableDefault;
  }
  return "binary".startsWith(option);
}
Use of org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo in the Apache Hive project.
Class HCatRecordObjectInspectorFactory, method getStandardObjectInspectorFromTypeInfo.
/**
 * Returns a standard (Java-object based) ObjectInspector for the given type, building it
 * recursively for struct, list and map types and reusing previously built inspectors from
 * {@code cachedObjectInspectors}.
 *
 * @param typeInfo the Hive type to create an inspector for
 * @return the inspector, or {@code null} when the type's category is not one of PRIMITIVE,
 *         STRUCT, LIST or MAP
 */
public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
  ObjectInspector oi = cachedObjectInspectors.get(typeInfo);
  if (oi != null) {
    return oi;
  }

  LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName());
  switch (typeInfo.getCategory()) {
  case PRIMITIVE:
    oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) typeInfo);
    break;
  case STRUCT:
    StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
    List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
    for (TypeInfo fieldTypeInfo : fieldTypeInfos) {
      fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfo));
    }
    oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
    break;
  case LIST:
    ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo(((ListTypeInfo) typeInfo).getListElementTypeInfo());
    oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector);
    break;
  case MAP:
    ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
    ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
    oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector);
    break;
  default:
    // Unsupported category: callers receive null, exactly as before.
    oi = null;
  }

  // Never store a null inspector. A null entry can never produce a cache hit (the lookup above
  // treats null as a miss), and null-hostile maps such as ConcurrentHashMap reject it with a
  // NullPointerException. NOTE(review): the cache's concrete type is declared outside this
  // method - confirm which case applies.
  if (oi != null) {
    cachedObjectInspectors.put(typeInfo, oi);
  }
  return oi;
}
Aggregations