use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
the class HBaseUtils method deserializePartitionKey.
/**
 * Deserializes a partition key and returns <em>only</em> the partition values.
 *
 * <p>The key is decoded with a {@link BinarySortableSerDe} as a row whose first two
 * columns are {@code dbName} and {@code tableName} (both typed {@code string}),
 * followed by one column per entry of {@code partitions}. The two leading columns
 * are dropped, and every remaining value is converted to its string form.
 *
 * @param partitions partition columns; their names and types describe the key layout
 * @param key the serialized key bytes
 * @param conf configuration handed to the SerDe and used to look up the default
 *     partition name
 * @return one string per partition column, in column order; a {@code null} value is
 *     reported as the configured {@code DEFAULTPARTITIONNAME}
 * @throws RuntimeException wrapping the {@link SerDeException} if the SerDe cannot be
 *     initialized or the key cannot be deserialized
 */
private static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, Configuration conf) {
  int partitionCount = partitions.size();
  // Purely local builders, so the unsynchronized StringBuilder is sufficient.
  StringBuilder names = new StringBuilder("dbName,tableName,");
  StringBuilder types = new StringBuilder("string,string,");
  // Parse every partition type once and reuse the result when converting values below.
  List<TypeInfo> partitionTypes = new ArrayList<TypeInfo>(partitionCount);
  for (int i = 0; i < partitionCount; i++) {
    FieldSchema partition = partitions.get(i);
    if (i > 0) {
      names.append(",");
      types.append(",");
    }
    names.append(partition.getName());
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(partition.getType());
    partitionTypes.add(typeInfo);
    // Appends the string form of the parsed TypeInfo rather than the raw type string.
    types.append(typeInfo);
  }

  BinarySortableSerDe serDe = new BinarySortableSerDe();
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
  try {
    serDe.initialize(conf, props);
    List<?> row = (List<?>) serDe.deserialize(new BytesWritable(key));
    // Skip the leading dbName and tableName columns.
    List<?> deserializedKeys = row.subList(2, partitionCount + 2);
    List<String> partitionKeys = new ArrayList<String>(deserializedKeys.size());
    for (int i = 0; i < deserializedKeys.size(); i++) {
      Object deserializedKey = deserializedKeys.get(i);
      if (deserializedKey == null) {
        partitionKeys.add(HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME));
      } else {
        ObjectInspector inputOI =
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(partitionTypes.get(i));
        Converter converter = ObjectInspectorConverters.getConverter(
            inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        partitionKeys.add((String) converter.convert(deserializedKey));
      }
    }
    return partitionKeys;
  } catch (SerDeException e) {
    throw new RuntimeException("Error when deserialize key", e);
  }
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
the class HBaseUtils method buildSerializedPartitionKey.
/**
 * Serializes a partition key consisting of the database name, the table name and the
 * given partition value components, in that order.
 *
 * <p>The two names are serialized as Java strings; component {@code i} is serialized
 * with the standard Java object inspector for the type named by {@code partTypes.get(i)}.
 *
 * @param dbName database name, first field of the key
 * @param tableName table name, second field of the key
 * @param partTypes Hive type strings; must have an entry for every element of
 *     {@code components}
 * @param components partition values to append after the two names
 * @param endPrefix passed through unchanged to
 *     {@code BinarySortableSerDeWithEndPrefix.serializeStruct}
 * @return a copy of the serialized bytes, trimmed to the written length
 * @throws RuntimeException wrapping the {@link SerDeException} if serialization fails
 */
static byte[] buildSerializedPartitionKey(String dbName, String tableName, List<String> partTypes, Object[] components, boolean endPrefix) {
  ObjectInspector javaStringOI =
      PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveCategory.STRING);
  int fieldCount = components.length + 2;
  Object[] data = new Object[fieldCount];
  List<ObjectInspector> fois = new ArrayList<ObjectInspector>(fieldCount);

  data[0] = dbName;
  fois.add(javaStringOI);
  data[1] = tableName;
  fois.add(javaStringOI);

  for (int i = 0; i < components.length; i++) {
    data[i + 2] = components[i];
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(partTypes.get(i));
    fois.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(expectedType));
  }

  Output output = new Output();
  try {
    BinarySortableSerDeWithEndPrefix.serializeStruct(output, data, fois, endPrefix);
  } catch (SerDeException e) {
    // Keep the SerDeException as the cause so the underlying failure is not lost.
    throw new RuntimeException("Cannot serialize partition " + StringUtils.join(components, ","), e);
  }
  return Arrays.copyOf(output.getData(), output.getLength());
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
the class VectorRandomRowSource method chooseSchema.
/**
 * Randomly picks the column count and the Hive type of every column, then fills the
 * schema fields of this object: {@code columnCount}, {@code typeNames},
 * {@code primitiveCategories}, {@code primitiveTypeInfos},
 * {@code primitiveObjectInspectorList}, {@code rowStructObjectInspector} and
 * {@code alphabets}.
 *
 * <p>Three shapes are possible:
 * <ul>
 *   <li>a single column of a random type, when {@code r.nextInt(100) == 7};</li>
 *   <li>exactly one column of each entry in {@code possibleHiveTypeNames}, in random
 *       order;</li>
 *   <li>between 1 and 20 columns, each of an independently chosen random type.</li>
 * </ul>
 *
 * <p>{@code char} and {@code varchar} receive a random maximum length from 1 to 100;
 * {@code decimal} receives the system default precision and scale. Columns are named
 * {@code col0}, {@code col1}, and so on.
 */
private void chooseSchema() {
  // Only used in the "one of each type" shape, to remember which type indexes are taken.
  HashSet<Integer> usedTypeNums = null;
  boolean allTypes;
  boolean onlyOne = (r.nextInt(100) == 7);
  if (onlyOne) {
    columnCount = 1;
    allTypes = false;
  } else {
    allTypes = r.nextBoolean();
    if (allTypes) {
      // One of each type.
      columnCount = possibleHiveTypeNames.length;
      usedTypeNums = new HashSet<Integer>();
    } else {
      columnCount = 1 + r.nextInt(20);
    }
  }

  typeNames = new ArrayList<String>(columnCount);
  primitiveCategories = new PrimitiveCategory[columnCount];
  primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
  primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
  List<String> columnNames = new ArrayList<String>(columnCount);

  for (int c = 0; c < columnCount; c++) {
    columnNames.add(String.format("col%d", c));

    int typeNum;
    if (allTypes) {
      // Redraw until an unused type index comes up; add() returns false for duplicates.
      do {
        typeNum = r.nextInt(possibleHiveTypeNames.length);
      } while (!usedTypeNums.add(typeNum));
    } else {
      typeNum = r.nextInt(possibleHiveTypeNames.length);
    }
    String typeName = possibleHiveTypeNames[typeNum];

    // Parameterized types need their qualifiers before they can be parsed.
    if (typeName.equals("char")) {
      int maxLength = 1 + r.nextInt(100);
      typeName = String.format("char(%d)", maxLength);
    } else if (typeName.equals("varchar")) {
      int maxLength = 1 + r.nextInt(100);
      typeName = String.format("varchar(%d)", maxLength);
    } else if (typeName.equals("decimal")) {
      typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
    }

    PrimitiveTypeInfo primitiveTypeInfo =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    primitiveTypeInfos[c] = primitiveTypeInfo;
    primitiveCategories[c] = primitiveTypeInfo.getPrimitiveCategory();
    primitiveObjectInspectorList.add(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
    typeNames.add(typeName);
  }

  rowStructObjectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
  alphabets = new String[columnCount];
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
the class TestStandardObjectInspectors method testStandardUnionObjectInspector.
/**
 * Exercises a {@link StandardUnionObjectInspector} built from six alternatives, in tag
 * order: int, string, boolean, a list of int, a map from int to string, and a struct
 * with the fields {@code myDouble} and {@code myLong}.
 *
 * <p>The test checks equality of two inspectors built from equal inspector lists, the
 * union's type name and TypeInfo round trip, handling of {@code null}, and, for each
 * tag, the tag/field accessors, the JSON rendering, comparison and copying to a
 * standard object.
 *
 * @throws Throwable any failure, rethrown after its stack trace has been printed
 */
@SuppressWarnings("unchecked")
public void testStandardUnionObjectInspector() throws Throwable {
// The outer try/catch only prints the stack trace before rethrowing the failure.
try {
ArrayList<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
// add primitive types (tags 0, 1 and 2)
objectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
// add a list (tag 3)
objectInspectors.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
// add a map (tag 4)
objectInspectors.add(ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector));
// add a struct (tag 5)
List<String> fieldNames = new ArrayList<String>();
fieldNames.add("myDouble");
fieldNames.add("myLong");
ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>();
fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
objectInspectors.add(ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors));
// Two inspectors built from equal (but distinct) lists must be equal to each other.
StandardUnionObjectInspector uoi1 = ObjectInspectorFactory.getStandardUnionObjectInspector(objectInspectors);
StandardUnionObjectInspector uoi2 = ObjectInspectorFactory.getStandardUnionObjectInspector((ArrayList<ObjectInspector>) objectInspectors.clone());
assertEquals(uoi1, uoi2);
assertEquals(ObjectInspectorUtils.getObjectInspectorName(uoi1), ObjectInspectorUtils.getObjectInspectorName(uoi2));
assertTrue(ObjectInspectorUtils.compareTypes(uoi1, uoi2));
// compareSupported returns false because Union can contain
// an object of Map
assertFalse(ObjectInspectorUtils.compareSupported(uoi1));
// construct unionObjectInspector without Map field.
ArrayList<ObjectInspector> ois = (ArrayList<ObjectInspector>) objectInspectors.clone();
ois.set(4, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
assertTrue(ObjectInspectorUtils.compareSupported(ObjectInspectorFactory.getStandardUnionObjectInspector(ois)));
// metadata
assertEquals(Category.UNION, uoi1.getCategory());
List<? extends ObjectInspector> uois = uoi1.getObjectInspectors();
assertEquals(6, uois.size());
for (int i = 0; i < 6; i++) {
assertEquals(objectInspectors.get(i), uois.get(i));
}
// The expected type name is "uniontype<" + comma-separated member type names + ">".
StringBuilder unionTypeName = new StringBuilder();
unionTypeName.append("uniontype<");
for (int i = 0; i < uois.size(); i++) {
if (i > 0) {
unionTypeName.append(",");
}
unionTypeName.append(uois.get(i).getTypeName());
}
unionTypeName.append(">");
assertEquals(unionTypeName.toString(), uoi1.getTypeName());
// TypeInfo: derived from the inspector, and round-tripped through the type name string.
TypeInfo typeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi1);
assertEquals(Category.UNION, typeInfo1.getCategory());
assertEquals(UnionTypeInfo.class.getName(), typeInfo1.getClass().getName());
assertEquals(typeInfo1.getTypeName(), uoi1.getTypeName());
assertEquals(typeInfo1, TypeInfoUtils.getTypeInfoFromTypeString(uoi1.getTypeName()));
TypeInfo typeInfo2 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi2);
assertEquals(typeInfo1, typeInfo2);
assertEquals(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo2));
assertEquals(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo2));
// null: a null union has no field and reports tag -1
assertNull(uoi1.getField(null));
assertEquals(-1, uoi1.getTag(null));
// Union, tag 0: int
UnionObject union = new StandardUnion((byte) 0, 1);
assertEquals(0, uoi1.getTag(union));
assertEquals(1, uoi1.getField(union));
assertEquals("{0:1}", SerDeUtils.getJSONString(union, uoi1));
assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 0, 1), uoi2));
assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(1));
// tag 1: string
union = new StandardUnion((byte) 1, "two");
assertEquals(1, uoi1.getTag(union));
assertEquals("two", uoi1.getField(union));
assertEquals("{1:\"two\"}", SerDeUtils.getJSONString(union, uoi1));
assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 1, "two"), uoi2));
assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals("two"));
// tag 2: boolean
union = new StandardUnion((byte) 2, true);
assertEquals(2, uoi1.getTag(union));
assertEquals(true, uoi1.getField(union));
assertEquals("{2:true}", SerDeUtils.getJSONString(union, uoi1));
assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 2, true), uoi2));
assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(true));
// tag 3: list of int
ArrayList<Integer> iList = new ArrayList<Integer>();
iList.add(4);
iList.add(5);
union = new StandardUnion((byte) 3, iList);
assertEquals(3, uoi1.getTag(union));
assertEquals(iList, uoi1.getField(union));
assertEquals("{3:[4,5]}", SerDeUtils.getJSONString(union, uoi1));
assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 3, iList.clone()), uoi2));
assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(iList));
// tag 4: map from int to string
HashMap<Integer, String> map = new HashMap<Integer, String>();
map.put(6, "six");
map.put(7, "seven");
map.put(8, "eight");
union = new StandardUnion((byte) 4, map);
assertEquals(4, uoi1.getTag(union));
assertEquals(map, uoi1.getField(union));
assertEquals("{4:{6:\"six\",7:\"seven\",8:\"eight\"}}", SerDeUtils.getJSONString(union, uoi1));
// Comparing map values is expected to fail: capture the throwable and check its message.
// Note that this call uses the five-argument compare overload with a null last argument.
Throwable th = null;
try {
ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 4, map.clone()), uoi2, null);
} catch (Throwable t) {
th = t;
}
assertNotNull(th);
assertEquals("Compare on map type not supported!", th.getMessage());
assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(map));
// tag 5: struct, represented as a list of field values
ArrayList<Object> struct = new ArrayList<Object>(2);
struct.add(9.0);
struct.add(10L);
union = new StandardUnion((byte) 5, struct);
assertEquals(5, uoi1.getTag(union));
assertEquals(struct, uoi1.getField(union));
// The expected JSON uses lower-case field names although the fields were declared in camel case.
assertEquals("{5:{\"mydouble\":9.0,\"mylong\":10}}", SerDeUtils.getJSONString(union, uoi1));
assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnion((byte) 5, struct.clone()), uoi2));
assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(struct));
} catch (Throwable e) {
e.printStackTrace();
throw e;
}
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString in project hive by apache.
the class VectorizationContext method instantiateExpression.
/**
 * Instantiates a vector expression class through the constructor returned by
 * {@code getConstructor(vclass)}.
 *
 * <p>The constructor's parameter count decides how it is invoked:
 * <ul>
 *   <li>no parameters: invoked without arguments, {@code args} is ignored;</li>
 *   <li>as many parameters as {@code args}: invoked with {@code args} as given;</li>
 *   <li>one parameter more than {@code args}: an output column is allocated for the
 *       return type and appended as the last argument, and the expression's output
 *       type is set to the return type name.</li>
 * </ul>
 *
 * <p>If the created expression is a {@code TruncStringOutput} and {@code returnType}
 * is a {@code BaseCharTypeInfo}, its maximum length is set from the type's length.
 *
 * @param vclass the vector expression class to instantiate
 * @param returnType the expression's return type; may be {@code null}, in which case
 *     the output-column form derives it from the output type of a default-constructed
 *     instance of {@code vclass} (with {@code long} mapped to {@code bigint})
 * @param args constructor arguments without the output column; may be {@code null}
 * @return the new expression, or {@code null} if the constructor's parameter count
 *     matches none of the three forms above
 * @throws HiveException if the expression cannot be instantiated
 */
private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnType, Object... args) throws HiveException {
  VectorExpression ve = null;
  Constructor<?> ctor = getConstructor(vclass);
  int numParams = ctor.getParameterTypes().length;
  int argsLength = (args == null) ? 0 : args.length;
  if (numParams == 0) {
    try {
      ve = (VectorExpression) ctor.newInstance();
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + getStackTraceAsSingleLine(ex), ex);
    }
  } else if (numParams == argsLength) {
    try {
      ve = (VectorExpression) ctor.newInstance(args);
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + getStackTraceAsSingleLine(ex), ex);
    }
  } else if (numParams == argsLength + 1) {
    // Additional argument is needed, which is the outputcolumn.
    Object[] newArgs = null;
    try {
      String returnTypeName;
      if (returnType == null) {
        returnTypeName = ((VectorExpression) vclass.newInstance()).getOutputType().toLowerCase();
        if (returnTypeName.equals("long")) {
          returnTypeName = "bigint";
        }
        returnType = TypeInfoUtils.getTypeInfoFromTypeString(returnTypeName);
      } else {
        returnTypeName = returnType.getTypeName();
      }
      // The output column is allocated from the full TypeInfo, so qualified types
      // keep their qualifiers.
      int outputCol = ocm.allocateOutputColumn(returnType);
      // args may be null when the constructor takes only the output column;
      // Arrays.copyOf would throw a NullPointerException on a null array.
      newArgs = (args == null) ? new Object[numParams] : Arrays.copyOf(args, numParams);
      newArgs[numParams - 1] = outputCol;
      ve = (VectorExpression) ctor.newInstance(newArgs);
      ve.setOutputType(returnTypeName);
    } catch (Exception ex) {
      throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + getStackTraceAsSingleLine(ex), ex);
    }
  }
  // Add maxLength parameter to UDFs that have CHAR or VARCHAR output.
  if (ve instanceof TruncStringOutput) {
    TruncStringOutput truncStringOutput = (TruncStringOutput) ve;
    if (returnType instanceof BaseCharTypeInfo) {
      BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnType;
      truncStringOutput.setMaxLength(baseCharTypeInfo.getLength());
    }
  }
  return ve;
}
Aggregations