Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
The class TypeInfoUtils, method getExtendedTypeInfoFromJavaType.
/**
* Return the extended TypeInfo from a Java type. By extended TypeInfo, we
* allow unknownType for java.lang.Object.
*
* @param t
* The Java type.
* @param m
* The method, only used for generating error messages.
*/
private static TypeInfo getExtendedTypeInfoFromJavaType(Type t, Method m) {
  if (t == Object.class) {
    return TypeInfoFactory.unknownTypeInfo;
  }
  if (t instanceof ParameterizedType) {
    ParameterizedType pt = (ParameterizedType) t;
    // List?
    if (List.class == (Class<?>) pt.getRawType()
        || ArrayList.class == (Class<?>) pt.getRawType()) {
      return TypeInfoFactory.getListTypeInfo(
          getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[0], m));
    }
    // Map?
    if (Map.class == (Class<?>) pt.getRawType()
        || HashMap.class == (Class<?>) pt.getRawType()) {
      return TypeInfoFactory.getMapTypeInfo(
          getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[0], m),
          getExtendedTypeInfoFromJavaType(pt.getActualTypeArguments()[1], m));
    }
    // Otherwise convert t to RawType so we will fall into the following if block.
    t = pt.getRawType();
  }
  // Must be a class.
  if (!(t instanceof Class)) {
    throw new RuntimeException("Hive does not understand type " + t + " from " + m);
  }
  Class<?> c = (Class<?>) t;
  // Java Primitive Type?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaType(c)) {
    return TypeInfoUtils.getTypeInfoFromObjectInspector(
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaType(c).primitiveCategory));
  }
  // Java Primitive Class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveJavaClass(c)) {
    return TypeInfoUtils.getTypeInfoFromObjectInspector(
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(c).primitiveCategory));
  }
  // Primitive Writable class?
  if (PrimitiveObjectInspectorUtils.isPrimitiveWritableClass(c)) {
    return TypeInfoUtils.getTypeInfoFromObjectInspector(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory));
  }
  // Must be a struct.
  Field[] fields = ObjectInspectorUtils.getDeclaredNonStaticFields(c);
  ArrayList<String> fieldNames = new ArrayList<String>(fields.length);
  ArrayList<TypeInfo> fieldTypeInfos = new ArrayList<TypeInfo>(fields.length);
  for (Field field : fields) {
    fieldNames.add(field.getName());
    fieldTypeInfos.add(getExtendedTypeInfoFromJavaType(field.getGenericType(), m));
  }
  return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
}
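
A minimal, self-contained sketch of the mapping performed above (the class and method names here are hypothetical, and the private method itself is not callable from outside; the public type-string parser is used as the reference point): a parameterized Java type such as Map<String, List<Integer>> corresponds to the Hive TypeInfo map<string,array<int>>.

import java.lang.reflect.Method;
import java.lang.reflect.ParameterizedType;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeMappingSketch {
  // A method whose generic return type we inspect via reflection.
  static Map<String, List<Integer>> sample() {
    return null;
  }

  public static void main(String[] args) throws Exception {
    Method m = TypeMappingSketch.class.getDeclaredMethod("sample");
    ParameterizedType pt = (ParameterizedType) m.getGenericReturnType();
    // getExtendedTypeInfoFromJavaType recurses into the type arguments and
    // yields the same TypeInfo as parsing the equivalent Hive type string.
    TypeInfo expected = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
    System.out.println(pt + " -> " + expected.getTypeName());
  }
}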
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
The class SerdeRandomRowSource, method chooseSchema.
private void chooseSchema(SupportedTypes supportedTypes, int maxComplexDepth) {
  HashSet<Integer> hashSet = null;
  final boolean allTypes;
  final boolean onlyOne = (r.nextInt(100) == 7);
  if (onlyOne) {
    columnCount = 1;
    allTypes = false;
  } else {
    allTypes = r.nextBoolean();
    if (allTypes) {
      switch (supportedTypes) {
        case ALL:
          columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
          break;
        case ALL_EXCEPT_MAP:
          columnCount = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
          break;
        case PRIMITIVE:
          columnCount = possibleHivePrimitiveTypeNames.length;
          break;
      }
      hashSet = new HashSet<Integer>();
    } else {
      columnCount = 1 + r.nextInt(20);
    }
  }
  typeNames = new ArrayList<String>(columnCount);
  categories = new Category[columnCount];
  typeInfos = new TypeInfo[columnCount];
  objectInspectorList = new ArrayList<ObjectInspector>(columnCount);
  primitiveCategories = new PrimitiveCategory[columnCount];
  primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
  primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
  final List<String> columnNames = new ArrayList<String>(columnCount);
  for (int c = 0; c < columnCount; c++) {
    columnNames.add(String.format("col%d", c));
    String typeName;
    if (onlyOne) {
      typeName = getRandomTypeName(supportedTypes);
    } else {
      int typeNum;
      if (allTypes) {
        int maxTypeNum = 0;
        switch (supportedTypes) {
          case PRIMITIVE:
            maxTypeNum = possibleHivePrimitiveTypeNames.length;
            break;
          case ALL_EXCEPT_MAP:
            maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length - 1;
            break;
          case ALL:
            maxTypeNum = possibleHivePrimitiveTypeNames.length + possibleHiveComplexTypeNames.length;
            break;
        }
        while (true) {
          typeNum = r.nextInt(maxTypeNum);
          final Integer typeNumInteger = Integer.valueOf(typeNum);
          if (!hashSet.contains(typeNumInteger)) {
            hashSet.add(typeNumInteger);
            break;
          }
        }
      } else {
        if (supportedTypes == SupportedTypes.PRIMITIVE || r.nextInt(10) != 0) {
          typeNum = r.nextInt(possibleHivePrimitiveTypeNames.length);
        } else {
          typeNum = possibleHivePrimitiveTypeNames.length + r.nextInt(possibleHiveComplexTypeNames.length);
          if (supportedTypes == SupportedTypes.ALL_EXCEPT_MAP) {
            typeNum--;
          }
        }
      }
      if (typeNum < possibleHivePrimitiveTypeNames.length) {
        typeName = possibleHivePrimitiveTypeNames[typeNum];
      } else {
        typeName = possibleHiveComplexTypeNames[typeNum - possibleHivePrimitiveTypeNames.length];
      }
    }
    final String decoratedTypeName = getDecoratedTypeName(typeName, supportedTypes, 0, maxComplexDepth);
    final TypeInfo typeInfo;
    try {
      typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(decoratedTypeName);
    } catch (Exception e) {
      throw new RuntimeException("Cannot convert type name " + decoratedTypeName + " to a type: " + e);
    }
    typeInfos[c] = typeInfo;
    final Category category = typeInfo.getCategory();
    categories[c] = category;
    ObjectInspector objectInspector = getObjectInspector(typeInfo);
    switch (category) {
      case PRIMITIVE:
        {
          final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
          final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
          objectInspector = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo);
          primitiveTypeInfos[c] = primitiveTypeInfo;
          primitiveCategories[c] = primitiveCategory;
          primitiveObjectInspectorList.add(objectInspector);
        }
        break;
      case LIST:
      case MAP:
      case STRUCT:
      case UNION:
        primitiveObjectInspectorList.add(null);
        break;
      default:
        throw new RuntimeException("Unexpected category " + category);
    }
    objectInspectorList.add(objectInspector);
    typeNames.add(decoratedTypeName);
  }
  rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, objectInspectorList);
  alphabets = new String[columnCount];
}
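
To make the end state of chooseSchema concrete, here is a hedged sketch (fixed type names instead of random ones; the class name is hypothetical) that mirrors its tail: parse each type string into a TypeInfo, derive a writable ObjectInspector, and assemble the row StructObjectInspector.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class SchemaSketch {
  public static void main(String[] args) {
    List<String> typeNames = Arrays.asList("int", "decimal(10,2)", "array<string>");
    List<String> columnNames = new ArrayList<String>();
    List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>();
    for (int c = 0; c < typeNames.size(); c++) {
      columnNames.add(String.format("col%d", c));
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(c));
      objectInspectorList.add(
          TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }
    StructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, objectInspectorList);
    // Prints struct<col0:int,col1:decimal(10,2),col2:array<string>>
    System.out.println(rowOI.getTypeName());
  }
}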
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
The class LazyBinaryUtils, method checkObjectByteInfo.
/**
 * Check a particular field and set its size and offset in bytes based on the
 * field type and the byte array.
 *
 * For void, boolean, byte, short, float and double, there is no offset and
 * the size is fixed; for int, long, date and interval_year_month, the size
 * is read from the vint header byte. For string, char, varchar and binary, a
 * vint prefix stores the size, so the offset is the vint's length and the
 * size is its value. For decimal, two vints (scale and byte length) precede
 * the value, and the whole span is the size. For map, list, struct and
 * union, the first four bytes at the given offset store the size, so the
 * offset is 4. For timestamp, if the first bit is 0, the record length is 4;
 * otherwise a VInt begins at the 5th byte and its length is added to 4.
 *
 * @param objectInspector
 *          object inspector of the field
 * @param bytes
 *          byte array storing the table row
 * @param offset
 *          offset of this field
 * @param recordInfo
 *          byteinfo object that is modified and returned
 * @param vInt
 *          scratch VInt used while decoding variable-length fields
 */
public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes,
    int offset, RecordInfo recordInfo, VInt vInt) {
  Category category = objectInspector.getCategory();
  switch (category) {
    case PRIMITIVE:
      PrimitiveCategory primitiveCategory =
          ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
      switch (primitiveCategory) {
        case VOID:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = 0;
          break;
        case BOOLEAN:
        case BYTE:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = 1;
          break;
        case SHORT:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = 2;
          break;
        case FLOAT:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = 4;
          break;
        case DOUBLE:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = 8;
          break;
        case INT:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
          break;
        case LONG:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
          break;
        case STRING:
          // using vint instead of 4 bytes
          LazyBinaryUtils.readVInt(bytes, offset, vInt);
          recordInfo.elementOffset = vInt.length;
          recordInfo.elementSize = vInt.value;
          break;
        case CHAR:
        case VARCHAR:
          LazyBinaryUtils.readVInt(bytes, offset, vInt);
          recordInfo.elementOffset = vInt.length;
          recordInfo.elementSize = vInt.value;
          break;
        case BINARY:
          // using vint instead of 4 bytes
          LazyBinaryUtils.readVInt(bytes, offset, vInt);
          recordInfo.elementOffset = vInt.length;
          recordInfo.elementSize = vInt.value;
          break;
        case DATE:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
          break;
        case TIMESTAMP:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = TimestampWritable.getTotalLength(bytes, offset);
          break;
        case TIMESTAMPLOCALTZ:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = TimestampLocalTZWritable.getTotalLength(bytes, offset);
          break;
        case INTERVAL_YEAR_MONTH:
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
          break;
        case INTERVAL_DAY_TIME:
          recordInfo.elementOffset = 0;
          int secondsSize = WritableUtils.decodeVIntSize(bytes[offset]);
          int nanosSize = WritableUtils.decodeVIntSize(bytes[offset + secondsSize]);
          recordInfo.elementSize = secondsSize + nanosSize;
          break;
        case DECIMAL:
          // using vint instead of 4 bytes
          LazyBinaryUtils.readVInt(bytes, offset, vInt);
          recordInfo.elementOffset = 0;
          recordInfo.elementSize = vInt.length;
          LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt);
          recordInfo.elementSize += vInt.length + vInt.value;
          break;
        default: {
          throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
        }
      }
      break;
    case LIST:
    case MAP:
    case STRUCT:
    case UNION:
      recordInfo.elementOffset = 4;
      recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
      break;
    default: {
      throw new RuntimeException("Unrecognized non-primitive type: " + category);
    }
  }
}
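
A small usage sketch (hypothetical class name; assumes the Hive serde classes are on the classpath): probe a vint-prefixed string field the way a LazyBinary reader would. A one-byte vint holds the length 5, so the field data starts 1 byte in and spans 5 bytes.

import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ByteInfoSketch {
  public static void main(String[] args) {
    // A LazyBinary string field: a 1-byte vint length prefix (5) followed
    // by the five bytes of "hello".
    byte[] bytes = {5, 'h', 'e', 'l', 'l', 'o'};
    RecordInfo recordInfo = new RecordInfo();
    VInt vInt = new VInt();
    LazyBinaryUtils.checkObjectByteInfo(
        PrimitiveObjectInspectorFactory.writableStringObjectInspector,
        bytes, 0, recordInfo, vInt);
    // elementOffset = 1 (skip the vint prefix), elementSize = 5.
    System.out.println(recordInfo.elementOffset + " / " + recordInfo.elementSize);
  }
}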
Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.
The class LazyBinaryDeserializeRead, method readPrimitive.
private boolean readPrimitive(Field field) throws IOException {
  final PrimitiveCategory primitiveCategory = field.primitiveCategory;
  final TypeInfo typeInfo = field.typeInfo;
  switch (primitiveCategory) {
    case BOOLEAN:
      // No check needed for single byte read.
      currentBoolean = (bytes[offset++] != 0);
      break;
    case BYTE:
      // No check needed for single byte read.
      currentByte = bytes[offset++];
      break;
    case SHORT:
      // Last item -- ok to be at end.
      if (offset + 2 > end) {
        throw new EOFException();
      }
      currentShort = LazyBinaryUtils.byteArrayToShort(bytes, offset);
      offset += 2;
      break;
    case INT:
      // Parse the first byte of a vint/vlong to determine the number of bytes.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new EOFException();
      }
      LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
      offset += tempVInt.length;
      currentInt = tempVInt.value;
      break;
    case LONG:
      // Parse the first byte of a vint/vlong to determine the number of bytes.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new EOFException();
      }
      LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
      offset += tempVLong.length;
      currentLong = tempVLong.value;
      break;
    case FLOAT:
      // Last item -- ok to be at end.
      if (offset + 4 > end) {
        throw new EOFException();
      }
      currentFloat = Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, offset));
      offset += 4;
      break;
    case DOUBLE:
      // Last item -- ok to be at end.
      if (offset + 8 > end) {
        throw new EOFException();
      }
      currentDouble = Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, offset));
      offset += 8;
      break;
    case BINARY:
    case STRING:
    case CHAR:
    case VARCHAR:
      {
        // Parse the first byte of a vint/vlong to determine the number of bytes.
        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
          throw new EOFException();
        }
        LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
        offset += tempVInt.length;
        int saveStart = offset;
        int length = tempVInt.value;
        offset += length;
        // Last item -- ok to be at end.
        if (offset > end) {
          throw new EOFException();
        }
        currentBytes = bytes;
        currentBytesStart = saveStart;
        currentBytesLength = length;
      }
      break;
    case DATE:
      // Parse the first byte of a vint/vlong to determine the number of bytes.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new EOFException();
      }
      LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
      offset += tempVInt.length;
      currentDateWritable.set(tempVInt.value);
      break;
    case TIMESTAMP:
      {
        int length = TimestampWritable.getTotalLength(bytes, offset);
        int saveStart = offset;
        offset += length;
        // Last item -- ok to be at end.
        if (offset > end) {
          throw new EOFException();
        }
        currentTimestampWritable.set(bytes, saveStart);
      }
      break;
    case INTERVAL_YEAR_MONTH:
      // Parse the first byte of a vint/vlong to determine the number of bytes.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new EOFException();
      }
      LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
      offset += tempVInt.length;
      currentHiveIntervalYearMonthWritable.set(tempVInt.value);
      break;
    case INTERVAL_DAY_TIME:
      // Parse the first byte of a vint/vlong to determine the number of bytes.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
        throw new EOFException();
      }
      LazyBinaryUtils.readVLong(bytes, offset, tempVLong);
      offset += tempVLong.length;
      // Parse the first byte of a vint/vlong to determine the number of bytes.
      if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
        throw new EOFException();
      }
      LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
      offset += tempVInt.length;
      currentHiveIntervalDayTimeWritable.set(tempVLong.value, tempVInt.value);
      break;
    case DECIMAL:
      {
        // Parse the first byte of a vint/vlong to determine the number of bytes.
        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) >= end) {
          throw new EOFException();
        }
        LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
        offset += tempVInt.length;
        int readScale = tempVInt.value;
        // Parse the first byte of a vint/vlong to determine the number of bytes.
        if (offset + WritableUtils.decodeVIntSize(bytes[offset]) > end) {
          throw new EOFException();
        }
        LazyBinaryUtils.readVInt(bytes, offset, tempVInt);
        offset += tempVInt.length;
        int saveStart = offset;
        offset += tempVInt.value;
        // Last item -- ok to be at end.
        if (offset > end) {
          throw new EOFException();
        }
        int length = offset - saveStart;
        // scale = 2, length = 6, value = -6065716379.11
        // \002\006\255\114\197\131\083\105
        // \255\114\197\131\083\105
        currentHiveDecimalWritable.setFromBigIntegerBytesAndScale(bytes, saveStart, length, readScale);
        boolean decimalIsNull = !currentHiveDecimalWritable.isSet();
        if (!decimalIsNull) {
          final DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
          final int precision = decimalTypeInfo.getPrecision();
          final int scale = decimalTypeInfo.getScale();
          decimalIsNull = !currentHiveDecimalWritable.mutateEnforcePrecisionScale(precision, scale);
        }
        if (decimalIsNull) {
          return false;
        }
      }
      break;
    default:
      throw new Error("Unexpected primitive category " + primitiveCategory.name());
  }
  return true;
}
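
The DECIMAL branch reads a scale vint, a byte-length vint, and then the unscaled value as big-endian two's-complement BigInteger bytes. A minimal sketch (hypothetical class name) reproducing the worked example from the inline comment, where -6065716379.11 is the unscaled value -606571637911 at scale 2, occupying 6 bytes:

import java.math.BigInteger;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

public class DecimalSketch {
  public static void main(String[] args) {
    // 6 two's-complement bytes, matching "scale = 2, length = 6" above.
    byte[] unscaled = new BigInteger("-606571637911").toByteArray();
    HiveDecimalWritable writable = new HiveDecimalWritable();
    writable.setFromBigIntegerBytesAndScale(unscaled, 0, unscaled.length, 2);
    System.out.println(writable); // -6065716379.11
  }
}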