Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class HiveSchemaConverter, method convertType:
Type convertType(TypeInfo typeInfo) {
  switch (typeInfo.getCategory()) {
    case PRIMITIVE:
      switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
        case FLOAT:
          return Types.FloatType.get();
        case DOUBLE:
          return Types.DoubleType.get();
        case BOOLEAN:
          return Types.BooleanType.get();
        case BYTE:
        case SHORT:
          Preconditions.checkArgument(autoConvert, "Unsupported Hive type: %s, use integer instead", ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
          LOG.debug("Using auto conversion from SHORT/BYTE to INTEGER");
          return Types.IntegerType.get();
        case INT:
          return Types.IntegerType.get();
        case LONG:
          return Types.LongType.get();
        case BINARY:
          return Types.BinaryType.get();
        case CHAR:
        case VARCHAR:
          Preconditions.checkArgument(autoConvert, "Unsupported Hive type: %s, use string instead", ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
          LOG.debug("Using auto conversion from CHAR/VARCHAR to STRING");
          return Types.StringType.get();
        case STRING:
          return Types.StringType.get();
        case TIMESTAMP:
          return Types.TimestampType.withoutZone();
        case DATE:
          return Types.DateType.get();
        case DECIMAL:
          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
          return Types.DecimalType.of(decimalTypeInfo.precision(), decimalTypeInfo.scale());
        case INTERVAL_YEAR_MONTH:
        case INTERVAL_DAY_TIME:
        default:
          // special case for Timestamp with Local TZ which is only available in Hive3
          if ("TIMESTAMPLOCALTZ".equalsIgnoreCase(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().name())) {
            return Types.TimestampType.withZone();
          }
          throw new IllegalArgumentException("Unsupported Hive type (" + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() + ") for Iceberg tables.");
      }
    case STRUCT:
      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
      List<Types.NestedField> fields = convertInternal(structTypeInfo.getAllStructFieldNames(), structTypeInfo.getAllStructFieldTypeInfos(), Collections.emptyList());
      return Types.StructType.of(fields);
    case MAP:
      MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
      Type keyType = convertType(mapTypeInfo.getMapKeyTypeInfo());
      Type valueType = convertType(mapTypeInfo.getMapValueTypeInfo());
      int keyId = id++;
      int valueId = id++;
      return Types.MapType.ofOptional(keyId, valueId, keyType, valueType);
    case LIST:
      ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
      Type listType = convertType(listTypeInfo.getListElementTypeInfo());
      return Types.ListType.ofOptional(id++, listType);
    case UNION:
    default:
      throw new IllegalArgumentException("Unknown type " + typeInfo.getCategory());
  }
}
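The conversion is driven entirely by the TypeInfo category: PRIMITIVE types map one-to-one (or via auto conversion) to Iceberg types, while STRUCT, MAP, and LIST recurse into their element types. Below is a minimal standalone sketch of the same category dispatch, not part of the Hive source; it assumes only the hive-serde classes TypeInfoUtils, PrimitiveTypeInfo, and ObjectInspector.Category are on the classpath:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class CategoryDispatchSketch {
  public static void main(String[] args) {
    // Parse Hive type strings into TypeInfo objects and inspect their categories.
    String[] typeStrings = { "int", "varchar(10)", "map<string,int>", "struct<a:int,b:string>" };
    for (String typeString : typeStrings) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
      if (typeInfo.getCategory() == Category.PRIMITIVE) {
        // Primitive types carry a finer-grained PrimitiveCategory (INT, VARCHAR, ...).
        PrimitiveTypeInfo primitive = (PrimitiveTypeInfo) typeInfo;
        System.out.println(typeString + " -> PRIMITIVE/" + primitive.getPrimitiveCategory());
      } else {
        // STRUCT, MAP, LIST and UNION are handled by recursing into their element types.
        System.out.println(typeString + " -> " + typeInfo.getCategory());
      }
    }
  }
}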
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class TestSerDe, method serialize:
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
  }
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < fields.size(); i++) {
    if (i > 0) {
      sb.append(separator);
    }
    Object column = soi.getStructFieldData(obj, fields.get(i));
    if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
      // For primitive objects, serialize to a plain string
      sb.append(column == null ? nullString : column.toString());
    } else {
      // For complex objects, serialize to JSON format
      sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
    }
  }
  serializeCache.set(sb.toString());
  return serializeCache;
}
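The serializer branches purely on the field inspector's category: primitive fields are rendered with toString(), everything else goes through SerDeUtils.getJSONString. A minimal standalone sketch of that branch, not taken from the Hive source; the helper name formatField is hypothetical, while SerDeUtils and the inspector factories are real hive-serde classes:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class FieldFormatSketch {
  // Hypothetical helper mirroring the primitive/complex branch in TestSerDe.serialize().
  static String formatField(Object column, ObjectInspector fieldOI, String nullString) {
    if (fieldOI.getCategory() == Category.PRIMITIVE) {
      // Primitive values are rendered with toString(); nulls use the configured null marker.
      return column == null ? nullString : column.toString();
    }
    // Complex values (lists, maps, structs, unions) are rendered as JSON.
    return SerDeUtils.getJSONString(column, fieldOI);
  }

  public static void main(String[] args) {
    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(stringOI);
    System.out.println(formatField("plain", stringOI, "\\N"));                 // plain
    System.out.println(formatField(Arrays.asList("a", "b"), listOI, "\\N"));   // ["a","b"]
  }
}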
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class MapJoinBytesTableContainer, method getComplexFieldsAsList:
/*
 * For primitive types, use LazyBinary's object.
 * For complex types, make a standard (Java) object from LazyBinary's object.
 */
public static List<Object> getComplexFieldsAsList(LazyBinaryStruct lazyBinaryStruct, ArrayList<Object> objectArrayBuffer, LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
  List<? extends StructField> fields = lazyBinaryStructObjectInspector.getAllStructFieldRefs();
  for (int i = 0; i < fields.size(); i++) {
    StructField field = fields.get(i);
    ObjectInspector objectInspector = field.getFieldObjectInspector();
    Category category = objectInspector.getCategory();
    Object object = lazyBinaryStruct.getField(i);
    if (category == Category.PRIMITIVE) {
      objectArrayBuffer.set(i, object);
    } else {
      objectArrayBuffer.set(i, ObjectInspectorUtils.copyToStandardObject(object, objectInspector, ObjectInspectorCopyOption.WRITABLE));
    }
  }
  return objectArrayBuffer;
}
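The method passes primitive fields through untouched and deep-copies complex fields out of the LazyBinary representation. A small sketch, not from the Hive source, of what ObjectInspectorUtils.copyToStandardObject does with the WRITABLE copy option; a plain Java list stands in for a real LazyBinary list field, which is an assumption for illustration only:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class CopyToStandardSketch {
  public static void main(String[] args) {
    // A primitive field value: used as-is, no copy needed (as in getComplexFieldsAsList).
    IntWritable primitiveField = new IntWritable(42);

    // A complex field value (a list of ints, standing in for a LazyBinary list field):
    // copy it into standard writable form so it no longer aliases its backing structure.
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    ObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(intOI);
    Object complexField = Arrays.asList(new IntWritable(1), new IntWritable(2));
    Object standardCopy = ObjectInspectorUtils.copyToStandardObject(complexField, listOI, ObjectInspectorCopyOption.WRITABLE);

    System.out.println(primitiveField + " / " + standardCopy);
  }
}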
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorizationContext, method castConstantToLong:
private Long castConstantToLong(Object scalar, TypeInfo type, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
  if (null == scalar) {
    return null;
  }
  PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
  PrimitiveCategory primitiveCategory = ptinfo.getPrimitiveCategory();
  switch (primitiveCategory) {
    case FLOAT:
    case DOUBLE:
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
      return ((Number) scalar).longValue();
    case STRING:
    case CHAR:
    case VARCHAR:
      {
        final long longValue;
        if (primitiveCategory == PrimitiveCategory.STRING) {
          longValue = Long.valueOf((String) scalar);
        } else if (primitiveCategory == PrimitiveCategory.CHAR) {
          longValue = Long.valueOf(((HiveChar) scalar).getStrippedValue());
        } else {
          longValue = Long.valueOf(((HiveVarchar) scalar).getValue());
        }
        switch (integerPrimitiveCategory) {
          case BYTE:
            if (longValue != ((byte) longValue)) {
              // Accurate byte value cannot be obtained.
              return null;
            }
            break;
          case SHORT:
            if (longValue != ((short) longValue)) {
              // Accurate short value cannot be obtained.
              return null;
            }
            break;
          case INT:
            if (longValue != ((int) longValue)) {
              // Accurate int value cannot be obtained.
              return null;
            }
            break;
          case LONG:
            // No range check needed.
            break;
          default:
            throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
        }
        return longValue;
      }
    case DECIMAL:
      HiveDecimal decimalVal = (HiveDecimal) scalar;
      switch (integerPrimitiveCategory) {
        case BYTE:
          if (!decimalVal.isByte()) {
            // Accurate byte value cannot be obtained.
            return null;
          }
          break;
        case SHORT:
          if (!decimalVal.isShort()) {
            // Accurate short value cannot be obtained.
            return null;
          }
          break;
        case INT:
          if (!decimalVal.isInt()) {
            // Accurate int value cannot be obtained.
            return null;
          }
          break;
        case LONG:
          if (!decimalVal.isLong()) {
            // Accurate long value cannot be obtained.
            return null;
          }
          break;
        default:
          throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
      }
      // We only store longs in our LongColumnVector.
      return decimalVal.longValue();
    default:
      throw new HiveException("Unsupported primitive category " + primitiveCategory + " for cast to LONG");
  }
}
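For string-family constants, the value is first parsed to a long and then rejected if it would not survive narrowing to the requested integer width; the decimal branch delegates the same test to HiveDecimal.isByte()/isShort()/isInt()/isLong(). A tiny dependency-free sketch of the narrowing check itself (the helper name checkedNarrow is made up for illustration):

public class NarrowingCheckSketch {
  // Returns null when the long would lose information in the narrower integer type,
  // mirroring the byte/short/int checks in castConstantToLong.
  static Long checkedNarrow(long longValue, int targetBits) {
    switch (targetBits) {
      case 8:
        if (longValue != (byte) longValue) {
          // Accurate byte value cannot be obtained.
          return null;
        }
        break;
      case 16:
        if (longValue != (short) longValue) {
          // Accurate short value cannot be obtained.
          return null;
        }
        break;
      case 32:
        if (longValue != (int) longValue) {
          // Accurate int value cannot be obtained.
          return null;
        }
        break;
      default:
        // 64-bit target: no range check needed.
        break;
    }
    return longValue;
  }

  public static void main(String[] args) {
    System.out.println(checkedNarrow(127, 8));     // 127  (fits in a byte)
    System.out.println(checkedNarrow(128, 8));     // null (overflows a byte)
    System.out.println(checkedNarrow(70000, 16));  // null (overflows a short)
  }
}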
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class VectorizationContext, method getStructInExpression:
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  VectorExpression expr;
  StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
  List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfos.size();
  ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
  InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
  for (int f = 0; f < fieldCount; f++) {
    TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
    // Only primitive fields are supported for now.
    if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
      return null;
    }
    // We are going to serialize using the 4 basic types.
    ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
    fieldVectorColumnTypes[f] = fieldVectorColumnType;
    // We currently evaluate the IN (..) constants in special ways.
    PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
    InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
    fieldInConstantTypes[f] = inConstantType;
  }
  Output buffer = new Output();
  BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
  final int inChildrenCount = inChildren.size();
  byte[][] serializedInChildren = new byte[inChildrenCount][];
  try {
    for (int i = 0; i < inChildrenCount; i++) {
      final ExprNodeDesc node = inChildren.get(i);
      final Object[] constants;
      if (node instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
        ConstantObjectInspector output = constNode.getWritableObjectInspector();
        constants = ((List<?>) output.getWritableConstantValue()).toArray();
      } else {
        ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
        ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
        ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
        constants = (Object[]) evaluator.evaluate(null);
      }
      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < fieldCount; f++) {
        Object constant = constants[f];
        if (constant == null) {
          binarySortableSerializeWrite.writeNull();
        } else {
          InConstantType inConstantType = fieldInConstantTypes[f];
          switch (inConstantType) {
            case STRING_FAMILY:
              {
                byte[] bytes;
                if (constant instanceof Text) {
                  Text text = (Text) constant;
                  bytes = text.getBytes();
                  binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
                } else {
                  throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
                }
              }
              break;
            case INT_FAMILY:
              {
                long value;
                if (constant instanceof IntWritable) {
                  value = ((IntWritable) constant).get();
                } else if (constant instanceof LongWritable) {
                  value = ((LongWritable) constant).get();
                } else {
                  throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
                }
                binarySortableSerializeWrite.writeLong(value);
              }
              break;
            case FLOAT_FAMILY:
              {
                double value;
                if (constant instanceof DoubleWritable) {
                  value = ((DoubleWritable) constant).get();
                } else {
                  throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
                }
                binarySortableSerializeWrite.writeDouble(value);
              }
              break;
            // UNDONE...
            case DATE:
            case TIMESTAMP:
            case DECIMAL:
            default:
              throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
          }
        }
      }
      serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
  // Create a single child representing the scratch column where we will
  // generate the serialized keys of the batch.
  int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
  Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
  expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
  ((IStringInExpr) expr).setInListValues(serializedInChildren);
  ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
  ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
  return expr;
}
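Each IN constant is serialized with BinarySortableSerializeWrite into one byte[] key, so the vectorized struct-IN expression can compare whole struct values as opaque byte strings against the scratch column. A hedged sketch of that serialization step in isolation; the writer, Output buffer, and write* calls are the same hive-serde APIs used above, while the two field values are made-up constants:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

public class StructKeySketch {
  public static void main(String[] args) throws Exception {
    // A two-field struct key, e.g. one constant from an IN (...) list: (string, long).
    BinarySortableSerializeWrite writer = new BinarySortableSerializeWrite(2);
    Output buffer = new Output();
    writer.set(buffer);  // reset the buffer and start writing a new row

    byte[] name = "abc".getBytes(StandardCharsets.UTF_8);
    writer.writeString(name, 0, name.length);
    writer.writeLong(42L);

    // Copy out exactly the bytes written so far, as getStructInExpression does per constant.
    byte[] serializedKey = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    System.out.println(serializedKey.length + " bytes");
  }
}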