Search in sources :

Example 26 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class HiveSchemaConverter method convertType.

/**
 * Maps a Hive {@code TypeInfo} onto the corresponding Iceberg {@code Type}.
 *
 * <p>Primitive categories are translated one-to-one where a case exists below. BYTE/SHORT and
 * CHAR/VARCHAR are only accepted when {@code autoConvert} is set (checked through
 * {@code Preconditions.checkArgument}); they are then widened to integer and string respectively.
 * Struct, map and list types are converted recursively; map and list element ids are drawn from
 * the {@code id} counter <em>after</em> the nested types have been converted.
 *
 * @param typeInfo the Hive type description to translate
 * @return the Iceberg type chosen for {@code typeInfo}
 * @throws IllegalArgumentException for primitive categories without a mapping and for any
 *         category other than PRIMITIVE, STRUCT, MAP or LIST (UNION included)
 */
Type convertType(TypeInfo typeInfo) {
    switch (typeInfo.getCategory()) {
        case PRIMITIVE: {
            final PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) typeInfo;
            switch (primitiveInfo.getPrimitiveCategory()) {
                case BOOLEAN:
                    return Types.BooleanType.get();
                case INT:
                    return Types.IntegerType.get();
                case LONG:
                    return Types.LongType.get();
                case FLOAT:
                    return Types.FloatType.get();
                case DOUBLE:
                    return Types.DoubleType.get();
                case STRING:
                    return Types.StringType.get();
                case BINARY:
                    return Types.BinaryType.get();
                case DATE:
                    return Types.DateType.get();
                case TIMESTAMP:
                    return Types.TimestampType.withoutZone();
                case DECIMAL: {
                    final DecimalTypeInfo decimalInfo = (DecimalTypeInfo) typeInfo;
                    return Types.DecimalType.of(decimalInfo.precision(), decimalInfo.scale());
                }
                case BYTE:
                case SHORT:
                    // Narrow integers are only accepted when auto conversion is enabled.
                    Preconditions.checkArgument(autoConvert, "Unsupported Hive type: %s, use integer instead", primitiveInfo.getPrimitiveCategory());
                    LOG.debug("Using auto conversion from SHORT/BYTE to INTEGER");
                    return Types.IntegerType.get();
                case CHAR:
                case VARCHAR:
                    // Bounded character types are only accepted when auto conversion is enabled.
                    Preconditions.checkArgument(autoConvert, "Unsupported Hive type: %s, use string instead", primitiveInfo.getPrimitiveCategory());
                    LOG.debug("Using auto conversion from CHAR/VARCHAR to STRING");
                    return Types.StringType.get();
                case INTERVAL_YEAR_MONTH:
                case INTERVAL_DAY_TIME:
                default: {
                    // special case for Timestamp with Local TZ which is only available in Hive3;
                    // matched by name so the enum constant does not have to exist at compile time
                    final boolean timestampWithLocalZone = "TIMESTAMPLOCALTZ".equalsIgnoreCase(primitiveInfo.getPrimitiveCategory().name());
                    if (!timestampWithLocalZone) {
                        throw new IllegalArgumentException("Unsupported Hive type (" + primitiveInfo.getPrimitiveCategory() + ") for Iceberg tables.");
                    }
                    return Types.TimestampType.withZone();
                }
            }
        }
        case STRUCT: {
            final StructTypeInfo structInfo = (StructTypeInfo) typeInfo;
            final List<Types.NestedField> nestedFields = convertInternal(structInfo.getAllStructFieldNames(), structInfo.getAllStructFieldTypeInfos(), Collections.emptyList());
            return Types.StructType.of(nestedFields);
        }
        case MAP: {
            final MapTypeInfo mapInfo = (MapTypeInfo) typeInfo;
            // Convert both nested types first; only then take ids for the key and the value.
            final Type convertedKey = convertType(mapInfo.getMapKeyTypeInfo());
            final Type convertedValue = convertType(mapInfo.getMapValueTypeInfo());
            // Arguments are evaluated left to right: the key id is taken first, the value id second.
            return Types.MapType.ofOptional(id++, id++, convertedKey, convertedValue);
        }
        case LIST: {
            final ListTypeInfo listInfo = (ListTypeInfo) typeInfo;
            // The element type must be converted before the list's own element id is taken.
            final Type convertedElement = convertType(listInfo.getListElementTypeInfo());
            return Types.ListType.ofOptional(id++, convertedElement);
        }
        case UNION:
        default:
            throw new IllegalArgumentException("Unknown type " + typeInfo.getCategory());
    }
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) Type(org.apache.iceberg.types.Type) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 27 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class TestSerDe method serialize.

/**
 * Serializes one struct row into a single separator-delimited string.
 *
 * <p>Primitive fields are written with {@code toString()} (or {@code nullString} when the field
 * value is {@code null}); every other field is written as the JSON produced by
 * {@code SerDeUtils.getJSONString}. The text is stored in {@code serializeCache}, which is also
 * the object returned.
 *
 * @param obj the row object to serialize
 * @param objInspector inspector describing {@code obj}; its category must be STRUCT
 * @return {@code serializeCache} after it has been set to the serialized row
 * @throws SerDeException if {@code objInspector} is not a struct inspector
 */
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
    final List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
    final StringBuilder row = new StringBuilder();
    for (int pos = 0; pos < fieldRefs.size(); pos++) {
        // Fields after the first are preceded by the separator.
        if (pos != 0) {
            row.append(separator);
        }
        final StructField fieldRef = fieldRefs.get(pos);
        final Object value = structInspector.getStructFieldData(obj, fieldRef);
        final ObjectInspector fieldInspector = fieldRef.getFieldObjectInspector();
        if (fieldInspector.getCategory() != Category.PRIMITIVE) {
            // For complex object, serialize to JSON format
            row.append(SerDeUtils.getJSONString(value, fieldInspector));
        } else {
            // For primitive object, serialize to plain string
            row.append(value != null ? value.toString() : nullString);
        }
    }
    serializeCache.set(row.toString());
    return serializeCache;
}
Also used : MetadataListStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 28 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class MapJoinBytesTableContainer method getComplexFieldsAsList.

/*
   * Fills objectArrayBuffer with one entry per field of the struct inspector and returns it.
   *
   * For primitive fields, the object returned by lazyBinaryStruct.getField is stored as is.
   * For all other fields, that object is first converted with
   * ObjectInspectorUtils.copyToStandardObject using the WRITABLE copy option.
   *
   * Entries are written with set(index, ...), so objectArrayBuffer must already contain at
   * least as many elements as the inspector has fields.
   */
public static List<Object> getComplexFieldsAsList(LazyBinaryStruct lazyBinaryStruct, ArrayList<Object> objectArrayBuffer, LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
    final List<? extends StructField> fieldRefs = lazyBinaryStructObjectInspector.getAllStructFieldRefs();
    for (int idx = 0; idx < fieldRefs.size(); idx++) {
        final ObjectInspector fieldInspector = fieldRefs.get(idx).getFieldObjectInspector();
        final boolean primitiveField = fieldInspector.getCategory() == Category.PRIMITIVE;
        final Object lazyValue = lazyBinaryStruct.getField(idx);
        final Object stored = primitiveField
                ? lazyValue
                : ObjectInspectorUtils.copyToStandardObject(lazyValue, fieldInspector, ObjectInspectorCopyOption.WRITABLE);
        objectArrayBuffer.set(idx, stored);
    }
    return objectArrayBuffer;
}
Also used : ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) LazyBinaryStructObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)

Example 29 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class VectorizationContext method castConstantToLong.

/**
 * Converts a constant to the {@code long} used to represent it, or {@code null} when the value
 * cannot be represented exactly in the requested integer category.
 *
 * <ul>
 *   <li>Numeric sources (BYTE, SHORT, INT, LONG, FLOAT, DOUBLE) are converted with
 *       {@code Number.longValue()} and are not range-checked here.</li>
 *   <li>STRING, CHAR and VARCHAR sources are parsed as a decimal long and then checked against
 *       the range of {@code integerPrimitiveCategory}.</li>
 *   <li>DECIMAL sources are checked with {@code HiveDecimal.isByte/isShort/isInt/isLong}.</li>
 * </ul>
 *
 * @param scalar the constant value; {@code null} yields {@code null}
 * @param type type of {@code scalar}; cast to {@code PrimitiveTypeInfo}
 * @param integerPrimitiveCategory target integer category (BYTE, SHORT, INT or LONG)
 * @return the long value, or {@code null} if {@code scalar} is {@code null} or does not fit
 * @throws HiveException if the source primitive category is not handled
 * @throws NumberFormatException if a string-family constant is not a parsable long
 * @throws RuntimeException if {@code integerPrimitiveCategory} is not an integer category
 */
private Long castConstantToLong(Object scalar, TypeInfo type, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
    if (scalar == null) {
        return null;
    }
    final PrimitiveCategory sourceCategory = ((PrimitiveTypeInfo) type).getPrimitiveCategory();
    switch (sourceCategory) {
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
            return ((Number) scalar).longValue();
        case STRING:
        case CHAR:
        case VARCHAR: {
            // Pull the textual form out of whichever string-family wrapper we were given.
            final String digits;
            if (sourceCategory == PrimitiveCategory.CHAR) {
                digits = ((HiveChar) scalar).getStrippedValue();
            } else if (sourceCategory == PrimitiveCategory.VARCHAR) {
                digits = ((HiveVarchar) scalar).getValue();
            } else {
                digits = (String) scalar;
            }
            final long parsed = Long.parseLong(digits);
            // A value fits when narrowing to the target width and widening back is lossless.
            final boolean representable;
            switch (integerPrimitiveCategory) {
                case BYTE:
                    representable = parsed == (byte) parsed;
                    break;
                case SHORT:
                    representable = parsed == (short) parsed;
                    break;
                case INT:
                    representable = parsed == (int) parsed;
                    break;
                case LONG:
                    // No range check needed.
                    representable = true;
                    break;
                default:
                    throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
            }
            if (!representable) {
                // Accurate value of the requested width cannot be obtained.
                return null;
            }
            return parsed;
        }
        case DECIMAL: {
            final HiveDecimal decimal = (HiveDecimal) scalar;
            final boolean representable;
            switch (integerPrimitiveCategory) {
                case BYTE:
                    representable = decimal.isByte();
                    break;
                case SHORT:
                    representable = decimal.isShort();
                    break;
                case INT:
                    representable = decimal.isInt();
                    break;
                case LONG:
                    representable = decimal.isLong();
                    break;
                default:
                    throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
            }
            if (!representable) {
                // Accurate value of the requested width cannot be obtained.
                return null;
            }
            // We only store longs in our LongColumnVector.
            return decimal.longValue();
        }
        default:
            throw new HiveException("Unsupported primitive category " + sourceCategory + " for cast to LONG");
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 30 with PRIMITIVE

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.

the class VectorizationContext method getStructInExpression.

/**
 * Builds the vector expression for {@code structColumn IN (struct-constant, ...)}.
 *
 * <p>Each entry of {@code inChildren} is turned into an array of per-field constants, written
 * field by field through a {@code BinarySortableSerializeWrite}, and the resulting bytes are
 * kept as one element of the IN list handed to the created expression. A scratch output column
 * is allocated for the expression via {@code ocm.allocateOutputColumn}.
 *
 * @param childExpr not referenced in this method body
 * @param colExpr the struct column expression; its children are passed to
 *        {@code setStructColumnExprs}
 * @param colTypeInfo type of the column; cast to {@code StructTypeInfo}
 * @param inChildren the IN-list entries; each is either an {@code ExprNodeConstantDesc} or is
 *        cast to {@code ExprNodeGenericFuncDesc}
 * @param mode FILTER selects {@code FilterStructColumnInList}, anything else
 *        {@code StructColumnInList}
 * @param returnType passed through to {@code createVectorExpression}
 * @return the configured expression, or {@code null} if any struct field is not primitive
 * @throws HiveException declared by the helpers called here; additionally, any exception
 *         raised while serializing the IN-list constants (including the unchecked ones thrown
 *         for unexpected constant types) is wrapped in a {@code HiveException}
 */
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    VectorExpression expr;
    StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
    List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    final int fieldCount = fieldTypeInfos.size();
    // Per-field lookups, filled once up front and reused for every IN-list entry below.
    ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
    InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
        // Only primitive fields are supported for now; bail out with null otherwise.
        if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
            return null;
        }
        // We are going to serialize using the 4 basic types.
        ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
        fieldVectorColumnTypes[f] = fieldVectorColumnType;
        // We currently evaluate the IN (..) constants in special ways.
        PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
        InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
        fieldInConstantTypes[f] = inConstantType;
    }
    // A single Output buffer and serializer are shared by all IN-list entries.
    Output buffer = new Output();
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
    final int inChildrenCount = inChildren.size();
    // One serialized byte[] per IN-list entry.
    byte[][] serializedInChildren = new byte[inChildrenCount][];
    try {
        for (int i = 0; i < inChildrenCount; i++) {
            final ExprNodeDesc node = inChildren.get(i);
            final Object[] constants;
            if (node instanceof ExprNodeConstantDesc) {
                // Constant struct: the writable constant value is treated as a List of field values.
                ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
                ConstantObjectInspector output = constNode.getWritableObjectInspector();
                constants = ((List<?>) output.getWritableConstantValue()).toArray();
            } else {
                // Anything else is cast to a generic function descriptor and evaluated with a null row;
                // the result is expected to be an Object[] of field values.
                ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
                ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
                // NOTE(review): 'output' is never read; only the initialize() call itself is used.
                ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
                constants = (Object[]) evaluator.evaluate(null);
            }
            // Point the serializer at the shared buffer before writing this entry
            // (presumably this also resets the buffer for the new entry -- TODO confirm).
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < fieldCount; f++) {
                Object constant = constants[f];
                if (constant == null) {
                    binarySortableSerializeWrite.writeNull();
                } else {
                    InConstantType inConstantType = fieldInConstantTypes[f];
                    switch(inConstantType) {
                        case STRING_FAMILY:
                            {
                                // Only Text constants are accepted for string-family fields.
                                byte[] bytes;
                                if (constant instanceof Text) {
                                    Text text = (Text) constant;
                                    bytes = text.getBytes();
                                    // Write only the first getLength() bytes of the array returned by getBytes().
                                    binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
                                } else {
                                    throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
                                }
                            }
                            break;
                        case INT_FAMILY:
                            {
                                // IntWritable and LongWritable constants are both written as a long.
                                long value;
                                if (constant instanceof IntWritable) {
                                    value = ((IntWritable) constant).get();
                                } else if (constant instanceof LongWritable) {
                                    value = ((LongWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeLong(value);
                            }
                            break;
                        case FLOAT_FAMILY:
                            {
                                // Only DoubleWritable constants are accepted for float-family fields.
                                double value;
                                if (constant instanceof DoubleWritable) {
                                    value = ((DoubleWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeDouble(value);
                            }
                            break;
                        // UNDONE...
                        // DATE, TIMESTAMP and DECIMAL constants are not serialized here; they fall
                        // through to the default branch and are rejected.
                        case DATE:
                        case TIMESTAMP:
                        case DECIMAL:
                        default:
                            throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
                    }
                }
            }
            // Snapshot the bytes written for this entry; the shared buffer is reused next iteration.
            serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
        }
    } catch (Exception e) {
        // Everything thrown above (HiveException and unchecked exceptions alike) is wrapped.
        throw new HiveException(e);
    }
    // Create a single child representing the scratch column where we will
    // generate the serialized keys of the batch.
    int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
    Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
    // NOTE(review): the class depends on 'mode', but Mode.PROJECTION is always passed here -- confirm this is intended.
    expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((IStringInExpr) expr).setInListValues(serializedInChildren);
    ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
    ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
    return expr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.StructColumnInList) FilterStructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStructColumnInList) IStringInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) TruncStringOutput(org.apache.hadoop.hive.ql.exec.vector.expressions.TruncStringOutput) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) IntWritable(org.apache.hadoop.io.IntWritable) FilterStructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStructColumnInList) IStructInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IStructInExpr) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) 
PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) InputExpressionType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)83 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)75 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)74 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)64 ArrayList (java.util.ArrayList)54 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)52 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)47 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)46 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)45 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)44 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)43 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)38 BytesWritable (org.apache.hadoop.io.BytesWritable)36 Text (org.apache.hadoop.io.Text)35 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)34 List (java.util.List)30 Map (java.util.Map)30 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)30 UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo)30 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)27