
Example 86 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

In class GenericUDFConcat, method initialize:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Loop through all the inputs to determine the appropriate return type/length.
    // Return type:
    // All CHAR inputs: return CHAR
    // All VARCHAR inputs: return VARCHAR
    // All CHAR/VARCHAR inputs: return VARCHAR
    // All BINARY inputs: return BINARY
    // Otherwise return STRING
    argumentOIs = arguments;
    PrimitiveCategory currentCategory;
    PrimitiveObjectInspector poi;
    boolean fixedLengthReturnValue = true;
    // Only for char/varchar return types
    int returnLength = 0;
    for (int idx = 0; idx < arguments.length; ++idx) {
        if (arguments[idx].getCategory() != Category.PRIMITIVE) {
            throw new UDFArgumentException("CONCAT only takes primitive arguments");
        }
        poi = (PrimitiveObjectInspector) arguments[idx];
        currentCategory = poi.getPrimitiveCategory();
        if (idx == 0) {
            returnType = currentCategory;
        }
        switch(currentCategory) {
            case BINARY:
                fixedLengthReturnValue = false;
                if (returnType != currentCategory) {
                    // mix of binary/non-binary args
                    returnType = PrimitiveCategory.STRING;
                }
                break;
            case CHAR:
            case VARCHAR:
                if (!fixedLengthReturnValue) {
                    returnType = PrimitiveCategory.STRING;
                }
                if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
                    returnType = PrimitiveCategory.VARCHAR;
                }
                break;
            default:
                returnType = PrimitiveCategory.STRING;
                fixedLengthReturnValue = false;
                break;
        }
        // If the summed argument length exceeds the max length for the char/varchar, then the return type reverts to string.
        if (fixedLengthReturnValue) {
            returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
            if ((returnType == PrimitiveCategory.VARCHAR && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) || (returnType == PrimitiveCategory.CHAR && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
                returnType = PrimitiveCategory.STRING;
                fixedLengthReturnValue = false;
            }
        }
    }
    if (returnType == PrimitiveCategory.BINARY) {
        bw = new BytesWritable[arguments.length];
        return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    } else {
        // treat all inputs as string, the return value will be converted to the appropriate type.
        createStringConverters();
        returnHelper = new GenericUDFUtils.StringHelper(returnType);
        BaseCharTypeInfo typeInfo;
        switch(returnType) {
            case STRING:
                return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
            case CHAR:
                typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
                return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            case VARCHAR:
                typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
                return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
            default:
                throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
        }
    }
}
Also used : UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
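
To see the return-type resolution above in action, here is a minimal sketch that calls initialize directly (the class name ConcatReturnTypeDemo is hypothetical; it assumes the Hive exec and serde jars are on the classpath). Two varchar(10) inputs keep the fixed-length return type and sum the lengths, giving varchar(20):

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConcatReturnTypeDemo {
    public static void main(String[] args) throws UDFArgumentException {
        // Two VARCHAR(10) inputs: per the logic above, the return type stays
        // fixed-length and the lengths are summed.
        ObjectInspector varchar10 = PrimitiveObjectInspectorFactory
            .getPrimitiveWritableObjectInspector(TypeInfoFactory.getVarcharTypeInfo(10));
        GenericUDFConcat concat = new GenericUDFConcat();
        ObjectInspector returnOI = concat.initialize(new ObjectInspector[] { varchar10, varchar10 });
        System.out.println(returnOI.getTypeName()); // expected: varchar(20)
    }
}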

Example 87 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

In class GenericUDFLikeAny, method initialize:

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length < 2) {
        throw new UDFArgumentLengthException("The like any operator requires at least one pattern for matching, got " + (arguments.length - 1));
    }
    inputTypes = new PrimitiveCategory[arguments.length];
    converters = new Converter[arguments.length];
    // Expects string and null arguments.
    for (int idx = 0; idx < arguments.length; idx++) {
        checkArgPrimitive(arguments, idx);
        checkArgGroups(arguments, idx, inputTypes, PrimitiveGrouping.STRING_GROUP, PrimitiveGrouping.VOID_GROUP);
        PrimitiveCategory inputType = ((PrimitiveObjectInspector) arguments[idx]).getPrimitiveCategory();
        if (arguments[idx] instanceof ConstantObjectInspector && idx != 0) {
            Object constValue = ((ConstantObjectInspector) arguments[idx]).getWritableConstantValue();
            if (!isConstantNullPatternContain && constValue == null) {
                isConstantNullPatternContain = true;
            }
        } else if (idx != 0 && isAllPatternsConstant) {
            isAllPatternsConstant = false;
        }
        converters[idx] = ObjectInspectorConverters.getConverter(arguments[idx], getOutputOI(inputType));
        inputTypes[idx] = inputType;
    }
    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
}
Also used : UDFArgumentLengthException(org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
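
A minimal sketch of driving this initialize method directly, under the same classpath assumptions (LikeAnyInitDemo is a hypothetical name). Passing the pattern as a constant object inspector exercises the ConstantObjectInspector branch above; the return inspector is always boolean:

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLikeAny;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;

public class LikeAnyInitDemo {
    public static void main(String[] args) throws UDFArgumentException {
        // First argument: the value being matched.
        ObjectInspector valueOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        // Second argument: a constant pattern, which takes the ConstantObjectInspector branch.
        ObjectInspector patternOI = PrimitiveObjectInspectorFactory
            .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, new Text("%foo%"));
        GenericUDFLikeAny likeAny = new GenericUDFLikeAny();
        ObjectInspector out = likeAny.initialize(new ObjectInspector[] { valueOI, patternOI });
        System.out.println(out.getTypeName()); // expected: boolean
    }
}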

Example 88 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

In class FunctionRegistry, method getPrimitiveCommonCategory:

public static PrimitiveCategory getPrimitiveCommonCategory(TypeInfo a, TypeInfo b) {
    if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
        return null;
    }
    PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
    PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
    if (pcA == pcB) {
        // Same primitive category
        return pcA;
    }
    if (pcA == PrimitiveCategory.VOID) {
        // Handle NULL, we return the type of pcB
        return pcB;
    }
    if (pcB == PrimitiveCategory.VOID) {
        // Handle NULL, we return the type of pcA
        return pcA;
    }
    PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
    PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
    if (pgA == pgB) {
        // Equal groups, return what we can handle
        switch(pgA) {
            case NUMERIC_GROUP:
                {
                    Integer ai = TypeInfoUtils.numericTypes.get(pcA);
                    Integer bi = TypeInfoUtils.numericTypes.get(pcB);
                    return (ai > bi) ? pcA : pcB;
                }
            case DATE_GROUP:
                {
                    Integer ai = TypeInfoUtils.dateTypes.get(pcA);
                    Integer bi = TypeInfoUtils.dateTypes.get(pcB);
                    return (ai > bi) ? pcA : pcB;
                }
            case STRING_GROUP:
                {
                    // handle string types properly
                    return PrimitiveCategory.STRING;
                }
            default:
                break;
        }
    }
    // Handle date-string common category and numeric-string common category
    if (pgA == PrimitiveGrouping.STRING_GROUP && (pgB == PrimitiveGrouping.DATE_GROUP || pgB == PrimitiveGrouping.NUMERIC_GROUP)) {
        return pcA;
    }
    if (pgB == PrimitiveGrouping.STRING_GROUP && (pgA == PrimitiveGrouping.DATE_GROUP || pgA == PrimitiveGrouping.NUMERIC_GROUP)) {
        return pcB;
    }
    // We could not find a common category, return null
    return null;
}
Also used : UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) UDFXPathInteger(org.apache.hadoop.hive.ql.udf.xml.UDFXPathInteger) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveGrouping(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping)
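
A short usage sketch of this public static method (CommonCategoryDemo is a hypothetical name): within the numeric group the wider category wins, and the string group absorbs the date and numeric groups, exactly as the code above implements:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CommonCategoryDemo {
    public static void main(String[] args) {
        // Same numeric group: the wider type wins, so int + bigint resolves to LONG.
        PrimitiveCategory numeric = FunctionRegistry.getPrimitiveCommonCategory(
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo);
        System.out.println(numeric); // expected: LONG
        // String group vs. date group: the string group absorbs it, so the result is STRING.
        PrimitiveCategory mixed = FunctionRegistry.getPrimitiveCommonCategory(
            TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo);
        System.out.println(mixed); // expected: STRING
    }
}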

Example 89 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

In class FunctionRegistry, method getCommonClassForUnionAll:

/**
 * Find a common type for union-all operator. Only the common types for the same
 * type group will resolve to a common type. No implicit conversion across different
 * type groups will be done.
 */
public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
    if (a.equals(b)) {
        return a;
    }
    if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
        return null;
    }
    PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
    PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
    if (pcA == pcB) {
        // Same primitive category but different qualifiers.
        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, pcA);
    }
    PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
    PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
    // untyped nulls
    if (pgA == PrimitiveGrouping.VOID_GROUP) {
        return b;
    }
    if (pgB == PrimitiveGrouping.VOID_GROUP) {
        return a;
    }
    if (pgA != pgB) {
        return null;
    }
    switch(pgA) {
        case STRING_GROUP:
            return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, PrimitiveCategory.STRING);
        case NUMERIC_GROUP:
            return TypeInfoUtils.implicitConvertible(a, b) ? b : a;
        case DATE_GROUP:
            return TypeInfoFactory.timestampTypeInfo;
        default:
            return null;
    }
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveGrouping(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping)
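
A sketch of the union-all rule (UnionAllTypeDemo is a hypothetical name): within the numeric group the type the other is implicitly convertible to wins, while types from different groups resolve to null:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionAllTypeDemo {
    public static void main(String[] args) {
        // Same numeric group: int is implicitly convertible to double, so double wins.
        TypeInfo numeric = FunctionRegistry.getCommonClassForUnionAll(
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo);
        System.out.println(numeric); // expected: double
        // Different groups (numeric vs. string): no cross-group conversion, so the result is null.
        TypeInfo none = FunctionRegistry.getCommonClassForUnionAll(
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
        System.out.println(none); // expected: null
    }
}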

Example 90 with PrimitiveCategory

Use of org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory in project hive by apache.

In class Vectorizer, method specializeReduceSinkOperator:

private Operator<? extends OperatorDesc> specializeReduceSinkOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkDesc vectorDesc) throws HiveException {
    VectorReduceSinkInfo vectorReduceSinkInfo = vectorDesc.getVectorReduceSinkInfo();
    Type[] reduceSinkKeyColumnVectorTypes = vectorReduceSinkInfo.getReduceSinkKeyColumnVectorTypes();
    // By default, we can always use the multi-key class.
    VectorReduceSinkDesc.ReduceSinkKeyType reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.MULTI_KEY;
    // Look for single column optimization.
    if (reduceSinkKeyColumnVectorTypes != null && reduceSinkKeyColumnVectorTypes.length == 1) {
        LOG.info("Vectorizer vectorizeOperator groupby typeName " + vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()[0]);
        Type columnVectorType = reduceSinkKeyColumnVectorTypes[0];
        switch(columnVectorType) {
            case LONG:
                {
                    PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()[0]).getPrimitiveCategory();
                    switch(primitiveCategory) {
                        case BOOLEAN:
                        case BYTE:
                        case SHORT:
                        case INT:
                        case LONG:
                            reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.LONG;
                            break;
                        default:
                            // Other integer types not supported yet.
                            break;
                    }
                }
                break;
            case BYTES:
                reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.STRING;
                break;
            default:
                // Stay with multi-key.
                break;
        }
    }
    Class<? extends Operator<?>> opClass = null;
    if (vectorReduceSinkInfo.getUseUniformHash()) {
        if (vectorDesc.getIsEmptyKey()) {
            opClass = VectorReduceSinkEmptyKeyOperator.class;
        } else {
            switch(reduceSinkKeyType) {
                case LONG:
                    opClass = VectorReduceSinkLongOperator.class;
                    break;
                case STRING:
                    opClass = VectorReduceSinkStringOperator.class;
                    break;
                case MULTI_KEY:
                    opClass = VectorReduceSinkMultiKeyOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType);
            }
        }
    } else {
        if (vectorDesc.getIsEmptyKey() && vectorDesc.getIsEmptyBuckets() && vectorDesc.getIsEmptyPartitions()) {
            opClass = VectorReduceSinkEmptyKeyOperator.class;
        } else {
            opClass = VectorReduceSinkObjectHashOperator.class;
        }
    }
    vectorDesc.setReduceSinkKeyType(reduceSinkKeyType);
    vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo);
    LOG.info("Vectorizer vectorizeOperator reduce sink class " + opClass.getSimpleName());
    Operator<? extends OperatorDesc> vectorOp = null;
    try {
        vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext, vectorDesc);
    } catch (Exception e) {
        LOG.info("Vectorizer vectorizeOperator reduce sink class exception " + opClass.getSimpleName() + " exception " + e);
        throw new HiveException(e);
    }
    return vectorOp;
}
Also used : InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorDeserializeType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) OperatorType(org.apache.hadoop.hive.ql.plan.api.OperatorType) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
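
Since specializeReduceSinkOperator is private and depends on a built query plan, a direct demo is impractical; instead, here is a sketch that extracts just the single-column key-type decision into a standalone helper (chooseKeyType and ReduceSinkKeyTypeDemo are hypothetical names, not part of Hive):

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

public class ReduceSinkKeyTypeDemo {
    // Mirrors the single-column optimization above: LONG vectors holding
    // integer-family categories get the LONG key class, BYTES vectors get
    // STRING, and everything else stays with the general multi-key class.
    static ReduceSinkKeyType chooseKeyType(Type columnVectorType, PrimitiveCategory primitiveCategory) {
        switch (columnVectorType) {
            case LONG:
                switch (primitiveCategory) {
                    case BOOLEAN:
                    case BYTE:
                    case SHORT:
                    case INT:
                    case LONG:
                        return ReduceSinkKeyType.LONG;
                    default:
                        return ReduceSinkKeyType.MULTI_KEY;
                }
            case BYTES:
                return ReduceSinkKeyType.STRING;
            default:
                return ReduceSinkKeyType.MULTI_KEY;
        }
    }

    public static void main(String[] args) {
        System.out.println(chooseKeyType(Type.LONG, PrimitiveCategory.INT));        // LONG
        System.out.println(chooseKeyType(Type.BYTES, PrimitiveCategory.STRING));    // STRING
        System.out.println(chooseKeyType(Type.DECIMAL, PrimitiveCategory.DECIMAL)); // MULTI_KEY
    }
}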

Aggregations

PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) : 84
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) : 45
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) : 26
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) : 17
ArrayList (java.util.ArrayList) : 15
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) : 15
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) : 14
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) : 12
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) : 12
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) : 11
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) : 11
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) : 11
Test (org.junit.Test) : 11
UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException) : 10
Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) : 10
HiveChar (org.apache.hadoop.hive.common.type.HiveChar) : 9
ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) : 9
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) : 8
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar) : 8
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable) : 8