Search in sources :

Example 26 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

In class FunctionRegistry, method getCommonClassForComparison:

/**
 * Finds a common type that values of both {@code a} and {@code b} can be
 * converted to, so that objects of the two types can be compared.
 *
 * For example, when comparing a string against a double, both sides are
 * converted to double before the comparison.
 *
 * @param a the first type
 * @param b the second type
 * @return the common comparison type, or null if none exists
 */
public static synchronized TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) {
    // Identical types compare directly.
    if (a.equals(b)) {
        return a;
    }
    // Only primitive types participate in implicit comparison conversion.
    if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
        return null;
    }
    final PrimitiveCategory catA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
    final PrimitiveCategory catB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
    if (catA == catB) {
        // Same category but unequal types (e.g. differing char/varchar/decimal
        // parameters); let getTypeInfoForPrimitiveCategory() reconcile them.
        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, catA);
    }
    final PrimitiveGrouping groupA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(catA);
    final PrimitiveGrouping groupB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(catB);
    // Same grouping, different category: timestamp vs. date resolves to
    // timestamp, which has the higher precedence.
    if (groupA == groupB && groupA == PrimitiveGrouping.DATE_GROUP) {
        return TypeInfoFactory.timestampTypeInfo;
    }
    // Two string-family types compare as strings. Char comparison semantics
    // may be different if/when implemented.
    if (groupA == PrimitiveGrouping.STRING_GROUP && groupB == PrimitiveGrouping.STRING_GROUP) {
        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, PrimitiveCategory.STRING);
    }
    // A date-family type beats a string-family type, whichever side it is on.
    if (groupA == PrimitiveGrouping.STRING_GROUP && groupB == PrimitiveGrouping.DATE_GROUP) {
        return b;
    }
    if (groupB == PrimitiveGrouping.STRING_GROUP && groupA == PrimitiveGrouping.DATE_GROUP) {
        return a;
    }
    // Special case: timestamp is not implicitly convertible to numeric types,
    // so timestamp-vs-numeric comparisons go through double.
    boolean anyNumeric = groupA == PrimitiveGrouping.NUMERIC_GROUP || groupB == PrimitiveGrouping.NUMERIC_GROUP;
    boolean anyTimestamp = catA == PrimitiveCategory.TIMESTAMP || catB == PrimitiveCategory.TIMESTAMP;
    if (anyNumeric && anyTimestamp) {
        return TypeInfoFactory.doubleTypeInfo;
    }
    // Otherwise pick the narrowest numeric type both sides implicitly convert to.
    for (PrimitiveCategory candidate : TypeInfoUtils.numericTypeList) {
        if (TypeInfoUtils.implicitConvertible(catA, candidate) && TypeInfoUtils.implicitConvertible(catB, candidate)) {
            return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, candidate);
        }
    }
    // No common comparison type exists.
    return null;
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveGrouping(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping)

Example 27 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

In class FunctionRegistry, method getMethodInternal:

/**
   * Gets the closest matching method corresponding to the argument list from a
   * list of methods.
   *
   * @param udfClass
   *          The UDF class owning the methods; used for error reporting.
   * @param mlist
   *          The list of methods to inspect.
   * @param exact
   *          Boolean to indicate whether this is an exact match or not.
   * @param argumentsPassed
   *          The classes for the argument.
   * @return The matching method.
   * @throws UDFArgumentException if no method matches
   *          (NoMatchingMethodException) or several methods match equally
   *          well (AmbiguousMethodException).
   */
public static Method getMethodInternal(Class<?> udfClass, List<Method> mlist, boolean exact, List<TypeInfo> argumentsPassed) throws UDFArgumentException {
    // Candidate methods tied for the best (lowest) conversion cost so far.
    List<Method> udfMethods = new ArrayList<Method>();
    // The cost of the result
    int leastConversionCost = Integer.MAX_VALUE;
    for (Method m : mlist) {
        List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
        if (argumentsAccepted == null) {
            // null means the method does not accept number of arguments passed.
            continue;
        }
        boolean match = (argumentsAccepted.size() == argumentsPassed.size());
        int conversionCost = 0;
        // Sum per-argument conversion costs; a cost of -1 means this argument
        // cannot be converted, so the method is rejected.
        for (int i = 0; i < argumentsPassed.size() && match; i++) {
            int cost = matchCost(argumentsPassed.get(i), argumentsAccepted.get(i), exact);
            if (cost == -1) {
                match = false;
            } else {
                conversionCost += cost;
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = " + argumentsPassed + " accepted = " + argumentsAccepted + " method = " + m);
        }
        if (match) {
            // Always choose the function with least implicit conversions.
            if (conversionCost < leastConversionCost) {
                udfMethods.clear();
                udfMethods.add(m);
                leastConversionCost = conversionCost;
                // Found an exact match
                if (leastConversionCost == 0) {
                    break;
                }
            } else if (conversionCost == leastConversionCost) {
                // Ambiguous call: two methods with the same number of implicit
                // conversions
                udfMethods.add(m);
            // Don't break! We might find a better match later.
            } else {
            // do nothing if implicitConversions > leastImplicitConversions
            }
        }
    }
    if (udfMethods.size() == 0) {
        // No matching methods found
        throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist);
    }
    if (udfMethods.size() > 1) {
        // First try selecting methods based on the type affinity of the arguments passed
        // to the candidate method arguments.
        filterMethodsByTypeAffinity(udfMethods, argumentsPassed);
    }
    if (udfMethods.size() > 1) {
        // if the only difference is numeric types, pick the method
        // with the smallest overall numeric type.
        int lowestNumericType = Integer.MAX_VALUE;
        // true while two or more candidates share the current lowest max type.
        boolean multiple = true;
        Method candidate = null;
        // Parameter types of the first candidate; all non-numeric parameters of
        // every other candidate must be identical to these.
        List<TypeInfo> referenceArguments = null;
        for (Method m : udfMethods) {
            // Widest numeric parameter type of this method (0 if none).
            int maxNumericType = 0;
            List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
            if (referenceArguments == null) {
                // keep the arguments for reference - we want all the non-numeric
                // arguments to be the same
                referenceArguments = argumentsAccepted;
            }
            Iterator<TypeInfo> referenceIterator = referenceArguments.iterator();
            for (TypeInfo accepted : argumentsAccepted) {
                TypeInfo reference = referenceIterator.next();
                boolean acceptedIsPrimitive = false;
                PrimitiveCategory acceptedPrimCat = PrimitiveCategory.UNKNOWN;
                if (accepted.getCategory() == Category.PRIMITIVE) {
                    acceptedIsPrimitive = true;
                    acceptedPrimCat = ((PrimitiveTypeInfo) accepted).getPrimitiveCategory();
                }
                if (acceptedIsPrimitive && TypeInfoUtils.numericTypes.containsKey(acceptedPrimCat)) {
                    // We're looking for the udf with the smallest maximum numeric type.
                    int typeValue = TypeInfoUtils.numericTypes.get(acceptedPrimCat);
                    maxNumericType = typeValue > maxNumericType ? typeValue : maxNumericType;
                } else if (!accepted.equals(reference)) {
                    // Found a non-numeric parameter that differs between one method
                    // and another. We give up at this point.
                    throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
                }
            }
            if (lowestNumericType > maxNumericType) {
                multiple = false;
                lowestNumericType = maxNumericType;
                candidate = m;
            } else if (maxNumericType == lowestNumericType) {
                // multiple udfs with the same max type. Unless we find a lower one
                // we'll give up.
                multiple = true;
            }
        }
        if (!multiple) {
            return candidate;
        } else {
            throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
        }
    }
    return udfMethods.get(0);
}
Also used : ArrayList(java.util.ArrayList) Method(java.lang.reflect.Method) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)

Example 28 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

In class MapJoinKey, method isSupportedField:

/**
 * Returns whether a field of the given type may be used as a map-join key.
 * Only primitive types contained in {@code SUPPORTED_PRIMITIVES} qualify;
 * complex types (list/map/struct/union) are rejected.
 *
 * @param typeInfo the type to check
 * @return true iff the type is a supported primitive category
 */
public static boolean isSupportedField(TypeInfo typeInfo) {
    if (typeInfo.getCategory() != Category.PRIMITIVE) {
        // complex types are not supported
        return false;
    }
    PrimitiveCategory pc = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
    // Replaces the original `if (!contains) return false; return true;`
    // anti-idiom with a direct boolean return.
    return SUPPORTED_PRIMITIVES.contains(pc);
}
Also used : PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 29 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

In class VectorizationContext, method castConstantToLong:

/**
 * Casts a constant scalar to a Long for storage in a LongColumnVector.
 *
 * @param scalar the constant value (may be null)
 * @param type the Hive type of the constant
 * @param integerPrimitiveCategory the target integer category used to
 *          range-check decimal constants (BYTE/SHORT/INT/LONG)
 * @return the long value, or null when the input is null or a decimal that
 *         does not fit the target integer category exactly
 * @throws HiveException if the source type cannot be cast to Long
 */
private Long castConstantToLong(Object scalar, TypeInfo type, PrimitiveCategory integerPrimitiveCategory) throws HiveException {
    if (scalar == null) {
        return null;
    }
    PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) type;
    switch (primitiveInfo.getPrimitiveCategory()) {
        case FLOAT:
        case DOUBLE:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            // All plain numeric constants arrive as Number subclasses.
            return ((Number) scalar).longValue();
        case DECIMAL:
            HiveDecimal decimalVal = (HiveDecimal) scalar;
            // A decimal is only usable when it fits the target integer
            // category exactly; otherwise return null.
            boolean fitsTarget;
            switch (integerPrimitiveCategory) {
                case BYTE:
                    fitsTarget = decimalVal.isByte();
                    break;
                case SHORT:
                    fitsTarget = decimalVal.isShort();
                    break;
                case INT:
                    fitsTarget = decimalVal.isInt();
                    break;
                case LONG:
                    fitsTarget = decimalVal.isLong();
                    break;
                default:
                    throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory);
            }
            // We only store longs in our LongColumnVector.
            return fitsTarget ? decimalVal.longValue() : null;
        default:
            throw new HiveException("Unsupported type " + type.getTypeName() + " for cast to Long");
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 30 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

In class VectorSerializeRow, method init:

/**
 * Initializes per-column category metadata from the given type names.
 * Populates {@code categories}, {@code primitiveCategories} (primitive
 * columns only) and copies {@code columnMap} into {@code outputColumnNums}.
 *
 * @param typeNames one Hive type string per column
 * @param columnMap output column numbers; only the first typeNames.size()
 *          entries are used
 */
public void init(List<String> typeNames, int[] columnMap) throws HiveException {
    final int columnCount = typeNames.size();
    categories = new Category[columnCount];
    primitiveCategories = new PrimitiveCategory[columnCount];
    outputColumnNums = Arrays.copyOf(columnMap, columnCount);
    for (int col = 0; col < columnCount; col++) {
        TypeInfo columnType = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(col));
        categories[col] = columnType.getCategory();
        if (categories[col] == Category.PRIMITIVE) {
            // Non-primitive columns leave primitiveCategories[col] null.
            primitiveCategories[col] = ((PrimitiveTypeInfo) columnType).getPrimitiveCategory();
        }
    }
}
Also used : TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)110 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)41 ArrayList (java.util.ArrayList)37 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)33 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)26 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)25 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)23 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)20 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)19 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)18 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)18 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)15 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)15 BytesWritable (org.apache.hadoop.io.BytesWritable)15 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)14 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)14 IntWritable (org.apache.hadoop.io.IntWritable)13 Text (org.apache.hadoop.io.Text)13 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)11 BooleanWritable (org.apache.hadoop.io.BooleanWritable)11