Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorAssignRow, method assignRowColumn.
private void assignRowColumn(ColumnVector columnVector, int batchIndex, TypeInfo targetTypeInfo, Object object) {
  if (object == null) {
    VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
    return;
  }
  switch (targetTypeInfo.getCategory()) {
  case PRIMITIVE:
    {
      final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory();
      switch (targetPrimitiveCategory) {
      case VOID:
        VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex);
        return;
      case BOOLEAN:
        if (object instanceof Boolean) {
          ((LongColumnVector) columnVector).vector[batchIndex] = (((Boolean) object) ? 1 : 0);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = (((BooleanWritable) object).get() ? 1 : 0);
        }
        break;
      case BYTE:
        if (object instanceof Byte) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Byte) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((ByteWritable) object).get();
        }
        break;
      case SHORT:
        if (object instanceof Short) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Short) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((ShortWritable) object).get();
        }
        break;
      case INT:
        if (object instanceof Integer) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Integer) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((IntWritable) object).get();
        }
        break;
      case LONG:
        if (object instanceof Long) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((Long) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((LongWritable) object).get();
        }
        break;
      case TIMESTAMP:
        if (object instanceof Timestamp) {
          ((TimestampColumnVector) columnVector).set(batchIndex, ((Timestamp) object));
        } else {
          ((TimestampColumnVector) columnVector).set(batchIndex, ((TimestampWritable) object).getTimestamp());
        }
        break;
      case DATE:
        if (object instanceof Date) {
          ((LongColumnVector) columnVector).vector[batchIndex] = DateWritable.dateToDays((Date) object);
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((DateWritable) object).getDays();
        }
        break;
      case FLOAT:
        if (object instanceof Float) {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Float) object);
        } else {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((FloatWritable) object).get();
        }
        break;
      case DOUBLE:
        if (object instanceof Double) {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((Double) object);
        } else {
          ((DoubleColumnVector) columnVector).vector[batchIndex] = ((DoubleWritable) object).get();
        }
        break;
      case BINARY:
        {
          if (object instanceof byte[]) {
            byte[] bytes = (byte[]) object;
            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
          } else {
            BytesWritable bw = (BytesWritable) object;
            ((BytesColumnVector) columnVector).setVal(batchIndex, bw.getBytes(), 0, bw.getLength());
          }
        }
        break;
      case STRING:
        {
          if (object instanceof String) {
            String string = (String) object;
            byte[] bytes = string.getBytes();
            ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
          } else {
            Text tw = (Text) object;
            ((BytesColumnVector) columnVector).setVal(batchIndex, tw.getBytes(), 0, tw.getLength());
          }
        }
        break;
      case VARCHAR:
        {
          // UNDONE: Performance problem with conversion to String, then bytes...
          // We store VARCHAR type stripped of pads.
          HiveVarchar hiveVarchar;
          if (object instanceof HiveVarchar) {
            hiveVarchar = (HiveVarchar) object;
          } else {
            hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
          }
          // TODO: HIVE-13624 Do we need maxLength checking?
          byte[] bytes = hiveVarchar.getValue().getBytes();
          ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
        }
        break;
      case CHAR:
        {
          // UNDONE: Performance problem with conversion to String, then bytes...
          // We store CHAR type stripped of pads.
          HiveChar hiveChar;
          if (object instanceof HiveChar) {
            hiveChar = (HiveChar) object;
          } else {
            hiveChar = ((HiveCharWritable) object).getHiveChar();
          }
          // TODO: HIVE-13624 Do we need maxLength checking?
          // We store CHAR in vector row batch with padding stripped.
          byte[] bytes = hiveChar.getStrippedValue().getBytes();
          ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length);
        }
        break;
      case DECIMAL:
        if (object instanceof HiveDecimal) {
          ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimal) object);
        } else {
          ((DecimalColumnVector) columnVector).set(batchIndex, (HiveDecimalWritable) object);
        }
        break;
      case INTERVAL_YEAR_MONTH:
        if (object instanceof HiveIntervalYearMonth) {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonth) object).getTotalMonths();
        } else {
          ((LongColumnVector) columnVector).vector[batchIndex] = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths();
        }
        break;
      case INTERVAL_DAY_TIME:
        if (object instanceof HiveIntervalDayTime) {
          ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, (HiveIntervalDayTime) object);
        } else {
          ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime());
        }
        break;
      default:
        throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported");
      }
    }
    break;
  case LIST:
    {
      final ListColumnVector listColumnVector = (ListColumnVector) columnVector;
      final ListTypeInfo listTypeInfo = (ListTypeInfo) targetTypeInfo;
      final TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
      final List list = (List) object;
      final int size = list.size();
      final int childCount = listColumnVector.childCount;
      listColumnVector.offsets[batchIndex] = childCount;
      listColumnVector.lengths[batchIndex] = size;
      listColumnVector.childCount = childCount + size;
      listColumnVector.child.ensureSize(childCount + size, true);
      for (int i = 0; i < size; i++) {
        assignRowColumn(listColumnVector.child, childCount + i, elementTypeInfo, list.get(i));
      }
    }
    break;
  case MAP:
    {
      final MapColumnVector mapColumnVector = (MapColumnVector) columnVector;
      final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo;
      final Map<Object, Object> map = (Map<Object, Object>) object;
      final int size = map.size();
      int childCount = mapColumnVector.childCount;
      mapColumnVector.offsets[batchIndex] = childCount;
      mapColumnVector.lengths[batchIndex] = size;
      mapColumnVector.keys.ensureSize(childCount + size, true);
      mapColumnVector.values.ensureSize(childCount + size, true);
      for (Map.Entry<Object, Object> entry : map.entrySet()) {
        assignRowColumn(mapColumnVector.keys, childCount, mapTypeInfo.getMapKeyTypeInfo(), entry.getKey());
        assignRowColumn(mapColumnVector.values, childCount, mapTypeInfo.getMapValueTypeInfo(), entry.getValue());
        childCount++;
      }
      mapColumnVector.childCount = childCount;
    }
    break;
  case STRUCT:
    {
      final StructColumnVector structColumnVector = (StructColumnVector) columnVector;
      final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo;
      final List<TypeInfo> targetFieldTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos();
      final int size = targetFieldTypeInfos.size();
      if (object instanceof List) {
        final List struct = (List) object;
        for (int i = 0; i < size; i++) {
          assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), struct.get(i));
        }
      } else {
        final Object[] array = (Object[]) object;
        for (int i = 0; i < size; i++) {
          assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), array[i]);
        }
      }
    }
    break;
  case UNION:
    {
      final StandardUnion union = (StandardUnion) object;
      final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector;
      final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo;
      final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
      final byte tag = union.getTag();
      unionColumnVector.tags[batchIndex] = tag;
      assignRowColumn(unionColumnVector.fields[tag], batchIndex, objectTypeInfos.get(tag), union.getObject());
    }
    break;
  default:
    throw new RuntimeException("Category " + targetTypeInfo.getCategory().name() + " not supported");
  }
  /*
   * We always set the null flag to false when there is a value.
   */
  columnVector.isNull[batchIndex] = false;
}
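The following is a minimal, self-contained sketch (not part of Hive's source) of what the PRIMITIVE branches above do to vectorized columns: an INT value is widened into a LongColumnVector's long[] slot, a STRING's bytes are copied into a BytesColumnVector, and the row's null flag is cleared. The class name AssignRowColumnSketch and the literal values are made up for illustration.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class AssignRowColumnSketch {
  public static void main(String[] args) {
    int batchIndex = 0;

    // INT case: the boxed value is unboxed and widened into the long[] backing array.
    LongColumnVector intCol = new LongColumnVector(1024);
    Integer intValue = 42;
    intCol.vector[batchIndex] = intValue;
    intCol.isNull[batchIndex] = false;

    // STRING case: the bytes are copied into the vector's local buffer via setVal.
    BytesColumnVector stringCol = new BytesColumnVector(1024);
    stringCol.initBuffer();
    byte[] bytes = "hello".getBytes();
    stringCol.setVal(batchIndex, bytes, 0, bytes.length);
    stringCol.isNull[batchIndex] = false;

    System.out.println(intCol.vector[batchIndex]); // 42
    System.out.println(new String(stringCol.vector[batchIndex], stringCol.start[batchIndex], stringCol.length[batchIndex])); // hello
  }
}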
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorAssignRow, method initConvertSourceEntry.
/*
 * Initialize one column's source conversion related arrays.
 * Assumes initTargetEntry has already been called.
 */
private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) {
  isConvert[logicalColumnIndex] = true;
  final Category convertSourceCategory = convertSourceTypeInfo.getCategory();
  convertSourceOI[logicalColumnIndex] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(convertSourceTypeInfo);
  if (convertSourceCategory == Category.PRIMITIVE) {
    // These need to be based on the target.
    final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory();
    switch (targetPrimitiveCategory) {
    case DATE:
      convertTargetWritables[logicalColumnIndex] = new DateWritable();
      break;
    case STRING:
      convertTargetWritables[logicalColumnIndex] = new Text();
      break;
    default:
      // No additional data type specific setting.
      break;
    }
  }
}
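As a hedged illustration of why a reusable DateWritable is preallocated for DATE targets: on the convert path a single writable per column can turn each parsed source value into the epoch-day encoding that the DATE branch of assignRowColumn writes into a LongColumnVector, without allocating per row. The class name ConvertDateSketch and the literal date are arbitrary stand-ins.

import java.sql.Date;
import org.apache.hadoop.hive.serde2.io.DateWritable;

public class ConvertDateSketch {
  public static void main(String[] args) {
    // One DateWritable per column, reused for every row (as the writable array above suggests).
    DateWritable reusableWritable = new DateWritable();
    Date parsedSourceValue = Date.valueOf("2017-03-15"); // stand-in for a converted source value
    reusableWritable.set(parsedSourceValue);
    // getDays() yields the days-since-epoch value that the DATE branch stores in the vector.
    System.out.println(reusableWritable.getDays());
  }
}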
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class FunctionRegistry, method getCommonClassForComparison.
/**
 * Find a common class that objects of both TypeInfo a and TypeInfo b can
 * convert to. This is used for comparing objects of type a and type b.
 *
 * When we are comparing string and double, we will always convert both of
 * them to double and then compare.
 *
 * @return null if no common class could be found.
 */
public static synchronized TypeInfo getCommonClassForComparison(TypeInfo a, TypeInfo b) {
  // If same return one of them
  if (a.equals(b)) {
    return a;
  }
  if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
    return null;
  }
  PrimitiveCategory pcA = ((PrimitiveTypeInfo) a).getPrimitiveCategory();
  PrimitiveCategory pcB = ((PrimitiveTypeInfo) b).getPrimitiveCategory();
  if (pcA == pcB) {
    // Rely on getTypeInfoForPrimitiveCategory() to sort out the type params.
    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, pcA);
  }
  PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
  PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
  if (pgA == pgB) {
    // grouping is same, but category is not.
    if (pgA == PrimitiveGrouping.DATE_GROUP) {
      Integer ai = TypeInfoUtils.dateTypes.get(pcA);
      Integer bi = TypeInfoUtils.dateTypes.get(pcB);
      return (ai > bi) ? a : b;
    }
  }
  // handle string types properly
  if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) {
    // Compare as strings. Char comparison semantics may be different if/when implemented.
    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, PrimitiveCategory.STRING);
  }
  // timestamp/date is higher precedence than STRING_GROUP
  if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.DATE_GROUP) {
    return b;
  }
  // date/timestamp is higher precedence than STRING_GROUP
  if (pgB == PrimitiveGrouping.STRING_GROUP && pgA == PrimitiveGrouping.DATE_GROUP) {
    return a;
  }
  // Another special case, because timestamp is not implicitly convertible to numeric types.
  if ((pgA == PrimitiveGrouping.NUMERIC_GROUP || pgB == PrimitiveGrouping.NUMERIC_GROUP) && (pcA == PrimitiveCategory.TIMESTAMP || pcB == PrimitiveCategory.TIMESTAMP)) {
    return TypeInfoFactory.doubleTypeInfo;
  }
  for (PrimitiveCategory t : TypeInfoUtils.numericTypeList) {
    if (TypeInfoUtils.implicitConvertible(pcA, t) && TypeInfoUtils.implicitConvertible(pcB, t)) {
      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo) a, (PrimitiveTypeInfo) b, t);
    }
  }
  return null;
}
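A short, hedged usage sketch of getCommonClassForComparison. The expected outputs in the comments follow from the branches above (identical types, the explicit timestamp-vs-numeric special case, and the early null return for non-primitive operands); they are illustrative, not a full specification of Hive's coercion rules.

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CommonComparisonTypeSketch {
  public static void main(String[] args) {
    // Identical types: the first argument is returned unchanged.
    TypeInfo same = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    System.out.println(same); // int

    // Timestamp vs a numeric type: the explicit special case yields double.
    TypeInfo tsVsInt = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.intTypeInfo);
    System.out.println(tsVsInt); // double

    // A non-primitive operand (here a list type): no common comparison type.
    TypeInfo listVsInt = FunctionRegistry.getCommonClassForComparison(
        TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo),
        TypeInfoFactory.intTypeInfo);
    System.out.println(listVsInt); // null
  }
}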
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class FunctionRegistry, method getMethodInternal.
/**
 * Gets the closest matching method corresponding to the argument list from a
 * list of methods.
 *
 * @param mlist
 *          The list of methods to inspect.
 * @param exact
 *          Boolean to indicate whether this is an exact match or not.
 * @param argumentsPassed
 *          The classes for the argument.
 * @return The matching method.
 */
public static Method getMethodInternal(Class<?> udfClass, List<Method> mlist, boolean exact, List<TypeInfo> argumentsPassed) throws UDFArgumentException {
  // result
  List<Method> udfMethods = new ArrayList<Method>();
  // The cost of the result
  int leastConversionCost = Integer.MAX_VALUE;
  for (Method m : mlist) {
    List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
    if (argumentsAccepted == null) {
      // null means the method does not accept the number of arguments passed.
      continue;
    }
    boolean match = (argumentsAccepted.size() == argumentsPassed.size());
    int conversionCost = 0;
    for (int i = 0; i < argumentsPassed.size() && match; i++) {
      int cost = matchCost(argumentsPassed.get(i), argumentsAccepted.get(i), exact);
      if (cost == -1) {
        match = false;
      } else {
        conversionCost += cost;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = " + argumentsPassed + " accepted = " + argumentsAccepted + " method = " + m);
    }
    if (match) {
      // Always choose the function with least implicit conversions.
      if (conversionCost < leastConversionCost) {
        udfMethods.clear();
        udfMethods.add(m);
        leastConversionCost = conversionCost;
        // Found an exact match
        if (leastConversionCost == 0) {
          break;
        }
      } else if (conversionCost == leastConversionCost) {
        // Ambiguous call: two methods with the same number of implicit
        // conversions
        udfMethods.add(m);
        // Don't break! We might find a better match later.
      } else {
        // do nothing if implicitConversions > leastImplicitConversions
      }
    }
  }
  if (udfMethods.size() == 0) {
    // No matching methods found
    throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist);
  }
  if (udfMethods.size() > 1) {
    // First try selecting methods based on the type affinity of the arguments passed
    // to the candidate method arguments.
    filterMethodsByTypeAffinity(udfMethods, argumentsPassed);
  }
  if (udfMethods.size() > 1) {
    // if the only difference is numeric types, pick the method
    // with the smallest overall numeric type.
    int lowestNumericType = Integer.MAX_VALUE;
    boolean multiple = true;
    Method candidate = null;
    List<TypeInfo> referenceArguments = null;
    for (Method m : udfMethods) {
      int maxNumericType = 0;
      List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
      if (referenceArguments == null) {
        // keep the arguments for reference - we want all the non-numeric
        // arguments to be the same
        referenceArguments = argumentsAccepted;
      }
      Iterator<TypeInfo> referenceIterator = referenceArguments.iterator();
      for (TypeInfo accepted : argumentsAccepted) {
        TypeInfo reference = referenceIterator.next();
        boolean acceptedIsPrimitive = false;
        PrimitiveCategory acceptedPrimCat = PrimitiveCategory.UNKNOWN;
        if (accepted.getCategory() == Category.PRIMITIVE) {
          acceptedIsPrimitive = true;
          acceptedPrimCat = ((PrimitiveTypeInfo) accepted).getPrimitiveCategory();
        }
        if (acceptedIsPrimitive && TypeInfoUtils.numericTypes.containsKey(acceptedPrimCat)) {
          // We're looking for the udf with the smallest maximum numeric type.
          int typeValue = TypeInfoUtils.numericTypes.get(acceptedPrimCat);
          maxNumericType = typeValue > maxNumericType ? typeValue : maxNumericType;
        } else if (!accepted.equals(reference)) {
          // There's a non-numeric argument that doesn't match from one UDF to
          // another. We give up at this point.
          throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
        }
      }
      if (lowestNumericType > maxNumericType) {
        multiple = false;
        lowestNumericType = maxNumericType;
        candidate = m;
      } else if (maxNumericType == lowestNumericType) {
        // multiple udfs with the same max type. Unless we find a lower one
        // we'll give up.
        multiple = true;
      }
    }
    if (!multiple) {
      return candidate;
    } else {
      throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
    }
  }
  return udfMethods.get(0);
}
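The sketch below (using a made-up UDFExampleAdd class that is not part of Hive) shows the resolution behaviour described above: with two evaluate overloads that differ only in numeric width, passing int arguments selects the Integer overload because it needs no implicit conversions.

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class MethodResolutionSketch {
  // Hypothetical stand-in UDF with two overloads differing only in numeric width.
  public static class UDFExampleAdd {
    public Integer evaluate(Integer a, Integer b) { return a + b; }
    public Double evaluate(Double a, Double b) { return a + b; }
  }

  public static void main(String[] args) throws Exception {
    // Collect the candidate evaluate() overloads.
    List<Method> candidates = new ArrayList<>();
    for (Method m : UDFExampleAdd.class.getMethods()) {
      if (m.getName().equals("evaluate")) {
        candidates.add(m);
      }
    }
    // Passing (int, int): the Integer overload matches at conversion cost 0,
    // so it should win over the Double overload.
    List<TypeInfo> passed = Arrays.asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    Method resolved = FunctionRegistry.getMethodInternal(UDFExampleAdd.class, candidates, false, passed);
    System.out.println(resolved);
  }
}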
Use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
The class VectorizationContext, method getColumnVectorExpression.
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
  int columnNum = getInputColumnIndex(exprDesc.getColumn());
  VectorExpression expr = null;
  switch (mode) {
  case FILTER:
    // Evaluate the column as a boolean, converting if necessary.
    TypeInfo typeInfo = exprDesc.getTypeInfo();
    if (typeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
      expr = new SelectColumnIsTrue(columnNum);
    } else {
      // Ok, we need to convert.
      ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1);
      exprAsList.add(exprDesc);
      // First try our cast method that will handle a few special cases.
      VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
      if (castToBooleanExpr == null) {
        // Ok, try the UDF.
        castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
        if (castToBooleanExpr == null) {
          throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
        }
      }
      final int outputColumnNum = castToBooleanExpr.getOutputColumnNum();
      expr = new SelectColumnIsTrue(outputColumnNum);
      expr.setChildExpressions(new VectorExpression[] { castToBooleanExpr });
      expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo());
      expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
    }
    break;
  case PROJECTION:
    {
      expr = new IdentityExpression(columnNum);
      TypeInfo identityTypeInfo = exprDesc.getTypeInfo();
      DataTypePhysicalVariation identityDataTypePhysicalVariation = getDataTypePhysicalVariation(columnNum);
      expr.setInputTypeInfos(identityTypeInfo);
      expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
      expr.setOutputTypeInfo(identityTypeInfo);
      expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation);
    }
    break;
  }
  return expr;
}
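For FILTER mode, the net effect when the column is already boolean is simply a SelectColumnIsTrue over that column. The following minimal sketch exercises that expression in isolation (constructing a full VectorizationContext is omitted); it keeps only the rows whose LongColumnVector slot holds 1. The class name FilterBooleanColumnSketch and the row values are illustrative.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;

public class FilterBooleanColumnSketch {
  public static void main(String[] args) throws Exception {
    // One boolean column, four rows; booleans are stored as 0/1 longs.
    VectorizedRowBatch batch = new VectorizedRowBatch(1, 4);
    LongColumnVector boolCol = new LongColumnVector(4);
    batch.cols[0] = boolCol;
    batch.size = 4;
    boolCol.vector[0] = 1; // true
    boolCol.vector[1] = 0; // false
    boolCol.vector[2] = 1; // true
    boolCol.vector[3] = 0; // false

    // The filter expression over column 0, as built in the boolean branch above.
    SelectColumnIsTrue filter = new SelectColumnIsTrue(0);
    filter.evaluate(batch);
    System.out.println(batch.size); // 2 rows survive the filter
  }
}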