Search in sources :

Example 1 with IStringInExpr

use of org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr in project hive by apache.

the class VectorizationContext method getStructInExpression.

private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    VectorExpression expr;
    StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
    List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    final int fieldCount = fieldTypeInfos.size();
    ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
    InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
        // Only primitive fields supports for now.
        if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
            return null;
        }
        // We are going to serialize using the 4 basic types.
        ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
        fieldVectorColumnTypes[f] = fieldVectorColumnType;
        // We currently evaluate the IN (..) constants in special ways.
        PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
        InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
        fieldInConstantTypes[f] = inConstantType;
    }
    Output buffer = new Output();
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
    final int inChildrenCount = inChildren.size();
    byte[][] serializedInChildren = new byte[inChildrenCount][];
    try {
        for (int i = 0; i < inChildrenCount; i++) {
            final ExprNodeDesc node = inChildren.get(i);
            final Object[] constants;
            if (node instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
                ConstantObjectInspector output = constNode.getWritableObjectInspector();
                constants = ((List<?>) output.getWritableConstantValue()).toArray();
            } else {
                ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
                ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
                ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
                constants = (Object[]) evaluator.evaluate(null);
            }
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < fieldCount; f++) {
                Object constant = constants[f];
                if (constant == null) {
                    binarySortableSerializeWrite.writeNull();
                } else {
                    InConstantType inConstantType = fieldInConstantTypes[f];
                    switch(inConstantType) {
                        case STRING_FAMILY:
                            {
                                byte[] bytes;
                                if (constant instanceof Text) {
                                    Text text = (Text) constant;
                                    bytes = text.getBytes();
                                    binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
                                } else {
                                    throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
                                }
                            }
                            break;
                        case INT_FAMILY:
                            {
                                long value;
                                if (constant instanceof IntWritable) {
                                    value = ((IntWritable) constant).get();
                                } else if (constant instanceof LongWritable) {
                                    value = ((LongWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeLong(value);
                            }
                            break;
                        case FLOAT_FAMILY:
                            {
                                double value;
                                if (constant instanceof DoubleWritable) {
                                    value = ((DoubleWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeDouble(value);
                            }
                            break;
                        // UNDONE...
                        case DATE:
                        case TIMESTAMP:
                        case DECIMAL:
                        default:
                            throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
                    }
                }
            }
            serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
    // Create a single child representing the scratch column where we will
    // generate the serialized keys of the batch.
    int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
    Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
    expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
    ((IStringInExpr) expr).setInListValues(serializedInChildren);
    ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
    ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
    return expr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.StructColumnInList) FilterStructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStructColumnInList) IStringInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) TruncStringOutput(org.apache.hadoop.hive.ql.exec.vector.expressions.TruncStringOutput) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) IntWritable(org.apache.hadoop.io.IntWritable) FilterStructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStructColumnInList) IStructInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IStructInExpr) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) InputExpressionType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)

Example 2 with IStringInExpr

use of org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr in project hive by apache.

the class VectorizationContext method getInExpression.

/**
 * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
 */
private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    ExprNodeDesc colExpr = childExpr.get(0);
    List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
    String colType = colExpr.getTypeString();
    colType = VectorizationContext.mapTypeNameSynonyms(colType);
    TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
    Category category = colTypeInfo.getCategory();
    if (category == Category.STRUCT) {
        return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
    } else if (category != Category.PRIMITIVE) {
        return null;
    }
    // prepare arguments for createVectorExpression
    List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(inChildren);
    /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
     * in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
     * If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued
     * logic correctly. E.g. NOT (col IN (null)) should be considered UNKNOWN, so that would
     * become FALSE in the WHERE clause, and cause the row in question to be filtered out.
     * See the discussion in Jira HIVE-5583.
     */
    VectorExpression expr = null;
    // Validate the IN items are only constants.
    for (ExprNodeDesc inListChild : childrenForInList) {
        if (!(inListChild instanceof ExprNodeConstantDesc)) {
            throw new HiveException("Vectorizing IN expression only supported for constant values");
        }
    }
    // determine class
    Class<?> cl;
    // non-vectorized validates that explicitly during UDF init.
    if (isIntFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((ILongInExpr) expr).setInListValues(inVals);
    } else if (isTimestampFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class);
        Timestamp[] inVals = new Timestamp[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = getTimestampScalar(childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((ITimestampInExpr) expr).setInListValues(inVals);
    } else if (isStringFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
        byte[][] inVals = new byte[childrenForInList.size()][];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((IStringInExpr) expr).setInListValues(inVals);
    } else if (isFloatFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
        double[] inValsD = new double[childrenForInList.size()];
        for (int i = 0; i != inValsD.length; i++) {
            inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((IDoubleInExpr) expr).setInListValues(inValsD);
    } else if (isDecimalFamily(colType)) {
        final boolean tryDecimal64 = checkExprNodeDescForDecimal64(colExpr);
        if (tryDecimal64) {
            cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimal64ColumnInList.class : Decimal64ColumnInList.class);
            final int scale = ((DecimalTypeInfo) colExpr.getTypeInfo()).getScale();
            expr = createDecimal64VectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, /* isDecimal64ScaleEstablished */
            true, /* decimal64ColumnScale */
            scale, returnType, DataTypePhysicalVariation.NONE, /* dontRescaleArguments */
            false, new GenericUDFIn());
            if (expr != null) {
                long[] inVals = new long[childrenForInList.size()];
                for (int i = 0; i != inVals.length; i++) {
                    ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) childrenForInList.get(i);
                    HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
                    final long decimal64Scalar = new HiveDecimalWritable(hiveDecimal).serialize64(scale);
                    inVals[i] = decimal64Scalar;
                }
                ((ILongInExpr) expr).setInListValues(inVals);
            }
        }
        if (expr == null) {
            cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class);
            expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
            HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
            for (int i = 0; i != inValsD.length; i++) {
                inValsD[i] = (HiveDecimal) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
            }
            ((IDecimalInExpr) expr).setInListValues(inValsD);
        }
    } else if (isDateFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
        ((ILongInExpr) expr).setInListValues(inVals);
    }
    // execution to fall back to row mode.
    return expr;
}
Also used : FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) IStringInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) TimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.TimestampColumnInList) FilterTimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterTimestampColumnInList) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) FilterTimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterTimestampColumnInList) DoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) LongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) IDecimalInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.IDecimalInExpr) ILongInExpr(org.apache.hadoop.hive.ql.exec.vector.expressions.ILongInExpr) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FilterConstantBooleanVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression) ConstantVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)

Aggregations

ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression)2 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)2 FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression)2 IStringInExpr (org.apache.hadoop.hive.ql.exec.vector.expressions.IStringInExpr)2 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)2 BaseCharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo)2 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)2 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)2 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)1 Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)1 ArgumentType (org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType)1 InputExpressionType (org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType)1 CastBooleanToStringViaLongToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString)1 CastDateToString (org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString)1