Search in sources :

Example 56 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorizationContext method getColumnVectorExpression.

private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
    int columnNum = getInputColumnIndex(exprDesc.getColumn());
    VectorExpression expr = null;
    switch(mode) {
        case FILTER:
            // Evaluate the column as a boolean, converting if necessary.
            TypeInfo typeInfo = exprDesc.getTypeInfo();
            if (typeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
                expr = new SelectColumnIsTrue(columnNum);
            } else {
                // Ok, we need to convert.
                ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1);
                exprAsList.add(exprDesc);
                // First try our cast method that will handle a few special cases.
                VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
                if (castToBooleanExpr == null) {
                    // Ok, try the UDF.
                    castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, VectorExpressionDescriptor.Mode.PROJECTION, null);
                    if (castToBooleanExpr == null) {
                        throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
                    }
                }
                expr = new SelectColumnIsTrue(castToBooleanExpr.getOutputColumn());
                expr.setChildExpressions(new VectorExpression[] { castToBooleanExpr });
            }
            break;
        case PROJECTION:
            expr = new IdentityExpression(columnNum, exprDesc.getTypeString());
            break;
    }
    return expr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 57 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorizationContext method getInExpression.

/**
   * Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
   */
private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    ExprNodeDesc colExpr = childExpr.get(0);
    List<ExprNodeDesc> inChildren = childExpr.subList(1, childExpr.size());
    String colType = colExpr.getTypeString();
    colType = VectorizationContext.mapTypeNameSynonyms(colType);
    TypeInfo colTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(colType);
    Category category = colTypeInfo.getCategory();
    if (category == Category.STRUCT) {
        return getStructInExpression(childExpr, colExpr, colTypeInfo, inChildren, mode, returnType);
    } else if (category != Category.PRIMITIVE) {
        return null;
    }
    // prepare arguments for createVectorExpression
    List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(inChildren);
    /* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
     * in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
     * If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued
     * logic correctly. E.g. NOT (col IN (null)) should be considered UNKNOWN, so that would
     * become FALSE in the WHERE clause, and cause the row in question to be filtered out.
     * See the discussion in Jira HIVE-5583.
     */
    VectorExpression expr = null;
    // Validate the IN items are only constants.
    for (ExprNodeDesc inListChild : childrenForInList) {
        if (!(inListChild instanceof ExprNodeConstantDesc)) {
            throw new HiveException("Vectorizing IN expression only supported for constant values");
        }
    }
    // determine class
    Class<?> cl = null;
    //       non-vectorized validates that explicitly during UDF init.
    if (isIntFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
        ((ILongInExpr) expr).setInListValues(inVals);
    } else if (isTimestampFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class);
        Timestamp[] inVals = new Timestamp[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = getTimestampScalar(childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
        ((ITimestampInExpr) expr).setInListValues(inVals);
    } else if (isStringFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
        byte[][] inVals = new byte[childrenForInList.size()][];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
        ((IStringInExpr) expr).setInListValues(inVals);
    } else if (isFloatFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDoubleColumnInList.class : DoubleColumnInList.class);
        double[] inValsD = new double[childrenForInList.size()];
        for (int i = 0; i != inValsD.length; i++) {
            inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
        ((IDoubleInExpr) expr).setInListValues(inValsD);
    } else if (isDecimalFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterDecimalColumnInList.class : DecimalColumnInList.class);
        HiveDecimal[] inValsD = new HiveDecimal[childrenForInList.size()];
        for (int i = 0; i != inValsD.length; i++) {
            inValsD[i] = (HiveDecimal) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
        ((IDecimalInExpr) expr).setInListValues(inValsD);
    } else if (isDateFamily(colType)) {
        cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
        long[] inVals = new long[childrenForInList.size()];
        for (int i = 0; i != inVals.length; i++) {
            inVals[i] = (Long) getVectorTypeScalarValue((ExprNodeConstantDesc) childrenForInList.get(i));
        }
        expr = createVectorExpression(cl, childExpr.subList(0, 1), VectorExpressionDescriptor.Mode.PROJECTION, returnType);
        ((ILongInExpr) expr).setInListValues(inVals);
    }
    // execution to fall back to row mode.
    return expr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) VectorUDAFAvgLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong) VectorUDAFStdSampLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampLong) VectorUDAFVarSampLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong) VectorUDAFMaxLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong) VectorUDAFMinLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong) VectorUDAFStdPopLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong) VectorUDAFSumLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong) VectorUDAFVarPopLong(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 58 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class IndexPredicateAnalyzer method translateSearchConditions.

/**
   * Translates search conditions back to ExprNodeDesc form (as
   * a left-deep conjunction).
   *
   * @param searchConditions (typically produced by analyzePredicate)
   *
   * @return ExprNodeGenericFuncDesc form of search conditions
   */
public ExprNodeGenericFuncDesc translateSearchConditions(List<IndexSearchCondition> searchConditions) {
    ExprNodeGenericFuncDesc expr = null;
    for (IndexSearchCondition searchCondition : searchConditions) {
        if (expr == null) {
            expr = searchCondition.getIndexExpr();
            continue;
        }
        List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
        children.add(expr);
        children.add(searchCondition.getIndexExpr());
        expr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getGenericUDFForAnd(), children);
    }
    return expr;
}
Also used : ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 59 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ConvertAstToSearchArg method findVariable.

/**
   * Find the variable in the expression.
   * @param expr the expression to look in
   * @return the index of the variable or -1 if there is not exactly one
   *   variable.
   */
private int findVariable(ExprNodeDesc expr) {
    int result = -1;
    List<ExprNodeDesc> children = expr.getChildren();
    for (int i = 0; i < children.size(); ++i) {
        ExprNodeDesc child = children.get(i);
        if (child instanceof ExprNodeColumnDesc) {
            // if we already found a variable, this isn't a sarg
            if (result != -1) {
                return -1;
            } else {
                result = i;
            }
        }
    }
    return result;
}
Also used : ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 60 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ConvertAstToSearchArg method findLiteral.

/**
   * Find the child that is the literal.
   * @param expr the parent node to check
   * @param type the type of the expression
   * @return the literal boxed if found or null
   */
private static Object findLiteral(Configuration conf, ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type) {
    List<ExprNodeDesc> children = expr.getChildren();
    if (children.size() != 2) {
        return null;
    }
    Object result = null;
    for (ExprNodeDesc child : children) {
        Object currentResult = getLiteral(conf, child, type);
        if (currentResult != null) {
            // Both children in the expression should not be literal
            if (result != null) {
                return null;
            }
            result = currentResult;
        }
    }
    return result;
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)321 ArrayList (java.util.ArrayList)179 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)146 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)110 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)101 Test (org.junit.Test)74 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)69 HashMap (java.util.HashMap)67 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)57 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)47 LinkedHashMap (java.util.LinkedHashMap)43 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)42 List (java.util.List)40 Operator (org.apache.hadoop.hive.ql.exec.Operator)39 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)35 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)34 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)34 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)34 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)33 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)32