Search in sources :

Example 91 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ColumnPrunerProcCtx method getColsFromSelectExpr.

/**
   * Creates the list of internal column names (represented by field nodes)
   * from select expressions in a select operator. This function is used for the
   * select operator instead of the genColLists function (which is used by
   * the rest of the operators).
   *
   * @param op The select operator.
   * @return a list of field nodes representing the internal column names.
   */
public List<FieldNode> getColsFromSelectExpr(SelectOperator op) {
    List<FieldNode> cols = new ArrayList<>();
    SelectDesc conf = op.getConf();
    if (conf.isSelStarNoCompute()) {
        for (ColumnInfo colInfo : op.getSchema().getSignature()) {
            cols.add(new FieldNode(colInfo.getInternalName()));
        }
    } else {
        List<ExprNodeDesc> exprList = conf.getColList();
        for (ExprNodeDesc expr : exprList) {
            cols = mergeFieldNodesWithDesc(cols, expr);
        }
    }
    return cols;
}
Also used : ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 92 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ColumnPrunerProcCtx method handleFilterUnionChildren.

/**
   * If the input filter operator has direct child(ren) which are union operator,
   * and the filter's column is not the same as union's
   * create select operator between them. The select operator has same number of columns as
   * pruned child operator.
   *
   * @param curOp
   *          The filter operator which need to handle children.
   * @throws SemanticException
   */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
    if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
        return;
    }
    List<FieldNode> parentPrunList = prunedColLists.get(curOp);
    if (parentPrunList == null || parentPrunList.size() == 0) {
        return;
    }
    List<FieldNode> prunList = null;
    for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
        if (child instanceof UnionOperator) {
            prunList = genColLists(child);
            if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
                continue;
            }
            ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
            ArrayList<String> outputColNames = new ArrayList<String>();
            Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
            ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
            for (ColumnInfo colInfo : child.getSchema().getSignature()) {
                if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
                    continue;
                }
                ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
                exprs.add(colDesc);
                outputColNames.add(colInfo.getInternalName());
                ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                newCol.setAlias(colInfo.getAlias());
                outputRS.add(newCol);
                colExprMap.put(colInfo.getInternalName(), colDesc);
            }
            SelectDesc select = new SelectDesc(exprs, outputColNames, false);
            curOp.removeChild(child);
            SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
            OperatorFactory.makeChild(sel, child);
            sel.setColumnExprMap(colExprMap);
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 93 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ColumnPrunerProcFactory method getPruneReduceSinkOpRetainFlags.

private static boolean[] getPruneReduceSinkOpRetainFlags(List<String> retainedParentOpOutputCols, ReduceSinkOperator reduce) {
    ReduceSinkDesc reduceConf = reduce.getConf();
    java.util.ArrayList<ExprNodeDesc> originalValueEval = reduceConf.getValueCols();
    boolean[] flags = new boolean[originalValueEval.size()];
    for (int i = 0; i < originalValueEval.size(); i++) {
        flags[i] = false;
        List<String> current = originalValueEval.get(i).getCols();
        if (current == null || current.size() == 0) {
            flags[i] = true;
        } else {
            for (int j = 0; j < current.size(); j++) {
                if (retainedParentOpOutputCols.contains(current.get(j))) {
                    flags[i] = true;
                    break;
                }
            }
        }
    }
    return flags;
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 94 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ConstantPropagateProcFactory method foldExprFull.

/**
   * Fold input expression desc.
   *
   * This function recursively checks if any subexpression of a specified expression
   * can be evaluated to be constant and replaces such subexpression with the constant.
   * If the expression is a deterministic UDF and all the subexpressions are constants,
   * the value will be calculated immediately (during compilation time vs. runtime).
   * e.g.:
   *   concat(year, month) => 200112 for year=2001, month=12 since concat is deterministic UDF
   *   unix_timestamp(time) => unix_timestamp(123) for time=123 since unix_timestamp is nondeterministic UDF
   * @param desc folding expression
   * @param constants current propagated constant map
   * @param cppCtx
   * @param op processing operator
   * @param propagate if true, assignment expressions will be added to constants.
   * @return fold expression
   * @throws UDFArgumentException
   */
private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants, ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag, boolean propagate) throws UDFArgumentException {
    // Combine NOT operator with the child operator. Otherwise, the following optimization
    // from bottom up could lead to incorrect result, such as not(x > 3 and x is not null),
    // should not be optimized to not(x > 3), but (x <=3 or x is null).
    desc = foldNegative(desc);
    if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
        GenericUDF udf = funcDesc.getGenericUDF();
        boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass());
        List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDesc childExpr : desc.getChildren()) {
            newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext));
        }
        // Don't evaluate nondeterministic function since the value can only calculate during runtime.
        if (!isDeterministicUdf(udf, newExprs)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evaluate immediately.");
            }
            ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
            return desc;
        } else {
            // If all child expressions of deterministic function are constants, evaluate such UDF immediately
            ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren());
            if (constant != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Folding expression:" + desc + " -> " + constant);
                }
                return constant;
            } else {
                // Check if the function can be short cut.
                ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
                if (shortcut != null) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Folding expression:" + desc + " -> " + shortcut);
                    }
                    return shortcut;
                }
                ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
            }
            // constant, add them to colToConstants as half-deterministic columns.
            if (propagate) {
                propagate(udf, newExprs, op.getSchema(), constants);
            }
        }
        return desc;
    } else if (desc instanceof ExprNodeColumnDesc) {
        if (op.getParentOperators() == null || op.getParentOperators().isEmpty()) {
            return desc;
        }
        Operator<? extends Serializable> parent = op.getParentOperators().get(tag);
        ExprNodeDesc col = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent);
        if (col != null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Folding expression:" + desc + " -> " + col);
            }
            return col;
        }
    }
    return desc;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Serializable(java.io.Serializable) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 95 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ConstantPropagateProcFactory method foldNegative.

/**
   * Combines the logical not() operator with the child operator if possible.
   * @param desc the expression to be evaluated
   * @return  the new expression to be replaced
   * @throws UDFArgumentException
   */
private static ExprNodeDesc foldNegative(ExprNodeDesc desc) throws UDFArgumentException {
    if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
        GenericUDF udf = funcDesc.getGenericUDF();
        if (udf instanceof GenericUDFOPNot) {
            ExprNodeDesc child = funcDesc.getChildren().get(0);
            if (child instanceof ExprNodeGenericFuncDesc) {
                ExprNodeGenericFuncDesc childDesc = (ExprNodeGenericFuncDesc) child;
                GenericUDF childUDF = childDesc.getGenericUDF();
                List<ExprNodeDesc> grandChildren = child.getChildren();
                if (childUDF instanceof GenericUDFBaseCompare || childUDF instanceof GenericUDFOPNull || childUDF instanceof GenericUDFOPNotNull) {
                    List<ExprNodeDesc> newGrandChildren = new ArrayList<ExprNodeDesc>();
                    for (ExprNodeDesc grandChild : grandChildren) {
                        newGrandChildren.add(foldNegative(grandChild));
                    }
                    return ExprNodeGenericFuncDesc.newInstance(childUDF.negative(), newGrandChildren);
                } else if (childUDF instanceof GenericUDFOPAnd || childUDF instanceof GenericUDFOPOr) {
                    List<ExprNodeDesc> newGrandChildren = new ArrayList<ExprNodeDesc>();
                    for (ExprNodeDesc grandChild : grandChildren) {
                        newGrandChildren.add(foldNegative(ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), Arrays.asList(grandChild))));
                    }
                    return ExprNodeGenericFuncDesc.newInstance(childUDF.negative(), newGrandChildren);
                } else if (childUDF instanceof GenericUDFOPNot) {
                    return foldNegative(child.getChildren().get(0));
                } else {
                    // For operator like if() that cannot be handled, leave not() as it
                    // is and continue processing the children
                    List<ExprNodeDesc> newGrandChildren = new ArrayList<ExprNodeDesc>();
                    for (ExprNodeDesc grandChild : grandChildren) {
                        newGrandChildren.add(foldNegative(grandChild));
                    }
                    childDesc.setChildren(newGrandChildren);
                    return funcDesc;
                }
            }
        }
    }
    return desc;
}
Also used : GenericUDFOPNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) List(java.util.List) ArrayList(java.util.ArrayList) GenericUDFOPNot(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)321 ArrayList (java.util.ArrayList)179 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)146 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)110 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)101 Test (org.junit.Test)74 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)69 HashMap (java.util.HashMap)67 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)57 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)47 LinkedHashMap (java.util.LinkedHashMap)43 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)42 List (java.util.List)40 Operator (org.apache.hadoop.hive.ql.exec.Operator)39 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)35 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)34 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)34 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)34 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)33 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)32