Search in sources :

Example 21 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class ConstantPropagateProcCtx method getPropagatedConstants.

/**
 * Get propagated constant map from parents.
 *
 * Traverse all parents of current operator, if there is propagated constant (determined by
 * assignment expression like column=constant value), resolve the column using RowResolver and add
 * it to current constant map.
 *
 * @param op
 *        operator getting the propagated constants.
 * @return map of ColumnInfo to ExprNodeDesc. The values of that map must be either
 *         ExprNodeConstantDesc or ExprNodeNullDesc.
 */
public Map<ColumnInfo, ExprNodeDesc> getPropagatedConstants(Operator<? extends Serializable> op) {
    // this map should map columnInfo to ExprConstantNodeDesc
    Map<ColumnInfo, ExprNodeDesc> constants = new HashMap<ColumnInfo, ExprNodeDesc>();
    if (op.getSchema() == null) {
        return constants;
    }
    RowSchema rs = op.getSchema();
    LOG.debug("Getting constants of op:" + op + " with rs:" + rs);
    if (op.getParentOperators() == null) {
        return constants;
    }
    // A previous solution is based on tableAlias and colAlias, which is
    // unsafe, esp. when CBO generates derived table names. see HIVE-13602.
    // For correctness purpose, we only trust colExpMap.
    // We assume that CBO can do the constantPropagation before this function is
    // called to help improve the performance.
    // UnionOperator, LimitOperator and FilterOperator are special, they should already be
    // column-position aligned.
    List<Map<Integer, ExprNodeDesc>> parentsToConstant = new ArrayList<>();
    boolean areAllParentsContainConstant = true;
    boolean noParentsContainConstant = true;
    for (Operator<?> parent : op.getParentOperators()) {
        Map<ColumnInfo, ExprNodeDesc> constMap = opToConstantExprs.get(parent);
        if (constMap == null) {
            LOG.debug("Constant of Op " + parent.getOperatorId() + " is not found");
            areAllParentsContainConstant = false;
        } else {
            noParentsContainConstant = false;
            Map<Integer, ExprNodeDesc> map = new HashMap<>();
            for (Entry<ColumnInfo, ExprNodeDesc> entry : constMap.entrySet()) {
                map.put(parent.getSchema().getPosition(entry.getKey().getInternalName()), entry.getValue());
            }
            parentsToConstant.add(map);
            LOG.debug("Constant of Op " + parent.getOperatorId() + " " + constMap);
        }
    }
    if (noParentsContainConstant) {
        return constants;
    }
    List<ColumnInfo> signature = op.getSchema().getSignature();
    if (op instanceof LimitOperator || op instanceof FilterOperator) {
        // there should be only one parent.
        if (op.getParentOperators().size() == 1) {
            Map<Integer, ExprNodeDesc> parentToConstant = parentsToConstant.get(0);
            for (int index = 0; index < signature.size(); index++) {
                if (parentToConstant.containsKey(index)) {
                    constants.put(signature.get(index), parentToConstant.get(index));
                }
            }
        }
    } else if (op instanceof UnionOperator && areAllParentsContainConstant) {
        for (int index = 0; index < signature.size(); index++) {
            ExprNodeDesc constant = null;
            for (Map<Integer, ExprNodeDesc> parentToConstant : parentsToConstant) {
                if (!parentToConstant.containsKey(index)) {
                    // if this parent does not contain a constant at this position, we
                    // continue to look at other positions.
                    constant = null;
                    break;
                } else {
                    if (constant == null) {
                        constant = parentToConstant.get(index);
                    } else {
                        // compare if they are the same constant.
                        ExprNodeDesc nextConstant = parentToConstant.get(index);
                        if (!nextConstant.isSame(constant)) {
                            // they are not the same constant. for example, union all of 1
                            // and 2.
                            constant = null;
                            break;
                        }
                    }
                }
            }
            // we have checked all the parents for the "index" position.
            if (constant != null) {
                constants.put(signature.get(index), constant);
            }
        }
    } else if (op instanceof JoinOperator) {
        JoinOperator joinOp = (JoinOperator) op;
        Iterator<Entry<Byte, List<ExprNodeDesc>>> itr = joinOp.getConf().getExprs().entrySet().iterator();
        while (itr.hasNext()) {
            Entry<Byte, List<ExprNodeDesc>> e = itr.next();
            int tag = e.getKey();
            Operator<?> parent = op.getParentOperators().get(tag);
            List<ExprNodeDesc> exprs = e.getValue();
            if (exprs == null) {
                continue;
            }
            for (ExprNodeDesc expr : exprs) {
                // we are only interested in ExprNodeColumnDesc
                if (expr instanceof ExprNodeColumnDesc) {
                    String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
                    // find this parentColName in its parent's rs
                    int parentPos = parent.getSchema().getPosition(parentColName);
                    if (parentsToConstant.get(tag).containsKey(parentPos)) {
                        // reverse look up colExprMap to find the childColName
                        if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
                            for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
                                if (entry.getValue().isSame(expr)) {
                                    // now propagate the constant from the parent to the child
                                    constants.put(signature.get(op.getSchema().getPosition(entry.getKey())), parentsToConstant.get(tag).get(parentPos));
                                }
                            }
                        }
                    }
                }
            }
        }
    } else {
        // there should be only one parent.
        if (op.getParentOperators().size() == 1) {
            Operator<?> parent = op.getParentOperators().get(0);
            if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
                for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
                    if (op.getSchema().getPosition(entry.getKey()) == -1) {
                        // Not present
                        continue;
                    }
                    ExprNodeDesc expr = entry.getValue();
                    if (expr instanceof ExprNodeColumnDesc) {
                        String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
                        // find this parentColName in its parent's rs
                        int parentPos = parent.getSchema().getPosition(parentColName);
                        if (parentsToConstant.get(0).containsKey(parentPos)) {
                            // this position in parent is a constant
                            // now propagate the constant from the parent to the child
                            constants.put(signature.get(op.getSchema().getPosition(entry.getKey())), parentsToConstant.get(0).get(parentPos));
                        }
                    }
                }
            }
        }
    }
    LOG.debug("Offering constants " + constants.keySet() + " to operator " + op.toString());
    return constants;
}
Also used : JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) Entry(java.util.Map.Entry) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) HashMap(java.util.HashMap) Map(java.util.Map)

Example 22 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class ConstantPropagateProcFactory method shortcutFunction.

private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs, Operator<? extends Serializable> op) throws UDFArgumentException {
    if (udf instanceof GenericUDFOPEqual) {
        assert newExprs.size() == 2;
        boolean foundUDFInFirst = false;
        ExprNodeGenericFuncDesc caseOrWhenexpr = null;
        if (newExprs.get(0) instanceof ExprNodeGenericFuncDesc) {
            caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(0);
            if (caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase) {
                foundUDFInFirst = true;
            }
        }
        if (!foundUDFInFirst && newExprs.get(1) instanceof ExprNodeGenericFuncDesc) {
            caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(1);
            if (!(caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase)) {
                return null;
            }
        }
        if (null == caseOrWhenexpr) {
            // we didn't find case or when udf
            return null;
        }
        GenericUDF childUDF = caseOrWhenexpr.getGenericUDF();
        List<ExprNodeDesc> children = new ArrayList(caseOrWhenexpr.getChildren());
        int i;
        if (childUDF instanceof GenericUDFWhen) {
            for (i = 1; i < children.size(); i += 2) {
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            if (children.size() % 2 == 1) {
                i = children.size() - 1;
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // after constant folding of child expression the return type of UDFWhen might have changed,
            // so recreate the expression
            ExprNodeGenericFuncDesc newCaseOrWhenExpr = ExprNodeGenericFuncDesc.newInstance(childUDF, caseOrWhenexpr.getFuncText(), children);
            return newCaseOrWhenExpr;
        } else if (childUDF instanceof GenericUDFCase) {
            for (i = 2; i < children.size(); i += 2) {
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            if (children.size() % 2 == 0) {
                i = children.size() - 1;
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // after constant folding of child expression the return type of UDFCase might have changed,
            // so recreate the expression
            ExprNodeGenericFuncDesc newCaseOrWhenExpr = ExprNodeGenericFuncDesc.newInstance(childUDF, caseOrWhenexpr.getFuncText(), children);
            return newCaseOrWhenExpr;
        } else {
            // cant happen
            return null;
        }
    }
    if (udf instanceof GenericUDFOPAnd) {
        final BitSet positionsToRemove = new BitSet();
        final List<ExprNodeDesc> notNullExprs = new ArrayList<ExprNodeDesc>();
        final List<Integer> notNullExprsPositions = new ArrayList<Integer>();
        final List<ExprNodeDesc> compareExprs = new ArrayList<ExprNodeDesc>();
        for (int i = 0; i < newExprs.size(); i++) {
            ExprNodeDesc childExpr = newExprs.get(i);
            if (childExpr instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
                if (Boolean.TRUE.equals(c.getValue())) {
                    // if true, prune it
                    positionsToRemove.set(i);
                } else {
                    if (Boolean.FALSE.equals(c.getValue())) {
                        // if false, return false
                        return childExpr;
                    }
                }
            } else if (childExpr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFOPNotNull && childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc) {
                notNullExprs.add(childExpr.getChildren().get(0));
                notNullExprsPositions.add(i);
            } else if (childExpr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFBaseCompare && !(((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFOPNotEqual) && childExpr.getChildren().size() == 2) {
                // Try to fold (key <op> 86) and (key is not null) to (key <op> 86)
                // where <op> can be "=", ">=", "<=", ">", "<".
                // Note: (key <> 86) and (key is not null) cannot be folded
                ExprNodeColumnDesc colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(0));
                if (null == colDesc) {
                    colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(1));
                }
                if (colDesc != null) {
                    compareExprs.add(colDesc);
                }
            }
        }
        // Try to fold (key = 86) and (key is not null) to (key = 86)
        for (int i = 0; i < notNullExprs.size(); i++) {
            for (ExprNodeDesc other : compareExprs) {
                if (notNullExprs.get(i).isSame(other)) {
                    positionsToRemove.set(notNullExprsPositions.get(i));
                    break;
                }
            }
        }
        // Remove unnecessary expressions
        int pos = 0;
        int removed = 0;
        while ((pos = positionsToRemove.nextSetBit(pos)) != -1) {
            newExprs.remove(pos - removed);
            pos++;
            removed++;
        }
        if (newExprs.size() == 0) {
            return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
        }
        if (newExprs.size() == 1) {
            return newExprs.get(0);
        }
    }
    if (udf instanceof GenericUDFOPOr) {
        final BitSet positionsToRemove = new BitSet();
        for (int i = 0; i < newExprs.size(); i++) {
            ExprNodeDesc childExpr = newExprs.get(i);
            if (childExpr instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
                if (Boolean.FALSE.equals(c.getValue())) {
                    // if false, prune it
                    positionsToRemove.set(i);
                } else if (Boolean.TRUE.equals(c.getValue())) {
                    // if true return true
                    return childExpr;
                }
            }
        }
        int pos = 0;
        int removed = 0;
        while ((pos = positionsToRemove.nextSetBit(pos)) != -1) {
            newExprs.remove(pos - removed);
            pos++;
            removed++;
        }
        if (newExprs.size() == 0) {
            return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
        }
        if (newExprs.size() == 1) {
            return newExprs.get(0);
        }
    }
    if (udf instanceof GenericUDFWhen) {
        if (!(newExprs.size() == 2 || newExprs.size() == 3)) {
            // we currently only handle either 1 or 2 branch.
            return null;
        }
        ExprNodeDesc thenExpr = newExprs.get(1);
        ExprNodeDesc elseExpr = newExprs.size() == 3 ? newExprs.get(2) : new ExprNodeConstantDesc(newExprs.get(1).getTypeInfo(), null);
        ExprNodeDesc whenExpr = newExprs.get(0);
        if (whenExpr instanceof ExprNodeConstantDesc) {
            Boolean whenVal = (Boolean) ((ExprNodeConstantDesc) whenExpr).getValue();
            return (whenVal == null || Boolean.FALSE.equals(whenVal)) ? elseExpr : thenExpr;
        }
        if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
            ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
            Object thenVal = constThen.getValue();
            Object elseVal = constElse.getValue();
            if (thenVal == null) {
                if (elseVal == null) {
                    // both branches are null.
                    return thenExpr;
                } else if (op instanceof FilterOperator) {
                    // we can still fold, since here null is equivalent to false.
                    return Boolean.TRUE.equals(elseVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), newExprs.subList(0, 1)) : Boolean.FALSE.equals(elseVal) ? elseExpr : null;
                } else {
                    // can't do much, expression is not in context of filter, so we can't treat null as equivalent to false here.
                    return null;
                }
            } else if (elseVal == null && op instanceof FilterOperator) {
                return Boolean.TRUE.equals(thenVal) ? whenExpr : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
            } else if (thenVal.equals(elseVal)) {
                return thenExpr;
            } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
                List<ExprNodeDesc> children = new ArrayList<>();
                children.add(whenExpr);
                children.add(new ExprNodeConstantDesc(false));
                ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFCoalesce(), children);
                if (Boolean.TRUE.equals(thenVal)) {
                    return func;
                } else {
                    List<ExprNodeDesc> exprs = new ArrayList<>();
                    exprs.add(func);
                    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
                }
            } else {
                return null;
            }
        }
    }
    if (udf instanceof GenericUDFCase) {
        // where ss_sold_date= '1998-01-01' ;
        if (!(newExprs.size() == 3 || newExprs.size() == 4)) {
            // we currently only handle either 1 or 2 branch.
            return null;
        }
        ExprNodeDesc thenExpr = newExprs.get(2);
        ExprNodeDesc elseExpr = newExprs.size() == 4 ? newExprs.get(3) : new ExprNodeConstantDesc(newExprs.get(2).getTypeInfo(), null);
        if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
            ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
            Object thenVal = constThen.getValue();
            Object elseVal = constElse.getValue();
            if (thenVal == null) {
                if (null == elseVal) {
                    return thenExpr;
                } else if (op instanceof FilterOperator) {
                    return Boolean.TRUE.equals(elseVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNotEqual(), newExprs.subList(0, 2)) : Boolean.FALSE.equals(elseVal) ? elseExpr : null;
                } else {
                    return null;
                }
            } else if (null == elseVal && op instanceof FilterOperator) {
                return Boolean.TRUE.equals(thenVal) ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), newExprs.subList(0, 2)) : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
            } else if (thenVal.equals(elseVal)) {
                return thenExpr;
            } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
                ExprNodeGenericFuncDesc equal = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), newExprs.subList(0, 2));
                List<ExprNodeDesc> children = new ArrayList<>();
                children.add(equal);
                children.add(new ExprNodeConstantDesc(false));
                ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFCoalesce(), children);
                if (Boolean.TRUE.equals(thenVal)) {
                    return func;
                } else {
                    List<ExprNodeDesc> exprs = new ArrayList<>();
                    exprs.add(func);
                    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
                }
            } else {
                return null;
            }
        }
    }
    if (udf instanceof GenericUDFUnixTimeStamp) {
        if (newExprs.size() >= 1) {
            // unix_timestamp(args) -> to_unix_timestamp(args)
            return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
        }
    }
    return null;
}
Also used : GenericUDFCase(org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase) GenericUDFWhen(org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen) ArrayList(java.util.ArrayList) GenericUDFUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp) GenericUDFOPNotNull(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull) GenericUDFOPNotEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual) GenericUDFBaseCompare(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare) GenericUDFOPEqual(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) BitSet(java.util.BitSet) GenericUDFToUnixTimeStamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) GenericUDFCoalesce(org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) GenericUDFOPNot(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)

Example 23 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class GlobalLimitOptimizer method checkQbpForGlobalLimit.

/**
 * Check the limit number in all sub queries
 *
 * @return if there is one and only one limit for all subqueries, return the limit
 *         if there is no limit, return 0
 *         otherwise, return null
 */
private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
    Set<Class<? extends Operator<?>>> searchedClasses = new ImmutableSet.Builder<Class<? extends Operator<?>>>().add(ReduceSinkOperator.class).add(GroupByOperator.class).add(FilterOperator.class).add(LimitOperator.class).build();
    Multimap<Class<? extends Operator<?>>, Operator<?>> ops = OperatorUtils.classifyOperators(ts, searchedClasses);
    // existsOrdering AND existsPartitioning should be false.
    for (Operator<?> op : ops.get(ReduceSinkOperator.class)) {
        ReduceSinkDesc reduceSinkConf = ((ReduceSinkOperator) op).getConf();
        if (reduceSinkConf.isOrdering() || reduceSinkConf.isPartitioning()) {
            return null;
        }
    }
    // - There cannot exist any (distinct) aggregate.
    for (Operator<?> op : ops.get(GroupByOperator.class)) {
        GroupByDesc groupByConf = ((GroupByOperator) op).getConf();
        if (groupByConf.isAggregate() || groupByConf.isDistinct()) {
            return null;
        }
    }
    // - There cannot exist any sampling predicate.
    for (Operator<?> op : ops.get(FilterOperator.class)) {
        FilterDesc filterConf = ((FilterOperator) op).getConf();
        if (filterConf.getIsSamplingPred()) {
            return null;
        }
    }
    // If there is one and only one limit starting at op, return the limit
    // If there is no limit, return 0
    // Otherwise, return null
    Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
    if (limitOps.size() == 1) {
        return (LimitOperator) limitOps.iterator().next();
    } else if (limitOps.size() == 0) {
        return null;
    }
    return null;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ImmutableSet(com.google.common.collect.ImmutableSet) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 24 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class SharedWorkOptimizer method replaceSemijoinExpressions.

/**
 * When we call this method, we have already verified that the SJ expressions targeting
 * two TS operators are the same.
 * Since we already had a method to push the filter expressions on top of the discardable
 * TS (pushFilterToTopOfTableScan), here we remove the old SJ expressions from the
 * discardable TS (and follow-up Filters if present) and we add the SJ expressions
 * from the retainable TS. That way the SJ expressions will be pushed on top of the
 * discardable TS by pushFilterToTopOfTableScan.
 */
private static void replaceSemijoinExpressions(TableScanOperator tsOp, List<ExprNodeDesc> semijoinExprNodes) {
    ExprNodeDesc constNode = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
    // TS operator
    if (tsOp.getConf().getFilterExpr() != null) {
        ExprNodeDesc tsFilterExpr = tsOp.getConf().getFilterExpr();
        if (FunctionRegistry.isOpAnd(tsFilterExpr)) {
            tsFilterExpr.getChildren().removeIf(SharedWorkOptimizer::isSemijoinExpr);
            tsFilterExpr.getChildren().addAll(semijoinExprNodes);
            if (tsFilterExpr.getChildren().isEmpty() || (tsFilterExpr.getChildren().size() == 1 && !(tsFilterExpr.getChildren().get(0) instanceof ExprNodeGenericFuncDesc))) {
                tsOp.getConf().setFilterExpr(null);
            }
        }
    }
    // Filter operators on top
    if (tsOp.getChildOperators() != null) {
        for (Operator op : tsOp.getChildOperators()) {
            if (op instanceof FilterOperator) {
                FilterOperator filterOp = (FilterOperator) op;
                ExprNodeDesc filterExpr = filterOp.getConf().getPredicate();
                if (FunctionRegistry.isOpAnd(filterExpr)) {
                    filterExpr.getChildren().removeIf(SharedWorkOptimizer::isSemijoinExpr);
                    if (filterExpr.getChildren().isEmpty()) {
                        filterOp.getConf().setPredicate(constNode);
                    } else if (filterExpr.getChildren().size() == 1) {
                        filterOp.getConf().setPredicate(filterExpr.getChildren().get(0));
                    }
                }
            }
        }
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 25 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class SharedWorkOptimizer method extractSharedOptimizationInfoForRoot.

private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp, TableScanOperator discardableTsOp, boolean mayRemoveDownStreamOperators, boolean mayRemoveInputOps) throws SemanticException {
    LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
    LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
    Set<Operator<?>> discardableInputOps = new HashSet<>();
    long dataSize = 0L;
    long maxDataSize = 0L;
    retainableOps.add(retainableTsOp);
    discardableOps.add(discardableTsOp);
    Operator<?> equalOp1 = retainableTsOp;
    Operator<?> equalOp2 = discardableTsOp;
    if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
    if (retainableTsOp.getChildOperators().size() == 0 || discardableTsOp.getChildOperators().size() == 0) {
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
    Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
    Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);
    // Special treatment for Filter operator that ignores the DPP predicates
    if (mayRemoveDownStreamOperators && currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
        boolean equalFilters = false;
        FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
        FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
        if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred() && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
            Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
            Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
            if (conjsOp1String.equals(conjsOp2String)) {
                equalFilters = true;
            }
        }
        if (equalFilters) {
            equalOp1 = currentOp1;
            equalOp2 = currentOp2;
            retainableOps.add(equalOp1);
            discardableOps.add(equalOp2);
            if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
                // TODO: Support checking multiple child operators to merge further.
                discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
                discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
                return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
            }
            currentOp1 = currentOp1.getChildOperators().get(0);
            currentOp2 = currentOp2.getChildOperators().get(0);
        } else {
            // Bail out
            discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
            discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
            return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
        }
    }
    return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2, currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, mayRemoveDownStreamOperators, mayRemoveInputOps);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) LinkedHashSet(java.util.LinkedHashSet) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Aggregations

FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)34 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)16 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 IDriver (org.apache.hadoop.hive.ql.IDriver)12 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)12 FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc)12 PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper)12 Test (org.junit.Test)12 ArrayList (java.util.ArrayList)10 Operator (org.apache.hadoop.hive.ql.exec.Operator)10 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)9 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)9 List (java.util.List)6 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)6 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)6 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)6 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)6 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)6 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)6 HashMap (java.util.HashMap)5