Search in sources :

Example 1 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class SharedWorkOptimizer method pushFilterToTopOfTableScan.

private static void pushFilterToTopOfTableScan(SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp) throws UDFArgumentException {
    ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
    List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(tsOp.getChildOperators());
    for (Operator<? extends OperatorDesc> op : allChildren) {
        if (op instanceof FilterOperator) {
            FilterOperator filterOp = (FilterOperator) op;
            ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
            if (tableScanExprNode.isSame(filterExprNode)) {
                // We do not need to do anything
                return;
            }
            if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
                for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
                    if (childExprNode.isSame(filterExprNode)) {
                        // so probably we pushed previously
                        return;
                    }
                }
            }
            ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPAnd(), Arrays.<ExprNodeDesc>asList(tableScanExprNode.clone(), filterExprNode));
            filterOp.getConf().setPredicate(newPred);
        } else {
            Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(), new FilterDesc(tableScanExprNode.clone(), false), new RowSchema(tsOp.getSchema().getSignature()));
            tsOp.replaceChild(op, newOp);
            newOp.getParentOperators().add(tsOp);
            op.replaceParent(tsOp, newOp);
            newOp.getChildOperators().add(op);
            // Add to cache (same group as tsOp)
            optimizerCache.putIfWorkExists(newOp, tsOp);
        }
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)

Example 2 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class SharedWorkOptimizer method extractSharedOptimizationInfoForRoot.

private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp, TableScanOperator discardableTsOp) throws SemanticException {
    LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
    LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
    Set<Operator<?>> discardableInputOps = new HashSet<>();
    long dataSize = 0L;
    long maxDataSize = 0L;
    retainableOps.add(retainableTsOp);
    discardableOps.add(discardableTsOp);
    Operator<?> equalOp1 = retainableTsOp;
    Operator<?> equalOp2 = discardableTsOp;
    if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
    Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
    Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);
    // Special treatment for Filter operator that ignores the DPP predicates
    if (currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
        boolean equalFilters = false;
        FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
        FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
        if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred() && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
            Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
            Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
            if (conjsOp1String.equals(conjsOp2String)) {
                equalFilters = true;
            }
        }
        if (equalFilters) {
            equalOp1 = currentOp1;
            equalOp2 = currentOp2;
            retainableOps.add(equalOp1);
            discardableOps.add(equalOp2);
            if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
                // TODO: Support checking multiple child operators to merge further.
                discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
                discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
                return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
            }
            currentOp1 = currentOp1.getChildOperators().get(0);
            currentOp2 = currentOp2.getChildOperators().get(0);
        } else {
            // Bail out
            discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
            discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
            return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
        }
    }
    return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2, currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, false);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) LinkedHashSet(java.util.LinkedHashSet) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 3 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class ColumnPrunerProcCtx method handleFilterUnionChildren.

/**
 * If the input filter operator has direct child(ren) which are union operator,
 * and the filter's column is not the same as union's
 * create select operator between them. The select operator has same number of columns as
 * pruned child operator.
 *
 * @param curOp
 *          The filter operator which need to handle children.
 * @throws SemanticException
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
    if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
        return;
    }
    List<FieldNode> parentPrunList = prunedColLists.get(curOp);
    if (parentPrunList == null || parentPrunList.size() == 0) {
        return;
    }
    List<FieldNode> prunList = null;
    for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
        if (child instanceof UnionOperator) {
            prunList = genColLists(child);
            if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
                continue;
            }
            ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
            ArrayList<String> outputColNames = new ArrayList<String>();
            Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
            ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
            for (ColumnInfo colInfo : child.getSchema().getSignature()) {
                if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
                    continue;
                }
                ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
                exprs.add(colDesc);
                outputColNames.add(colInfo.getInternalName());
                ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                newCol.setAlias(colInfo.getAlias());
                outputRS.add(newCol);
                colExprMap.put(colInfo.getInternalName(), colDesc);
            }
            SelectDesc select = new SelectDesc(exprs, outputColNames, false);
            curOp.removeChild(child);
            SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
            OperatorFactory.makeChild(sel, child);
            sel.setColumnExprMap(colExprMap);
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)

Example 4 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class GlobalLimitOptimizer method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    Context ctx = pctx.getContext();
    Map<String, TableScanOperator> topOps = pctx.getTopOps();
    GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
    Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
    // is used.
    if (topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) {
        // Here we recursively check:
        // 1. whether there are exact one LIMIT in the query
        // 2. whether there is no aggregation, group-by, distinct, sort by,
        // distributed by, or table sampling in any of the sub-query.
        // The query only qualifies if both conditions are satisfied.
        // 
        // Example qualified queries:
        // CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
        // INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
        // FROM ... LIMIT...
        // SELECT * FROM (SELECT col1 as col2 (SELECT * FROM ...) t1 LIMIT ...) t2);
        // 
        TableScanOperator ts = topOps.values().iterator().next();
        Table tab = ts.getConf().getTableMetadata();
        if (tab.isNonNative()) {
            LOG.info("Not enabling limit optimization on non native table: " + tab.getTableName());
            return pctx;
        }
        // InputFormat.getSplits wont be called if no input path & TS Vertex will have 0 task parallelism
        if (tab.getStorageHandler() == null) {
            LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
            // query qualify for the optimization
            if (tempGlobalLimit != null) {
                LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
                Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
                if (!tab.isPartitioned()) {
                    if (filterOps.size() == 0) {
                        Integer tempOffset = tempGlobalLimitDesc.getOffset();
                        globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                    }
                } else {
                    // check if the pruner only contains partition columns
                    if (onlyContainsPartnCols(tab, filterOps)) {
                        String alias = (String) topOps.keySet().toArray()[0];
                        PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
                        // the filter to prune correctly
                        if (!partsList.hasUnknownPartitions()) {
                            Integer tempOffset = tempGlobalLimitDesc.getOffset();
                            globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                        }
                    }
                }
                if (globalLimitCtx.isEnable()) {
                    LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
                    LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
                }
            }
        }
    }
    return pctx;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) SplitSample(org.apache.hadoop.hive.ql.parse.SplitSample) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) GlobalLimitCtx(org.apache.hadoop.hive.ql.parse.GlobalLimitCtx)

Example 5 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class PredicateTransitivePropagate method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", "(" + FilterOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + JoinOperator.getOperatorName() + "%)"), new JoinTransitive());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    TransitiveContext context = new TransitiveContext();
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
    SemanticGraphWalker ogw = new LevelOrderWalker(disp, 2);
    // Create a list of topop nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters();
    // insert new filter between RS and parent of RS
    for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry : newFilters.entrySet()) {
        ReduceSinkOperator reducer = entry.getKey();
        Operator<?> parent = reducer.getParentOperators().get(0);
        List<ExprNodeDesc> exprs = entry.getValue();
        if (parent instanceof FilterOperator) {
            exprs = ExprNodeDescUtils.split(((FilterOperator) parent).getConf().getPredicate(), exprs);
            ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
            ((FilterOperator) parent).getConf().setPredicate(merged);
        } else {
            ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
            RowSchema parentRS = parent.getSchema();
            Operator<FilterDesc> newFilter = createFilter(reducer, parent, parentRS, merged);
        }
    }
    return pGraphContext;
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) LevelOrderWalker(org.apache.hadoop.hive.ql.lib.LevelOrderWalker) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Aggregations

FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)34 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)16 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 IDriver (org.apache.hadoop.hive.ql.IDriver)12 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)12 FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc)12 PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper)12 Test (org.junit.Test)12 ArrayList (java.util.ArrayList)10 Operator (org.apache.hadoop.hive.ql.exec.Operator)10 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)9 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)9 List (java.util.List)6 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)6 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)6 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)6 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)6 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)6 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)6 HashMap (java.util.HashMap)5