Search in sources :

Example 11 with DynamicPruningEventDesc

use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.

the class DynamicPartitionPruningOptimization method generateEventOperatorPlan.

/**
 * Adds a pruning side branch to the plan: forks at the parent of the generating reduce
 * sink, projects the reduce sink key, de-duplicates it with a hash-mode group by that has
 * no aggregations, and ends the branch with an engine-specific sink operator.
 *
 * @param ctx          context providing the generating reduce sink ({@code generator}), the
 *                     desc holding the key index and the parent expression
 * @param parseContext parse context from which the configuration is read
 * @param ts           table scan registered as the target on the sink descriptor
 * @param column       target column name passed to the sink descriptor
 * @param columnType   target column type passed to the sink descriptor
 */
private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String column, String columnType) {
    // The new branch hangs off the operator that feeds the generating reduce sink.
    Operator<? extends OperatorDesc> forkPoint = ctx.generator.getParentOperators().get(0);
    // Expression that produces the reduce sink key at the index recorded in the desc.
    ExprNodeDesc keyExpr = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
    // Matching expression on the partitioned-table side.
    ExprNodeDesc partKeyExpr = ctx.parent.getChildren().get(0);
    if (LOG.isDebugEnabled()) {
        LOG.debug("key expr: " + keyExpr);
        LOG.debug("partition key expr: " + partKeyExpr);
    }

    // SELECT: project only the key, exposed under the first internal column name.
    String internalName = HiveConf.getColumnInternalName(0);
    List<ExprNodeDesc> projected = new ArrayList<ExprNodeDesc>();
    projected.add(keyExpr);
    // Kept as a concrete ArrayList because the same list is handed to GroupByDesc below.
    ArrayList<String> columnNames = new ArrayList<String>();
    columnNames.add(internalName);
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(new SelectDesc(projected, columnNames), forkPoint);

    // GROUP BY: hash mode with an empty aggregation list, used purely to dedup the keys.
    float hashMemory = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float hashMemoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    ExprNodeDesc dedupKey = new ExprNodeColumnDesc(keyExpr.getTypeInfo(), internalName, null, false);
    ArrayList<ExprNodeDesc> dedupKeys = new ArrayList<ExprNodeDesc>();
    dedupKeys.add(dedupKey);
    GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, columnNames, dedupKeys, new ArrayList<AggregationDesc>(), false, hashMemory, hashMemoryThreshold, null, false, -1, true);
    GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, selectOp);
    Map<String, ExprNodeDesc> exprMap = new HashMap<String, ExprNodeDesc>();
    exprMap.put(internalName, dedupKey);
    groupByOp.setColumnExprMap(exprMap);

    // Terminate the branch with the sink that matches the execution engine.
    String engine = HiveConf.getVar(parseContext.getConf(), ConfVars.HIVE_EXECUTION_ENGINE);
    if (engine.equals("tez")) {
        DynamicPruningEventDesc eventDesc = new DynamicPruningEventDesc();
        eventDesc.setTableScan(ts);
        eventDesc.setGenerator(ctx.generator);
        eventDesc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(projected, "key")));
        eventDesc.setTargetColumnName(column);
        eventDesc.setTargetColumnType(columnType);
        eventDesc.setPartKey(partKeyExpr);
        OperatorFactory.getAndMakeChild(eventDesc, groupByOp);
        return;
    }

    // Every engine other than tez is handled as spark.
    SparkPartitionPruningSinkDesc sinkDesc = new SparkPartitionPruningSinkDesc();
    sinkDesc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(projected, "key")));
    sinkDesc.addTarget(column, columnType, partKeyExpr, null, ts);
    SparkPartitionPruningSinkOperator pruningSink = (SparkPartitionPruningSinkOperator) OperatorFactory.getAndMakeChild(sinkDesc, groupByOp);
    if (HiveConf.getBoolVar(parseContext.getConf(), ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
        // Offer the freshly built branch to the reuse check when equivalent-work combining is on.
        mayReuseExistingDPPSink(forkPoint, Arrays.asList(selectOp, groupByOp, pruningSink));
    }
}
Also used : GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator)

Example 12 with DynamicPruningEventDesc

use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.

the class SharedWorkOptimizer method findChildWorkOperators.

/**
 * Collects the operators of the works that come after the work containing {@code start}.
 *
 * <p>For every reduce sink in the starting work, the work operators of each of its children
 * are included, plus those of the table scan of any semijoin branch registered for that
 * reduce sink. For every operator whose conf is a {@link DynamicPruningEventDesc}, the work
 * operators of the referenced table scan are included.
 *
 * @param pctx           parse context holding the reduce-sink to semijoin-branch mapping
 * @param optimizerCache cache handed through to {@code findWorkOperators}
 * @param start          operator identifying the work to start from
 * @return the gathered operators; empty when nothing qualifies
 */
private static Set<Operator<?>> findChildWorkOperators(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
    Set<Operator<?>> downstream = new HashSet<>();
    for (Operator<?> member : findWorkOperators(optimizerCache, start)) {
        if (!(member instanceof ReduceSinkOperator)) {
            if (member.getConf() instanceof DynamicPruningEventDesc) {
                // The DPP target counts as a child: this work has to finish before it can run.
                DynamicPruningEventDesc pruningDesc = (DynamicPruningEventDesc) member.getConf();
                downstream.addAll(findWorkOperators(optimizerCache, pruningDesc.getTableScan()));
            }
            continue;
        }

        // Everything below a reduce sink belongs to a descendant work.
        if (member.getChildOperators() != null) {
            for (Operator<?> rsChild : member.getChildOperators()) {
                downstream.addAll(findWorkOperators(optimizerCache, rsChild));
            }
        }

        // The semijoin target counts as a child too: this work has to finish before it can run.
        SemiJoinBranchInfo branchInfo = pctx.getRsToSemiJoinBranchInfo().get(member);
        if (branchInfo != null) {
            downstream.addAll(findWorkOperators(optimizerCache, branchInfo.getTsOp()));
        }
    }
    return downstream;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc)

Example 13 with DynamicPruningEventDesc

use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.

the class TezCompiler method findParallelSemiJoinBranch.

/**
 * Looks upstream of the reduce-sink parents of {@code mapjoin} for a branching operator and
 * inspects the branches that start with a select operator.
 *
 * <p>A branch ending in an app-master event operator configured with a
 * {@link DynamicPruningEventDesc} marks the edges as parallel. A branch ending in a reduce
 * sink that has semijoin info targeting {@code bigTableTS} also marks them as parallel,
 * records {@code mapjoin} in {@code probeDecodeMJoins} and, unless the semijoin came from a
 * hint or is not flagged for removal, registers it in {@code semijoins}.
 *
 * @param mapjoin           operator whose parents are examined; it is cast to
 *                          {@link MapJoinOperator} when recorded in {@code probeDecodeMJoins}
 * @param bigTableTS        table scan a semijoin branch must target to be considered
 * @param parseContext      source of the reduce-sink to semijoin-branch mapping
 * @param semijoins         output map: reduce sink of a qualifying semijoin to its table scan
 * @param probeDecodeMJoins output map: table scan to the map joins recorded for it
 * @return {@code true} if at least one parallel DPP or semijoin branch was found
 */
private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS, ParseContext parseContext, Map<ReduceSinkOperator, TableScanOperator> semijoins, Map<TableScanOperator, List<MapJoinOperator>> probeDecodeMJoins) {
    boolean parallelEdges = false;
    for (Operator<?> op : mapjoin.getParentOperators()) {
        // Only parents that are reduce sinks are followed.
        if (!(op instanceof ReduceSinkOperator)) {
            continue;
        }
        op = op.getParentOperators().get(0);
        // Follow the Reducesink operator upstream which is on small table side.
        // The walk stops at a reduce sink, a table scan, or an operator with more than one child.
        while (!(op instanceof ReduceSinkOperator) && !(op instanceof TableScanOperator) && !(op.getChildren() != null && op.getChildren().size() > 1)) {
            if (op instanceof MapJoinOperator) {
                // A map join may have several parents; take the one that is not a
                // ReduceSink, that is what we are looking for.
                // NOTE(review): the trailing 'continue' below applies to this inner for loop,
                // not to the enclosing while, so 'op' ends up as the last non-ReduceSink parent
                // and is then advanced once more by the statement after the if — confirm this
                // is intended.
                for (Operator<?> parentOp : op.getParentOperators()) {
                    if (parentOp instanceof ReduceSinkOperator) {
                        continue;
                    }
                    // parent in current pipeline
                    op = parentOp;
                    continue;
                }
            }
            op = op.getParentOperators().get(0);
        }
        // Bail out if RS or TS is encountered.
        if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) {
            continue;
        }
        // A branch is hit.
        for (Node nd : op.getChildren()) {
            if (nd instanceof SelectOperator) {
                Operator<?> child = (Operator<?>) nd;
                // Descend along the first child at every level until a leaf operator is reached.
                while (child.getChildOperators().size() > 0) {
                    child = child.getChildOperators().get(0);
                }
                // If not ReduceSink Op, skip
                if (!(child instanceof ReduceSinkOperator)) {
                    // This still could be DPP.
                    if (child instanceof AppMasterEventOperator && ((AppMasterEventOperator) child).getConf() instanceof DynamicPruningEventDesc) {
                        // DPP indeed, Set parallel edges true
                        parallelEdges = true;
                    }
                    continue;
                }
                ReduceSinkOperator rs = (ReduceSinkOperator) child;
                // A reduce sink without semijoin info is not a semijoin branch.
                SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(rs);
                if (sjInfo == null) {
                    continue;
                }
                TableScanOperator ts = sjInfo.getTsOp();
                if (ts != bigTableTS) {
                    // skip, not the one we are looking for.
                    continue;
                }
                parallelEdges = true;
                // Keep track of Mj to probeDecode TS
                if (!probeDecodeMJoins.containsKey(ts)) {
                    probeDecodeMJoins.put(ts, new ArrayList<>());
                }
                probeDecodeMJoins.get(ts).add((MapJoinOperator) mapjoin);
                // Skip adding to SJ removal map when created by hint
                if (!sjInfo.getIsHint() && sjInfo.getShouldRemove()) {
                    semijoins.put(rs, ts);
                }
            }
        }
    }
    return parallelEdges;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) TerminalOperator(org.apache.hadoop.hive.ql.exec.TerminalOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) Node(org.apache.hadoop.hive.ql.lib.Node) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc)

Example 14 with DynamicPruningEventDesc

use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.

the class TezCompiler method connect.

/**
 * Recursive step of a depth-first strongly-connected-component search over the operator
 * graph, where the graph is extended with "special" edges: from an app-master event operator
 * to the table scan named in its {@link DynamicPruningEventDesc}, from a terminal operator to
 * the reduce sinks mapped to it in the parse context, and from a semijoin reduce sink to its
 * target table scan.
 *
 * <p>Each finished component is fully populated <em>before</em> it is added to
 * {@code components}, so its {@code equals}/{@code hashCode} never change while it is a
 * member of that set.
 *
 * @param o            operator being visited
 * @param index        counter supplying the next discovery index
 * @param nodes        stack of operators on the current search path
 * @param indexes      discovery index of every visited operator
 * @param lowLinks     smallest discovery index reachable from each visited operator
 * @param components   output: one set of operators per component found
 * @param parseContext source of the terminal-op and semijoin mappings
 */
private void connect(Operator<?> o, AtomicInteger index, Stack<Operator<?>> nodes, Map<Operator<?>, Integer> indexes, Map<Operator<?>, Integer> lowLinks, Set<Set<Operator<?>>> components, ParseContext parseContext) {
    indexes.put(o, index.get());
    lowLinks.put(o, index.get());
    index.incrementAndGet();
    nodes.push(o);

    // Build the successor list; the child list is copied whenever special edges are appended
    // so the operator's own child list is never modified.
    List<Operator<?>> children;
    if (o instanceof AppMasterEventOperator) {
        children = new ArrayList<>((o.getChildOperators()));
        TableScanOperator ts = ((DynamicPruningEventDesc) o.getConf()).getTableScan();
        LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString());
        children.add(ts);
    } else if (o instanceof TerminalOperator) {
        children = new ArrayList<>((o.getChildOperators()));
        for (ReduceSinkOperator rs : parseContext.getTerminalOpToRSMap().get((TerminalOperator<?>) o)) {
            // add an edge
            LOG.debug("Adding special edge: From terminal op to semijoin edge " + o.getName() + " --> " + rs.toString());
            children.add(rs);
        }
        if (o instanceof ReduceSinkOperator) {
            // semijoin case
            SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(o);
            if (sjInfo != null) {
                TableScanOperator ts = sjInfo.getTsOp();
                LOG.debug("Adding special edge: " + o.getName() + " --> " + ts.toString());
                children.add(ts);
            }
        }
    } else {
        children = o.getChildOperators();
    }

    for (Operator<?> child : children) {
        if (!indexes.containsKey(child)) {
            // Unvisited successor: recurse, then inherit its low-link.
            connect(child, index, nodes, indexes, lowLinks, components, parseContext);
            lowLinks.put(o, Math.min(lowLinks.get(o), lowLinks.get(child)));
        } else if (nodes.contains(child)) {
            // Successor is still on the search path: it closes a cycle back to that operator.
            lowLinks.put(o, Math.min(lowLinks.get(o), indexes.get(child)));
        }
    }

    // o is the root of a component when nothing below it reaches an earlier operator.
    if (lowLinks.get(o).equals(indexes.get(o))) {
        Set<Operator<?>> component = new LinkedHashSet<Operator<?>>();
        Operator<?> current;
        do {
            current = nodes.pop();
            component.add(current);
        } while (current != o);
        // Register the component only once it is complete: mutating a set after it has been
        // inserted into another (hash-based) set changes its hash code in place, which leaves
        // the outer set's behaviour unspecified.
        components.add(component);
    }
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) TerminalOperator(org.apache.hadoop.hive.ql.exec.TerminalOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) LinkedHashSet(java.util.LinkedHashSet) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) TerminalOperator(org.apache.hadoop.hive.ql.exec.TerminalOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)

Example 15 with DynamicPruningEventDesc

use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.

the class SharedWorkOptimizer method findChildWorkOperators.

/**
 * Collects the operators of the works that come after the work containing {@code start}.
 *
 * <p>For every reduce sink in the starting work, the work operators of each of its children
 * are included, plus those of the table scan of any semijoin branch registered for that
 * reduce sink. Operators whose conf is a {@link DynamicPruningEventDesc} contribute the work
 * operators of the referenced table scan only when {@code traverseEventOperators} is set.
 *
 * @param pctx                   parse context holding the reduce-sink to semijoin-branch mapping
 * @param optimizerCache         cache handed through to {@code findWorkOperators}
 * @param start                  operator identifying the work to start from
 * @param traverseEventOperators whether to follow dynamic pruning event operators to their
 *                               target table scan
 * @return the gathered operators; empty when nothing qualifies
 */
private static Set<Operator<?>> findChildWorkOperators(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, Operator<?> start, boolean traverseEventOperators) {
    Set<Operator<?>> downstream = new HashSet<>();
    for (Operator<?> member : findWorkOperators(optimizerCache, start)) {
        if (!(member instanceof ReduceSinkOperator)) {
            // DPP work is considered a child because this work needs to finish for it to
            // execute; it is only followed when the caller asked for event operators.
            if (member.getConf() instanceof DynamicPruningEventDesc && traverseEventOperators) {
                DynamicPruningEventDesc pruningDesc = (DynamicPruningEventDesc) member.getConf();
                downstream.addAll(findWorkOperators(optimizerCache, pruningDesc.getTableScan()));
            }
            continue;
        }

        // Everything below a reduce sink belongs to a descendant work.
        if (member.getChildOperators() != null) {
            for (Operator<?> rsChild : member.getChildOperators()) {
                downstream.addAll(findWorkOperators(optimizerCache, rsChild));
            }
        }

        // The semijoin target counts as a child too: this work has to finish before it can run.
        SemiJoinBranchInfo branchInfo = pctx.getRsToSemiJoinBranchInfo().get(member);
        if (branchInfo != null) {
            downstream.addAll(findWorkOperators(optimizerCache, branchInfo.getTsOp()));
        }
    }
    return downstream;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc)

Aggregations

DynamicPruningEventDesc (org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc)16 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)14 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)13 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)13 Operator (org.apache.hadoop.hive.ql.exec.Operator)13 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)13 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)12 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)11 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)11 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)9 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)9 HashSet (java.util.HashSet)7 CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator)7 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)7 TezDummyStoreOperator (org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator)7 LinkedHashSet (java.util.LinkedHashSet)6 SemiJoinBranchInfo (org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo)6 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)6 HashMap (java.util.HashMap)4