use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.
the class TezCompiler method findParallelSemiJoinBranch.
private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
    ParseContext parseContext, Map<ReduceSinkOperator, TableScanOperator> semijoins) {
  boolean parallelEdges = false;
  for (Operator<?> op : mapjoin.getParentOperators()) {
    if (!(op instanceof ReduceSinkOperator)) {
      continue;
    }
    op = op.getParentOperators().get(0);
    // Follow the ReduceSink operator upstream on the small-table side.
    while (!(op instanceof ReduceSinkOperator) && !(op instanceof TableScanOperator)
        && !(op.getChildren() != null && op.getChildren().size() > 1)) {
      if (op instanceof MapJoinOperator) {
        // Pick the correct parent: only one of the parents is not a
        // ReduceSink, and that is the one on the current pipeline.
        for (Operator<?> parentOp : op.getParentOperators()) {
          if (parentOp instanceof ReduceSinkOperator) {
            continue;
          }
          // parent in current pipeline
          op = parentOp;
          continue;
        }
      }
      op = op.getParentOperators().get(0);
    }
    // Bail out if an RS or TS is encountered.
    if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) {
      continue;
    }
    // A branch is hit.
    for (Node nd : op.getChildren()) {
      if (nd instanceof SelectOperator) {
        Operator<?> child = (Operator<?>) nd;
        while (child.getChildOperators().size() > 0) {
          child = child.getChildOperators().get(0);
        }
        // If it is not a ReduceSink op, it could still be a DPP event.
        if (!(child instanceof ReduceSinkOperator)) {
          if (child instanceof AppMasterEventOperator
              && ((AppMasterEventOperator) child).getConf() instanceof DynamicPruningEventDesc) {
            // DPP indeed; mark the parallel edge.
            parallelEdges = true;
          }
          continue;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator) child;
        SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(rs);
        if (sjInfo == null) {
          continue;
        }
        TableScanOperator ts = sjInfo.getTsOp();
        if (ts != bigTableTS) {
          // Skip; not the table scan we are looking for.
          continue;
        }
        parallelEdges = true;
        if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) {
          // Created by a hint (or must be kept); skip it.
          continue;
        }
        // Add the semijoin branch to the map.
        semijoins.put(rs, ts);
      }
    }
  }
  return parallelEdges;
}
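The heart of this method is the upstream walk: from each small-table ReduceSink parent of the map join, it climbs single-parent chains until it hits a ReduceSink, a TableScanOperator, or a fork with more than one child. A distilled sketch of that pattern, using a hypothetical Op interface in place of Hive's Operator hierarchy (none of these names are Hive API):

import java.util.List;

// Hypothetical stand-ins for Hive's operator tree; only the parent/child
// accessors used by the walk are modeled.
interface Op {
  List<Op> getParents();
  List<Op> getChildren();
  default boolean isSink() { return false; }  // plays the role of ReduceSinkOperator
  default boolean isScan() { return false; }  // plays the role of TableScanOperator
}

class UpstreamWalk {
  // Climb a linear pipeline until we reach a sink, a scan, or a fork
  // (an operator with more than one child), mirroring the while-loop above.
  static Op walkUpToFork(Op op) {
    while (!op.isSink() && !op.isScan()
        && (op.getChildren() == null || op.getChildren().size() <= 1)) {
      op = op.getParents().get(0);  // single parent: keep climbing
    }
    return op;
  }
}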
use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.
the class ConvertJoinMapJoin method convertJoinMapJoin.
/*
 * Once we have decided on the map join, the tree would transform from
 *
 *        |                      |
 *       Join                 MapJoin
 *      /    \               /       \
 *    RS      RS    --->   RS         TS (big table)
 *    |       |            |
 *    TS      TS           TS (small table)
 *
 * for tez.
 */
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context,
    MapJoinConversion mapJoinConversion, boolean removeReduceSink) throws SemanticException {
  // Bail out on a mux operator: it currently masks the emit keys
  // of the constituent reduce sinks.
  for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
    if (parentOp instanceof MuxOperator) {
      return null;
    }
  }
  // We can safely convert the join to a map join.
  final int bigTablePosition = mapJoinConversion.getBigTablePos();
  MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp,
      joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(),
      joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
  if (mapJoinOp == null) {
    return null;
  }
  MapJoinDesc mapJoinDesc = mapJoinOp.getConf();
  mapJoinDesc.setHybridHashJoin(HiveConf.getBoolVar(context.conf,
      HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
  List<ExprNodeDesc> joinExprs = mapJoinDesc.getKeys().values().iterator().next();
  if (joinExprs.size() == 0) {
    // In case of a cross join, disable hybrid grace hash join.
    mapJoinDesc.setHybridHashJoin(false);
  }
  Operator<? extends OperatorDesc> parentBigTableOp =
      mapJoinOp.getParentOperators().get(bigTablePosition);
  if (parentBigTableOp instanceof ReduceSinkOperator) {
    Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
    if (removeReduceSink) {
      for (Operator<?> p : parentBigTableOp.getParentOperators()) {
        // We might have generated a dynamic partition pruning operator chain. Since
        // we are removing the reduce sink, we need to remove that too.
        Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
        Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
        for (Operator<?> c : p.getChildOperators()) {
          AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
          if (event != null) {
            dynamicPartitionOperators.add(c);
            opEventPairs.put(c, event);
          }
        }
        for (Operator<?> c : dynamicPartitionOperators) {
          if (context.pruningOpsRemovedByPriorOpt.isEmpty()
              || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
            p.removeChild(c);
            // At this point we have found the fork in the op pipeline that has the
            // pruning as a child plan.
            LOG.info("Disabling dynamic pruning for: "
                + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName()
                + ". Need to be removed together with reduce sink");
          }
        }
        for (Operator<?> op : dynamicPartitionOperators) {
          context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
        }
      }
      mapJoinOp.getParentOperators().remove(bigTablePosition);
      if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
        mapJoinOp.getParentOperators().add(bigTablePosition,
            parentBigTableOp.getParentOperators().get(0));
      }
      parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
    }
    for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
      if (!(op.getChildOperators().contains(mapJoinOp))) {
        op.getChildOperators().add(mapJoinOp);
      }
      op.getChildOperators().remove(joinOp);
    }
    // Remove any semijoin branch that could create a task-level cycle with this map
    // join; this does not apply to a dynamically partitioned hash
    // join which takes place in a separate task.
    if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) {
      removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
    }
  }
  return mapJoinOp;
}
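One detail worth noting in the removeReduceSink block: the children carrying pruning chains are first collected into dynamicPartitionOperators and only detached from p afterwards. Removing them while iterating p.getChildOperators() directly would risk a ConcurrentModificationException. A minimal, self-contained illustration of this collect-then-remove idiom (the names here are illustrative, not Hive API):

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class CollectThenRemove {
  public static void main(String[] args) {
    List<String> children = new ArrayList<>(List.of("bigTableScan", "prune-1", "prune-2"));
    Set<String> toRemove = new HashSet<>();
    for (String c : children) {    // phase 1: decide what goes, without mutating
      if (c.startsWith("prune")) {
        toRemove.add(c);
      }
    }
    children.removeAll(toRemove);  // phase 2: mutate once, outside the iteration
    System.out.println(children);  // [bigTableScan]
  }
}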
use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.
the class DynamicPartitionPruningOptimization method generateEventOperatorPlan.
private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext parseContext,
    TableScanOperator ts, String column, String columnType, ExprNodeDesc predicate) {
  // We will put a fork in the plan at the source of the reduce sink.
  Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
  // We need the expr that generated the key of the reduce sink.
  ExprNodeDesc key = ctx.getKeyCol();
  // We also need the expr for the partitioned table.
  ExprNodeDesc partKey = ctx.parent.getChildren().get(0);
  LOG.debug("key expr: {}; partition key expr: {}", key, partKey);
  List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
  keyExprs.add(key);
  // group by requires "ArrayList", don't ask.
  ArrayList<String> outputNames = new ArrayList<String>();
  outputNames.add(HiveConf.getColumnInternalName(0));
  ArrayList<ColumnInfo> selectColInfos = new ArrayList<ColumnInfo>();
  selectColInfos.add(new ColumnInfo(outputNames.get(0), key.getTypeInfo(), "", false));
  // Project the relevant key column.
  SelectDesc select = new SelectDesc(keyExprs, outputNames);
  SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select,
      new RowSchema(selectColInfos), parentOfRS);
  Map<String, ExprNodeDesc> selectColumnExprMap = new HashMap<>();
  selectColumnExprMap.put(outputNames.get(0), key);
  selectOp.setColumnExprMap(selectColumnExprMap);
  // Do a group by on the list to dedup.
  float groupByMemoryUsage =
      HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold =
      HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  float minReductionHashAggr =
      HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
  float minReductionHashAggrLowerBound =
      HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
  ArrayList<ExprNodeDesc> groupByExprs = new ArrayList<ExprNodeDesc>();
  ExprNodeDesc groupByExpr = new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), null, false);
  groupByExprs.add(groupByExpr);
  GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, outputNames, groupByExprs,
      new ArrayList<AggregationDesc>(), false, groupByMemoryUsage, memoryThreshold,
      minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, true);
  ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
  groupbyColInfos.add(new ColumnInfo(outputNames.get(0), key.getTypeInfo(), "", false));
  GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy,
      new RowSchema(groupbyColInfos), selectOp);
  Map<String, ExprNodeDesc> colMap = new HashMap<String, ExprNodeDesc>();
  colMap.put(outputNames.get(0), groupByExpr);
  groupByOp.setColumnExprMap(colMap);
  // Finally, add the event broadcast operator.
  if (HiveConf.getVar(parseContext.getConf(), ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
    DynamicPruningEventDesc eventDesc = new DynamicPruningEventDesc();
    eventDesc.setTableScan(ts);
    eventDesc.setGenerator(ctx.generator);
    eventDesc.setTable(PlanUtils.getReduceValueTableDesc(
        PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
    eventDesc.setTargetColumnName(column);
    eventDesc.setTargetColumnType(columnType);
    eventDesc.setPartKey(partKey);
    if (predicate != null) {
      eventDesc.setPredicate(predicate.clone());
    }
    OperatorFactory.getAndMakeChild(eventDesc, groupByOp);
  } else {
    // Must be the Spark branch.
    SparkPartitionPruningSinkDesc desc = new SparkPartitionPruningSinkDesc();
    desc.setTable(PlanUtils.getReduceValueTableDesc(
        PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
    desc.addTarget(column, columnType, partKey, null, ts);
    SparkPartitionPruningSinkOperator dppSink =
        (SparkPartitionPruningSinkOperator) OperatorFactory.getAndMakeChild(desc, groupByOp);
    if (HiveConf.getBoolVar(parseContext.getConf(),
        ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
      mayReuseExistingDPPSink(parentOfRS, Arrays.asList(selectOp, groupByOp, dppSink));
    }
  }
}
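For orientation, the Tez branch above leaves the plan with a fork at parentOfRS; a sketch of the resulting shape (operator names abbreviated):

parentOfRS --> RS (original edge toward the join)
     \
      --> SEL(key) --> GBY(dedup) --> AppMasterEventOperator[DynamicPruningEventDesc]

The event operator carries the deduplicated key values to the target TableScanOperator ts, whose partitions can then be pruned at runtime; in the Spark branch the same Select/GroupBy chain feeds a SparkPartitionPruningSinkOperator instead.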
use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.
the class AppMasterEventProcessor method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  GenTezProcContext context = (GenTezProcContext) procCtx;
  AppMasterEventOperator event = (AppMasterEventOperator) nd;
  DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf();
  // Simply remember that we have seen an event operator...
  context.eventOperatorSet.add(event);
  // ...and remember the link between the event and its target table scan.
  List<AppMasterEventOperator> events;
  if (context.tsToEventMap.containsKey(desc.getTableScan())) {
    events = context.tsToEventMap.get(desc.getTableScan());
  } else {
    events = new ArrayList<AppMasterEventOperator>();
  }
  events.add(event);
  context.tsToEventMap.put(desc.getTableScan(), events);
  return true;
}
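The containsKey/get/put sequence above could be collapsed with Map.computeIfAbsent; a sketch, assuming tsToEventMap is a Map<TableScanOperator, List<AppMasterEventOperator>> as the calls suggest:

// Equivalent to the branch above: create the list on first sight of this
// table scan, then append the event.
context.tsToEventMap
    .computeIfAbsent(desc.getTableScan(), k -> new ArrayList<>())
    .add(event);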
use of org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc in project hive by apache.
the class SharedWorkOptimizer method sharedWorkExtendedOptimization.
private static void sharedWorkExtendedOptimization(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache) throws SemanticException {
  // Gather RS operators that 1) belong to root works, i.e., works containing TS operators,
  // and 2) share the same input operator.
  // These will be the first target for extended shared work optimization.
  Multimap<Operator<?>, ReduceSinkOperator> parentToRsOps = ArrayListMultimap.create();
  Set<Operator<?>> visited = new HashSet<>();
  for (Entry<String, TableScanOperator> e : pctx.getTopOps().entrySet()) {
    gatherReduceSinkOpsByInput(parentToRsOps, visited,
        findWorkOperators(optimizerCache, e.getValue()));
  }
  Set<Operator<?>> removedOps = new HashSet<>();
  while (!parentToRsOps.isEmpty()) {
    // As above, we enforce a certain order when we do the reutilization.
    // In particular, we use size of data in RS x number of uses.
    List<Entry<Operator<?>, Long>> sortedRSGroups = rankOpsByAccumulatedSize(parentToRsOps.keySet());
    LOG.debug("Sorted operators by size: {}", sortedRSGroups);
    // Execute the extended optimization.
    // For each RS, check whether other RS in the same work could be merged into this one.
    // If they are merged, RS operators in the resulting work will be considered
    // mergeable in the next loop iteration.
    Multimap<Operator<?>, ReduceSinkOperator> existingRsOps = ArrayListMultimap.create();
    for (Entry<Operator<?>, Long> rsGroupInfo : sortedRSGroups) {
      Operator<?> rsParent = rsGroupInfo.getKey();
      for (ReduceSinkOperator discardableRsOp : parentToRsOps.get(rsParent)) {
        if (removedOps.contains(discardableRsOp)) {
          LOG.debug("Skip {} as it has already been removed", discardableRsOp);
          continue;
        }
        Collection<ReduceSinkOperator> otherRsOps = existingRsOps.get(rsParent);
        for (ReduceSinkOperator retainableRsOp : otherRsOps) {
          if (retainableRsOp.getChildOperators().size() == 0) {
            // Just skip this RS; it is a semijoin/bloom-filter related RS.
            continue;
          }
          if (removedOps.contains(retainableRsOp)) {
            LOG.debug("Skip {} as it has already been removed", retainableRsOp);
            continue;
          }
          // First we quickly check if the two RS operators can actually be merged.
          // We already know that these two RS operators have the same parent, but
          // we need to check whether both RS are actually equal. Further, we check
          // whether their child is also equal. If any of these conditions are not
          // met, we are not going to try to merge.
          boolean mergeable = compareOperator(pctx, retainableRsOp, discardableRsOp)
              && compareOperator(pctx, retainableRsOp.getChildOperators().get(0),
                  discardableRsOp.getChildOperators().get(0));
          if (!mergeable) {
            // Skip.
            LOG.debug("{} and {} cannot be merged", retainableRsOp, discardableRsOp);
            continue;
          }
          LOG.debug("Checking additional conditions for merging subtree starting at {}"
              + " into subtree starting at {}", discardableRsOp, retainableRsOp);
          // Secondly, we extract information about the part of the tree that can be merged,
          // as well as some structural information (memory consumption) that needs to be
          // used to determine whether the merge can happen.
          Operator<?> retainableRsOpChild = retainableRsOp.getChildOperators().get(0);
          Operator<?> discardableRsOpChild = discardableRsOp.getChildOperators().get(0);
          SharedResult sr = extractSharedOptimizationInfo(pctx, optimizerCache, retainableRsOp,
              discardableRsOp, retainableRsOpChild, discardableRsOpChild);
          // Check the preconditions for the merge, e.g., memory limits for broadcast
          // tables.
          if (sr.retainableOps.isEmpty() || !validPreConditions(pctx, optimizerCache, sr)) {
            // Skip.
            LOG.debug("{} and {} do not meet preconditions", retainableRsOp, discardableRsOp);
            continue;
          }
          deduplicateReduceTraits(retainableRsOp.getConf(), discardableRsOp.getConf());
          // We can merge.
          Operator<?> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
          Operator<?> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
          if (lastDiscardableOp.getNumChild() != 0) {
            List<Operator<? extends OperatorDesc>> allChildren =
                Lists.newArrayList(lastDiscardableOp.getChildOperators());
            for (Operator<? extends OperatorDesc> op : allChildren) {
              lastDiscardableOp.getChildOperators().remove(op);
              op.replaceParent(lastDiscardableOp, lastRetainableOp);
              lastRetainableOp.getChildOperators().add(op);
            }
          }
          LOG.debug("Merging subtree starting at {} into subtree starting at {}",
              discardableRsOp, retainableRsOp);
          // First we remove the input operators of the expression that
          // we are going to eliminate.
          for (Operator<?> op : sr.discardableInputOps) {
            OperatorUtils.removeOperator(op);
            optimizerCache.removeOp(op);
            removedOps.add(op);
            // Remove DPP predicates.
            if (op instanceof ReduceSinkOperator) {
              SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
              if (sjbi != null && !sr.discardableOps.contains(sjbi.getTsOp())
                  && !sr.discardableInputOps.contains(sjbi.getTsOp())) {
                GenTezUtils.removeSemiJoinOperator(pctx, (ReduceSinkOperator) op, sjbi.getTsOp());
                optimizerCache.tableScanToDPPSource.remove(sjbi.getTsOp(), op);
              }
            } else if (op instanceof AppMasterEventOperator) {
              DynamicPruningEventDesc dped = (DynamicPruningEventDesc) op.getConf();
              if (!sr.discardableOps.contains(dped.getTableScan())
                  && !sr.discardableInputOps.contains(dped.getTableScan())) {
                GenTezUtils.removeSemiJoinOperator(pctx, (AppMasterEventOperator) op,
                    dped.getTableScan());
                optimizerCache.tableScanToDPPSource.remove(dped.getTableScan(), op);
              }
            }
            LOG.debug("Input operator removed: {}", op);
          }
          // We remove the discardable RS operator.
          OperatorUtils.removeOperator(discardableRsOp);
          optimizerCache.removeOp(discardableRsOp);
          removedOps.add(discardableRsOp);
          LOG.debug("Operator removed: {}", discardableRsOp);
          // Then we merge the operators of the works we are going to merge.
          optimizerCache.removeOpAndCombineWork(discardableRsOpChild, retainableRsOpChild);
          // Finally we remove the rest of the expression from the tree.
          for (Operator<?> op : sr.discardableOps) {
            OperatorUtils.removeOperator(op);
            optimizerCache.removeOp(op);
            removedOps.add(op);
            LOG.debug("Operator removed: {}", op);
          }
          if (pctx.getConf().getBoolVar(ConfVars.HIVE_SHARED_WORK_DOWNSTREAM_MERGE)) {
            if (sr.discardableOps.size() == 1) {
              downStreamMerge(retainableRsOp, optimizerCache, pctx);
            }
          }
          break;
        }
        if (removedOps.contains(discardableRsOp)) {
          // This operator has been removed; drop it from the list of existing operators.
          existingRsOps.remove(rsParent, discardableRsOp);
        } else {
          // This operator has not been removed; include it in the list of existing operators.
          existingRsOps.put(rsParent, discardableRsOp);
        }
      }
    }
    // We gather the operators that will be used for the next iteration of the extended
    // optimization (if any).
    parentToRsOps = ArrayListMultimap.create();
    visited = new HashSet<>();
    for (Entry<Operator<?>, ReduceSinkOperator> e : existingRsOps.entries()) {
      if (removedOps.contains(e.getValue()) || e.getValue().getNumChild() < 1) {
        // If the RS has been removed, or it has no child (for instance, a
        // semijoin RS), we can quickly skip this one.
        continue;
      }
      gatherReduceSinkOpsByInput(parentToRsOps, visited,
          findWorkOperators(optimizerCache, e.getValue().getChildOperators().get(0)));
    }
  }
  // Remove unused table scan operators.
  pctx.getTopOps().entrySet()
      .removeIf((Entry<String, TableScanOperator> e) -> e.getValue().getNumChild() == 0);
}
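Stripped of the Hive plumbing, each pass of the loop above follows a simple shape: for every candidate RS that shares a parent with an already-retained RS, either merge it into an equivalent retained one or keep it as a new representative. A self-contained toy of one such round (equivalent() stands in for compareOperator(); none of this is Hive API):

import java.util.ArrayList;
import java.util.List;

public class OneMergeRound {
  // Stand-in for compareOperator(): two sinks are mergeable when they emit
  // the same key column (encoded here as the letter inside the brackets).
  static boolean equivalent(String a, String b) {
    return a.charAt(3) == b.charAt(3);
  }

  public static void main(String[] args) {
    List<String> candidates = List.of("RS[a]", "RS[b]", "RS[a]", "RS[b]", "RS[c]");
    List<String> retained = new ArrayList<>();
    List<String> removed = new ArrayList<>();
    outer:
    for (String rs : candidates) {
      for (String kept : retained) {
        if (equivalent(kept, rs)) {
          removed.add(rs);    // discardable: its work merges into 'kept'
          continue outer;
        }
      }
      retained.add(rs);       // no equivalent yet: becomes the representative
    }
    System.out.println("retained=" + retained + "  removed=" + removed);
    // retained=[RS[a], RS[b], RS[c]]  removed=[RS[a], RS[b]]
  }
}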