
Example 1 with AppMasterEventOperator

use of org.apache.hadoop.hive.ql.exec.AppMasterEventOperator in project hive by apache.

the class AppMasterEventProcessor method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    GenTezProcContext context = (GenTezProcContext) procCtx;
    AppMasterEventOperator event = (AppMasterEventOperator) nd;
    DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf();
    // simply need to remember that we've seen an event operator.
    context.eventOperatorSet.add(event);
    // and remember link between event and table scan
    List<AppMasterEventOperator> events;
    if (context.tsToEventMap.containsKey(desc.getTableScan())) {
        events = context.tsToEventMap.get(desc.getTableScan());
    } else {
        events = new ArrayList<AppMasterEventOperator>();
    }
    events.add(event);
    context.tsToEventMap.put(desc.getTableScan(), events);
    return true;
}
Also used : AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc)
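The containsKey/get/put sequence in this processor can be collapsed into a single map operation with computeIfAbsent. A minimal sketch, assuming tsToEventMap is a plain java.util.Map keyed by table scan as the code above implies; the generic EventRegistry wrapper is hypothetical, added only to make the snippet self-contained:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class EventRegistry<TS, EV> {
    private final Map<TS, List<EV>> tsToEventMap = new HashMap<>();

    // remember the link between a table scan and an event operator
    void link(TS tableScan, EV event) {
        tsToEventMap.computeIfAbsent(tableScan, k -> new ArrayList<>()).add(event);
    }

    // all events registered for a given table scan (empty list if none)
    List<EV> eventsFor(TS tableScan) {
        return tsToEventMap.getOrDefault(tableScan, List.of());
    }
}

computeIfAbsent does the same read-modify-write as the branchy version above, and the trailing put back into the map becomes unnecessary because the list is already installed under the key.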

Example 2 with AppMasterEventOperator

use of org.apache.hadoop.hive.ql.exec.AppMasterEventOperator in project hive by apache.

the class ConvertJoinMapJoin method convertJoinMapJoin.

/*
   * Once we have decided on the map join, the tree would transform from
   *
   *        |                   |
   *       Join               MapJoin
   *       / \                /   \
   *     RS   RS   --->     RS    TS (big table)
   *    /      \           /
   *   TS       TS        TS (small table)
   *
   * for tez.
   */
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, boolean removeReduceSink) throws SemanticException {
    // bail out if any parent is a MuxOperator: the mux masks the emit keys
    // of the constituent reduce sinks.
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (parentOp instanceof MuxOperator) {
            return null;
        }
    }
    // can safely convert the join to a map join.
    MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
    mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
    List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next();
    if (joinExprs.size() == 0) {
        // In case of cross join, we disable hybrid grace hash join
        mapJoinOp.getConf().setHybridHashJoin(false);
    }
    Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
    if (parentBigTableOp instanceof ReduceSinkOperator) {
        Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
        if (removeReduceSink) {
            for (Operator<?> p : parentBigTableOp.getParentOperators()) {
                // we might have generated a dynamic partition operator chain. Since
                // we're removing the reduce sink we need to remove that too.
                Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
                Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
                for (Operator<?> c : p.getChildOperators()) {
                    AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
                    if (event != null) {
                        dynamicPartitionOperators.add(c);
                        opEventPairs.put(c, event);
                    }
                }
                for (Operator<?> c : dynamicPartitionOperators) {
                    if (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
                        p.removeChild(c);
                        // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
                        LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() + ". Need to be removed together with reduce sink");
                    }
                }
                for (Operator<?> op : dynamicPartitionOperators) {
                    context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
                }
            }
            mapJoinOp.getParentOperators().remove(bigTablePosition);
            if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
                mapJoinOp.getParentOperators().add(bigTablePosition, parentBigTableOp.getParentOperators().get(0));
            }
            parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
        }
        for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
            if (!(op.getChildOperators().contains(mapJoinOp))) {
                op.getChildOperators().add(mapJoinOp);
            }
            op.getChildOperators().remove(joinOp);
        }
        // remove any semijoin branch that would create a task-level cycle with this
        // map join; the exception is the dynamically partitioned hash
        // join which takes place in a separate task.
        if (context.parseContext.getRsOpToTsOpMap().size() > 0 && removeReduceSink) {
            removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
        }
    }
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) HashMap(java.util.HashMap) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet)
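This example depends on a helper, findDynamicPartitionBroadcast, whose body is not shown. Judging by how its result is used, it walks downward from the given operator until it reaches an AppMasterEventOperator configured for dynamic pruning. A hedged sketch of such a walk; the traversal details are an assumption, not the verified Hive implementation:

// sketch only: follow single-child chains downward until a dynamic-partition-
// pruning event operator is found; abandon a chain as soon as it forks
private AppMasterEventOperator findDynamicPartitionBroadcast(Operator<?> parent) {
    for (Operator<?> child : parent.getChildOperators()) {
        Operator<?> op = child;
        while (op != null) {
            if (op instanceof AppMasterEventOperator
                    && op.getConf() instanceof DynamicPruningEventDesc) {
                return (AppMasterEventOperator) op;
            }
            // keep walking only while the chain doesn't fork
            op = op.getChildOperators().size() == 1 ? op.getChildOperators().get(0) : null;
        }
    }
    return null;
}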

Example 3 with AppMasterEventOperator

use of org.apache.hadoop.hive.ql.exec.AppMasterEventOperator in project hive by apache.

the class RemoveDynamicPruningBySize method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
    OptimizeTezProcContext context = (OptimizeTezProcContext) procContext;
    AppMasterEventOperator event = (AppMasterEventOperator) nd;
    AppMasterEventDesc desc = event.getConf();
    if (desc.getStatistics().getDataSize() > context.conf.getLongVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE) && (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(event))) {
        context.pruningOpsRemovedByPriorOpt.add(event);
        GenTezUtils.removeBranch(event);
        // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
        LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) desc).getTableScan().getName() + ". Expected data size is too big: " + desc.getStatistics().getDataSize());
    }
    return false;
}
Also used : AppMasterEventDesc(org.apache.hadoop.hive.ql.plan.AppMasterEventDesc) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) OptimizeTezProcContext(org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext)
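The threshold read here, ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE (surfaced as hive.tez.dynamic.partition.pruning.max.data.size), caps how much data a pruning broadcast is allowed to carry. Note that the isEmpty() check is redundant: when the set is empty, the contains() test is already false. A hypothetical standalone restatement of the guard with the Hive types replaced by plain values, just to isolate the decision logic:

import java.util.HashSet;
import java.util.Set;

// hypothetical wrapper, not a Hive class: remove the pruning branch only when
// the expected broadcast data exceeds the configured maximum and the event
// was not already removed by a prior optimization
final class PruningSizeGuard {
    private final long maxDataSize;
    private final Set<Object> removedByPriorOpt = new HashSet<>();

    PruningSizeGuard(long maxDataSize) {
        this.maxDataSize = maxDataSize;
    }

    boolean shouldRemove(Object event, long expectedDataSize) {
        if (expectedDataSize > maxDataSize && !removedByPriorOpt.contains(event)) {
            removedByPriorOpt.add(event);
            return true; // caller then detaches the branch, as GenTezUtils.removeBranch does above
        }
        return false;
    }
}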

Example 4 with AppMasterEventOperator

use of org.apache.hadoop.hive.ql.exec.AppMasterEventOperator in project hive by apache.

the class GenTezUtils method removeUnionOperators.

// removes any union operator and clones the plan
public static void removeUnionOperators(GenTezProcContext context, BaseWork work, int indexForTezUnion) throws SemanticException {
    List<Operator<?>> roots = new ArrayList<Operator<?>>();
    roots.addAll(work.getAllRootOperators());
    if (work.getDummyOps() != null) {
        roots.addAll(work.getDummyOps());
    }
    roots.addAll(context.eventOperatorSet);
    // need to clone the plan.
    List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots, indexForTezUnion);
    // we're cloning the operator plan but we're retaining the original work. That means
    // that root operators have to be replaced with the cloned ops. The replacement map
    // tells you what that mapping is.
    BiMap<Operator<?>, Operator<?>> replacementMap = HashBiMap.create();
    // there's some special handling for dummyOps required. Mapjoins won't be properly
    // initialized if their dummy parents aren't initialized. Since we cloned the plan
    // we need to replace the dummy operators in the work with the cloned ones.
    List<HashTableDummyOperator> dummyOps = new LinkedList<HashTableDummyOperator>();
    Iterator<Operator<?>> it = newRoots.iterator();
    for (Operator<?> orig : roots) {
        Set<FileSinkOperator> fsOpSet = OperatorUtils.findOperators(orig, FileSinkOperator.class);
        for (FileSinkOperator fsOp : fsOpSet) {
            context.fileSinkSet.remove(fsOp);
        }
        Operator<?> newRoot = it.next();
        replacementMap.put(orig, newRoot);
        if (newRoot instanceof HashTableDummyOperator) {
            // dummy ops need to be updated to the cloned ones.
            dummyOps.add((HashTableDummyOperator) newRoot);
            it.remove();
        } else if (newRoot instanceof AppMasterEventOperator) {
            // need to restore the original scan.
            if (newRoot.getConf() instanceof DynamicPruningEventDesc) {
                TableScanOperator ts = ((DynamicPruningEventDesc) orig.getConf()).getTableScan();
                if (ts == null) {
                    throw new AssertionError("No table scan associated with dynamic event pruning. " + orig);
                }
                ((DynamicPruningEventDesc) newRoot.getConf()).setTableScan(ts);
            }
            it.remove();
        } else {
            if (newRoot instanceof TableScanOperator) {
                if (context.tsToEventMap.containsKey(orig)) {
                    // we need to update event operators with the cloned table scan
                    for (AppMasterEventOperator event : context.tsToEventMap.get(orig)) {
                        ((DynamicPruningEventDesc) event.getConf()).setTableScan((TableScanOperator) newRoot);
                    }
                }
                // This TableScanOperator could be part of semijoin optimization.
                Map<ReduceSinkOperator, TableScanOperator> rsOpToTsOpMap = context.parseContext.getRsOpToTsOpMap();
                for (ReduceSinkOperator rs : rsOpToTsOpMap.keySet()) {
                    if (rsOpToTsOpMap.get(rs) == orig) {
                        rsOpToTsOpMap.put(rs, (TableScanOperator) newRoot);
                    }
                }
            }
            context.rootToWorkMap.remove(orig);
            context.rootToWorkMap.put(newRoot, work);
        }
    }
    // now we remove all the unions. we throw away any branch that's not reachable from
    // the current set of roots. The reason is that those branches will be handled in
    // different tasks.
    Deque<Operator<?>> operators = new LinkedList<Operator<?>>();
    operators.addAll(newRoots);
    Set<Operator<?>> seen = new HashSet<Operator<?>>();
    while (!operators.isEmpty()) {
        Operator<?> current = operators.pop();
        seen.add(current);
        if (current instanceof FileSinkOperator) {
            FileSinkOperator fileSink = (FileSinkOperator) current;
            // remember it for additional processing later
            context.fileSinkSet.add(fileSink);
            FileSinkDesc desc = fileSink.getConf();
            Path path = desc.getDirName();
            List<FileSinkDesc> linked;
            if (!context.linkedFileSinks.containsKey(path)) {
                linked = new ArrayList<FileSinkDesc>();
                context.linkedFileSinks.put(path, linked);
            }
            linked = context.linkedFileSinks.get(path);
            linked.add(desc);
            desc.setDirName(new Path(path, "" + linked.size()));
            desc.setLinkedFileSink(true);
            desc.setParentDir(path);
            desc.setLinkedFileSinkDesc(linked);
        }
        if (current instanceof AppMasterEventOperator) {
            // remember for additional processing later
            context.eventOperatorSet.add((AppMasterEventOperator) current);
            // mark the original as abandoned. Don't need it anymore.
            context.abandonedEventOperatorSet.add((AppMasterEventOperator) replacementMap.inverse().get(current));
        }
        if (current instanceof UnionOperator) {
            Operator<?> parent = null;
            int count = 0;
            for (Operator<?> op : current.getParentOperators()) {
                if (seen.contains(op)) {
                    ++count;
                    parent = op;
                }
            }
            // we should have been able to reach the union from only one side.
            assert count <= 1;
            if (parent == null) {
                // root operator is union (can happen in reducers)
                replacementMap.put(current, current.getChildOperators().get(0));
            } else {
                parent.removeChildAndAdoptItsChildren(current);
            }
        }
        if (current instanceof FileSinkOperator || current instanceof ReduceSinkOperator) {
            current.setChildOperators(null);
        } else {
            operators.addAll(current.getChildOperators());
        }
    }
    LOG.debug("Setting dummy ops for work " + work.getName() + ": " + dummyOps);
    work.setDummyOps(dummyOps);
    work.replaceRoots(replacementMap);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) Path(org.apache.hadoop.fs.Path) BiMap(com.google.common.collect.BiMap) HashBiMap(com.google.common.collect.HashBiMap)
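The clone bookkeeping above hinges on Guava's BiMap: forward lookups map original roots to clones, and the inverse() view recovers the original from a clone when abandoned event operators are recorded. A minimal self-contained illustration of that idiom; the string values are placeholders and Guava must be on the classpath:

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public final class BiMapDemo {
    public static void main(String[] args) {
        BiMap<String, String> replacementMap = HashBiMap.create();
        replacementMap.put("origRoot", "clonedRoot");

        // forward view: original -> clone
        System.out.println(replacementMap.get("origRoot"));             // clonedRoot
        // inverse view: clone -> original, kept in sync automatically
        System.out.println(replacementMap.inverse().get("clonedRoot")); // origRoot
    }
}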

Aggregations

AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) 4
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 2
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator) 2
Operator (org.apache.hadoop.hive.ql.exec.Operator) 2
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 2
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 2
DynamicPruningEventDesc (org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) 2
BiMap (com.google.common.collect.BiMap) 1
HashBiMap (com.google.common.collect.HashBiMap) 1
HashMap (java.util.HashMap) 1
HashSet (java.util.HashSet) 1
Path (org.apache.hadoop.fs.Path) 1
CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator) 1
CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) 1
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator) 1
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator) 1
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 1
HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) 1
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 1
MuxOperator (org.apache.hadoop.hive.ql.exec.MuxOperator) 1